In [1]:
from Game import Game
from policies.qLearning import QLearningParams
from Maps import *

"""
  Kick-starts a particular experiment/game.
"""

# Script entry point: configure a Q-learning agent, play one experiment and
# print its summary.
if __name__ == "__main__":
    # Start from the defaults supplied by QLearningParams() and override the
    # three hyperparameters this experiment tunes.
    # NOTE(review): presumably gamma = discount factor, eps = exploration
    # rate, alpha = learning rate — confirm against policies.qLearning.
    params = QLearningParams()
    params.gamma, params.eps, params.alpha = 0.5, 0.01, 0.6

    # One Game instance drives the whole experiment; every setting is passed
    # by keyword so the call does not depend on Game's parameter order.
    game = Game(
        policy="qlearning",
        params=params,
        num_episodes=1500,
        num_steps_per_episode=500,
        mapp=small_map,
        num_ghosts=1,
        verbose=True,  # appears to enable the per-step board/Q-value log
    )

    # Play the configured episodes first, then report on them.
    game.run()
    game.report_results()
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 ...
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Episode # 0
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:1, Action:North
State  193
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [5.4 0.  0.  0. ]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[0. 0. 0. 0.]
------
Step:2, Action:North
State  122
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -292
xxxxx
x.a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-180.6    0.     0.     0. ]
------
Step:3, Action:East
State  122
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [-180.6    0.    -0.6    0. ]
Reward: -1  Episode Reward:  -293
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[0. 0. 0. 0.]
------
Step:4, Action:North
State  138
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -594
xxxxx
x. ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6    0.     0.     0. ]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [ -180.6 -5994.6     0.      0. ]
Reward: -9991  Episode Reward:  -10585
xxxxx
x.  x
x. gx
x...x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[0. 0. 0. 0.]
------
Step:1, Action:East
State  192
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  0.  5.4 0. ]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[0. 0. 0. 0.]
------
Step:2, Action:North
State  210
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [5.4 0.  0.  0. ]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6 -5994.6     0.      0. ]
------
Step:3, Action:East
State  138
Old Q Values:  [ -180.6 -5994.6     0.      0. ]
New Q values:  [ -180.6 -5994.6  -180.6     0. ]
Reward: -301  Episode Reward:  -283
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6 -5994.6  -180.6     0. ]
------
Step:4, Action:West
State  138
Old Q Values:  [ -180.6 -5994.6  -180.6     0. ]
New Q values:  [-1.8060e+02 -5.9946e+03 -1.8060e+02 -6.0000e-01]
Reward: -1  Episode Reward:  -284
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[0. 0. 0. 0.]
------
Step:5, Action:North
State  122
Old Q Values:  [-180.6    0.    -0.6    0. ]
New Q values:  [-252.84    0.     -0.6     0.  ]
Reward: -301  Episode Reward:  -585
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-252.84    0.     -0.6     0.  ]
------
Step:6, Action:South
State  123
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [    0.  -6000.6     0.      0. ]
Reward: -10001  Episode Reward:  -10586
xxxxx
x.a x
x. gx
x...x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.8060e+02 -5.9946e+03 -1.8060e+02 -6.0000e-01]
------
Step:1, Action:West
State  138
Old Q Values:  [-1.8060e+02 -5.9946e+03 -1.8060e+02 -6.0000e-01]
New Q values:  [-1.8060e+02 -5.9946e+03 -1.8060e+02  5.1600e+00]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-252.84    0.     -0.6     0.  ]
------
Step:2, Action:South
State  122
Old Q Values:  [-252.84    0.     -0.6     0.  ]
New Q values:  [-252.84    5.4    -0.6     0.  ]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[0. 0. 0. 0.]
------
Step:3, Action:North
State  192
Old Q Values:  [0.  0.  5.4 0. ]
New Q values:  [1.02 0.   5.4  0.  ]
Reward: -1  Episode Reward:  17
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-252.84    5.4    -0.6     0.  ]
------
Step:4, Action:West
State  120
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  0.  0.  5.4]
Reward: 9  Episode Reward:  26
xxxxx
xag x
x. .x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:5, Action:North
State  104
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-6180.6     0.      0.      0. ]
Reward: -10301  Episode Reward:  -10275
xxxxx
xg  x
x. .x
x. .x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[1.02 0.   5.4  0.  ]
------
Step:1, Action:East
State  192
Old Q Values:  [1.02 0.   5.4  0.  ]
New Q values:  [1.02 0.   7.56 0.  ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[0. 0. 0. 0.]
------
Step:2, Action:North
State  210
Old Q Values:  [5.4 0.  0.  0. ]
New Q values:  [7.56 0.   0.   0.  ]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[0. 0. 0. 0.]
------
Step:3, Action:North
State  130
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -283
xxxxx
x..ax
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6    0.     0.     0. ]
------
Step:4, Action:South
State  138
Old Q Values:  [-1.8060e+02 -5.9946e+03 -1.8060e+02  5.1600e+00]
New Q values:  [-1.80600e+02 -8.39844e+03 -1.80600e+02  5.16000e+00]
Reward: -10001  Episode Reward:  -10284
xxxxx
x.. x
x  gx
x...x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[0. 0. 0. 0.]
------
Step:1, Action:North
State  260
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [5.4 0.  0.  0. ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:2, Action:North
State  180
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [5.4 0.  0.  0. ]
Reward: 9  Episode Reward:  18
xxxxx
xa..x
xg..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:3, Action:North
State  108
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-6180.6     0.      0.      0. ]
Reward: -10301  Episode Reward:  -10283
xxxxx
xg..x
x ..x
x  .x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600e+02 -8.39844e+03 -1.80600e+02  5.16000e+00]
------
Step:1, Action:West
State  136
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [    0.       0.       0.   -5992.98]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.g x
x. .x
x...x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:1, Action:North
State  120
Old Q Values:  [0.  0.  0.  5.4]
New Q values:  [-6.17898e+03  0.00000e+00  0.00000e+00  5.40000e+00]
Reward: -10301  Episode Reward:  -10301
xxxxx
x.g x
x...x
x...x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[0. 0. 0. 0.]
------
Step:1, Action:North
State  216
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [5.4 0.  0.  0. ]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[    0.       0.       0.   -5992.98]
------
Step:2, Action:North
State  136
Old Q Values:  [    0.       0.       0.   -5992.98]
New Q values:  [-6180.6      0.       0.   -5992.98]
Reward: -10301  Episode Reward:  -10292
xxxxx
x .gx
x.. x
x...x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0. 0. 0. 0.]
------
Step:1, Action:North
State  103
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -301
xxxxx
xa..x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6    0.     0.     0. ]
------
Step:2, Action:South
State  111
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  5.4 0.  0. ]
Reward: 9  Episode Reward:  -292
xxxxx
x ..x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[0. 0. 0. 0.]
------
Step:3, Action:North
State  183
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-0.6  0.   0.   0. ]
Reward: -1  Episode Reward:  -293
xxxxx
xa..x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6    0.     0.     0. ]
------
Step:4, Action:South
State  110
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [ 0.  -0.6  0.   0. ]
Reward: -1  Episode Reward:  -294
xxxxx
x ..x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:5, Action:North
State  180
Old Q Values:  [5.4 0.  0.  0. ]
New Q values:  [1.56 0.   0.   0.  ]
Reward: -1  Episode Reward:  -295
xxxxx
xa..x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ 0.  -0.6  0.   0. ]
------
Step:6, Action:North
State  110
Old Q Values:  [ 0.  -0.6  0.   0. ]
New Q values:  [-180.6   -0.6    0.     0. ]
Reward: -301  Episode Reward:  -596
xxxxx
xa..x
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6   -0.6    0.     0. ]
------
Step:7, Action:East
State  110
Old Q Values:  [-180.6   -0.6    0.     0. ]
New Q values:  [-180.6    -0.6     7.02    0.  ]
Reward: 9  Episode Reward:  -587
xxxxx
x a.x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-252.84    5.4    -0.6     0.  ]
------
Step:8, Action:South
State  122
Old Q Values:  [-252.84    5.4    -0.6     0.  ]
New Q values:  [-2.52840e+02 -5.99844e+03 -6.00000e-01  0.00000e+00]
Reward: -10001  Episode Reward:  -10588
xxxxx
x  .x
x g.x
x...x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[0. 0. 0. 0.]
------
Step:1, Action:North
State  208
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-5994.6     0.      0.      0. ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:1, Action:North
State  272
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [7.668 0.    0.    0.   ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.a.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[1.02 0.   7.56 0.  ]
------
Step:2, Action:East
State  192
Old Q Values:  [1.02 0.   7.56 0.  ]
New Q values:  [1.02  0.    8.424 0.   ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5994.6     0.      0.      0. ]
------
Step:3, Action:South
State  208
Old Q Values:  [-5994.6     0.      0.      0. ]
New Q values:  [-5.9946e+03  5.4000e+00  0.0000e+00  0.0000e+00]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[0. 0. 0. 0.]
------
Step:4, Action:East
State  288
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [   0.     0.  -180.6    0. ]
Reward: -301  Episode Reward:  -274
xxxxx
x.. x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   0.     0.  -180.6    0. ]
------
Step:5, Action:North
State  288
Old Q Values:  [   0.     0.  -180.6    0. ]
New Q values:  [   1.02    0.   -180.6     0.  ]
Reward: -1  Episode Reward:  -275
xxxxx
x.. x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5.9946e+03  5.4000e+00  0.0000e+00  0.0000e+00]
------
Step:6, Action:South
State  208
Old Q Values:  [-5.9946e+03  5.4000e+00  0.0000e+00  0.0000e+00]
New Q values:  [-5.9946e+03  1.8660e+00  0.0000e+00  0.0000e+00]
Reward: -1  Episode Reward:  -276
xxxxx
x.. x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   1.02    0.   -180.6     0.  ]
------
Step:7, Action:North
State  288
Old Q Values:  [   1.02    0.   -180.6     0.  ]
New Q values:  [   0.3678    0.     -180.6       0.    ]
Reward: -1  Episode Reward:  -277
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5.9946e+03  1.8660e+00  0.0000e+00  0.0000e+00]
------
Step:8, Action:South
State  208
Old Q Values:  [-5.9946e+03  1.8660e+00  0.0000e+00  0.0000e+00]
New Q values:  [-5.9946e+03  2.5674e-01  0.0000e+00  0.0000e+00]
Reward: -1  Episode Reward:  -278
xxxxx
x.. x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   0.3678    0.     -180.6       0.    ]
------
Step:9, Action:North
State  288
Old Q Values:  [   0.3678    0.     -180.6       0.    ]
New Q values:  [-6000.375858     0.        -180.6          0.      ]
Reward: -10001  Episode Reward:  -10279
xxxxx
x.. x
x. gx
x.  x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:1, Action:North
State  180
Old Q Values:  [1.56 0.   0.   0.  ]
New Q values:  [8.13 0.   0.   0.  ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6    -0.6     7.02    0.  ]
------
Step:2, Action:East
State  111
Old Q Values:  [0.  5.4 0.  0. ]
New Q values:  [0.  5.4 5.4 0. ]
Reward: 9  Episode Reward:  18
xxxxx
x a.x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:3, Action:North
State  123
Old Q Values:  [    0.  -6000.6     0.      0. ]
New Q values:  [ -180.6 -6000.6     0.      0. ]
Reward: -301  Episode Reward:  -283
xxxxx
x a.x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -180.6 -6000.6     0.      0. ]
------
Step:4, Action:East
State  123
Old Q Values:  [ -180.6 -6000.6     0.      0. ]
New Q values:  [ -180.6   -6000.6       6.948     0.   ]
Reward: 9  Episode Reward:  -274
xxxxx
x  ax
x ..x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600e+02 -8.39844e+03 -1.80600e+02  5.16000e+00]
------
Step:5, Action:West
State  138
Old Q Values:  [-1.80600e+02 -8.39844e+03 -1.80600e+02  5.16000e+00]
New Q values:  [-1.80600e+02 -8.39844e+03 -1.80600e+02  3.54840e+00]
Reward: -1  Episode Reward:  -275
xxxxx
x a x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -180.6   -6000.6       6.948     0.   ]
------
Step:6, Action:East
State  126
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.      0.      0.46452 0.     ]
Reward: -1  Episode Reward:  -276
xxxxx
x  ax
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600e+02 -8.39844e+03 -1.80600e+02  3.54840e+00]
------
Step:7, Action:West
State  136
Old Q Values:  [-6180.6      0.       0.   -5992.98]
New Q values:  [-6180.6       0.        0.    -8396.172]
Reward: -10001  Episode Reward:  -10277
xxxxx
x g x
x ..x
x ..x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[0. 0. 0. 0.]
------
Step:1, Action:North
State  109
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -301
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-180.6    0.     0.     0. ]
------
Step:2, Action:South
State  108
Old Q Values:  [-6180.6     0.      0.      0. ]
New Q values:  [-6180.6       7.839     0.        0.   ]
Reward: 9  Episode Reward:  -292
xxxxx
xg. x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[8.13 0.   0.   0.  ]
------
Step:3, Action:West
State  181
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [   0.     0.     0.  -180.6]
Reward: -301  Episode Reward:  -593
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   0.     0.     0.  -180.6]
------
Step:4, Action:North
State  180
Old Q Values:  [8.13 0.   0.   0.  ]
New Q values:  [-5994.9963     0.         0.         0.    ]
Reward: -10001  Episode Reward:  -10594
xxxxx
xg. x
x ..x
x...x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[0. 0. 0. 0.]
------
Step:1, Action:North
State  261
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [5.4 0.  0.  0. ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.6  0.   0.   0. ]
------
Step:2, Action:South
State  181
Old Q Values:  [   0.     0.     0.  -180.6]
New Q values:  [   0.      1.02    0.   -180.6 ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5.4 0.  0.  0. ]
------
Step:3, Action:North
State  261
Old Q Values:  [5.4 0.  0.  0. ]
New Q values:  [1.866 0.    0.    0.   ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   0.      1.02    0.   -180.6 ]
------
Step:4, Action:South
State  181
Old Q Values:  [   0.      1.02    0.   -180.6 ]
New Q values:  [   0.        0.3678    0.     -180.6   ]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1.866 0.    0.    0.   ]
------
Step:5, Action:North
State  261
Old Q Values:  [1.866 0.    0.    0.   ]
New Q values:  [0.25674 0.      0.      0.     ]
Reward: -1  Episode Reward:  5
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   0.        0.3678    0.     -180.6   ]
------
Step:6, Action:South
State  181
Old Q Values:  [   0.        0.3678    0.     -180.6   ]
New Q values:  [   0.         -0.375858    0.       -180.6     ]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[0.25674 0.      0.      0.     ]
------
Step:7, Action:North
State  261
Old Q Values:  [0.25674 0.      0.      0.     ]
New Q values:  [-0.497304  0.        0.        0.      ]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   0.         -0.375858    0.       -180.6     ]
------
Step:8, Action:North
State  181
Old Q Values:  [   0.         -0.375858    0.       -180.6     ]
New Q values:  [   5.4        -0.375858    0.       -180.6     ]
Reward: 9  Episode Reward:  12
xxxxx
xag.x
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-180.6    0.     0.     0. ]
------
Step:9, Action:South
State  103
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [-180.6     1.02    0.      0.  ]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   5.4        -0.375858    0.       -180.6     ]
------
Step:10, Action:North
State  181
Old Q Values:  [   5.4        -0.375858    0.       -180.6     ]
New Q values:  [   1.866      -0.375858    0.       -180.6     ]
Reward: -1  Episode Reward:  10
xxxxx
xa..x
x .gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6     1.02    0.      0.  ]
------
Step:11, Action:South
State  109
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [-1.806e+02 -4.020e-02  0.000e+00  0.000e+00]
Reward: -1  Episode Reward:  9
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   1.866      -0.375858    0.       -180.6     ]
------
Step:12, Action:North
State  181
Old Q Values:  [   1.866      -0.375858    0.       -180.6     ]
New Q values:  [ 1.46400e-01 -3.75858e-01  0.00000e+00 -1.80600e+02]
Reward: -1  Episode Reward:  8
xxxxx
xag.x
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.806e+02 -4.020e-02  0.000e+00  0.000e+00]
------
Step:13, Action:East
State  109
Old Q Values:  [-1.806e+02 -4.020e-02  0.000e+00  0.000e+00]
New Q values:  [-1.8060e+02 -4.0200e-02 -6.0006e+03  0.0000e+00]
Reward: -10001  Episode Reward:  -9993
xxxxx
xa.gx
x ..x
x  .x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5.9946e+03  2.5674e-01  0.0000e+00  0.0000e+00]
------
Step:1, Action:South
State  208
Old Q Values:  [-5.9946e+03  2.5674e-01  0.0000e+00  0.0000e+00]
New Q values:  [-5.994600e+03  5.502696e+00  0.000000e+00  0.000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858     0.        -180.6          0.      ]
------
Step:2, Action:South
State  288
Old Q Values:  [-6000.375858     0.        -180.6          0.      ]
New Q values:  [-6000.375858  -180.6       -180.6          0.      ]
Reward: -301  Episode Reward:  -292
xxxxx
x. .x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858  -180.6       -180.6          0.      ]
------
Step:3, Action:West
State  288
Old Q Values:  [-6000.375858  -180.6       -180.6          0.      ]
New Q values:  [-6000.375858  -180.6       -180.6          7.7004  ]
Reward: 9  Episode Reward:  -283
xxxxx
x. .x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[7.668 0.    0.    0.   ]
------
Step:4, Action:North
State  272
Old Q Values:  [7.668 0.    0.    0.   ]
New Q values:  [10.9944  0.      0.      0.    ]
Reward: 9  Episode Reward:  -274
xxxxx
x. .x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[1.02  0.    8.424 0.   ]
------
Step:5, Action:East
State  192
Old Q Values:  [1.02  0.    8.424 0.   ]
New Q values:  [1.02      0.        4.4204088 0.       ]
Reward: -1  Episode Reward:  -275
xxxxx
x. .x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5.994600e+03  5.502696e+00  0.000000e+00  0.000000e+00]
------
Step:6, Action:South
State  208
Old Q Values:  [-5.994600e+03  5.502696e+00  0.000000e+00  0.000000e+00]
New Q values:  [-5.9946000e+03  3.9111984e+00  0.0000000e+00  0.0000000e+00]
Reward: -1  Episode Reward:  -276
xxxxx
x. .x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858  -180.6       -180.6          7.7004  ]
------
Step:7, Action:West
State  288
Old Q Values:  [-6000.375858  -180.6       -180.6          7.7004  ]
New Q values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.48016000e+00]
Reward: -1  Episode Reward:  -277
xxxxx
x. .x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:8, Action:North
State  272
Old Q Values:  [10.9944  0.      0.      0.    ]
New Q values:  [3.79776 0.      0.      0.     ]
Reward: -1  Episode Reward:  -278
xxxxx
x. .x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[0. 0. 0. 0.]
------
Step:9, Action:North
State  194
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-0.6  0.   0.   0. ]
Reward: -1  Episode Reward:  -279
xxxxx
x.a.x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[0. 0. 0. 0.]
------
Step:10, Action:North
State  122
Old Q Values:  [-2.52840e+02 -5.99844e+03 -6.00000e-01  0.00000e+00]
New Q values:  [-2.81736e+02 -5.99844e+03 -6.00000e-01  0.00000e+00]
Reward: -301  Episode Reward:  -580
xxxxx
x.a.x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736e+02 -5.99844e+03 -6.00000e-01  0.00000e+00]
------
Step:11, Action:West
State  122
Old Q Values:  [-2.81736e+02 -5.99844e+03 -6.00000e-01  0.00000e+00]
New Q values:  [-2.81736e+02 -5.99844e+03 -6.00000e-01  5.40000e+00]
Reward: 9  Episode Reward:  -571
xxxxx
xa .x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[0. 0. 0. 0.]
------
Step:12, Action:North
State  102
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -872
xxxxx
xa .x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6    0.     0.     0. ]
------
Step:13, Action:East
State  107
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.   0.   1.02 0.  ]
Reward: -1  Episode Reward:  -873
xxxxx
x a.x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736e+02 -5.99844e+03 -6.00000e-01  5.40000e+00]
------
Step:14, Action:West
State  122
Old Q Values:  [-2.81736e+02 -5.99844e+03 -6.00000e-01  5.40000e+00]
New Q values:  [-2.81736e+02 -5.99844e+03 -6.00000e-01  1.86600e+00]
Reward: -1  Episode Reward:  -874
xxxxx
xa .x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[0.   0.   1.02 0.  ]
------
Step:15, Action:East
State  102
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [-180.6    0.    -0.6    0. ]
Reward: -1  Episode Reward:  -875
xxxxx
x a.x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:16, Action:North
State  118
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -1176
xxxxx
x a.x
xg  x
x.  x
xxxxx
Step:17, Action:South
State  122
Old Q Values:  [-2.81736e+02 -5.99844e+03 -6.00000e-01  1.86600e+00]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -6.00000000e-01  1.86600000e+00]
Reward: -10001  Episode Reward:  -11177
xxxxx
x  .x
x.g x
x.  x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:1, Action:North
State  273
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [7.02 0.   0.   0.  ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[5.4 0.  0.  0. ]
------
Step:2, Action:North
State  195
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [7.4844 0.     0.     0.    ]
Reward: 9  Episode Reward:  18
xxxxx
x.a.x
x. .x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -180.6   -6000.6       6.948     0.   ]
------
Step:3, Action:East
State  123
Old Q Values:  [ -180.6   -6000.6       6.948     0.   ]
New Q values:  [ -180.6     -6000.6         9.24372     0.     ]
Reward: 9  Episode Reward:  27
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600e+02 -8.39844e+03 -1.80600e+02  3.54840e+00]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.80600e+02 -8.39844e+03 -1.80600e+02  3.54840e+00]
New Q values:  [-1.80600e+02 -8.39844e+03 -1.80600e+02  1.37916e+00]
Reward: -1  Episode Reward:  26
xxxxx
x.a x
x.g.x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03 -6.00000000e-01  1.86600000e+00]
------
Step:5, Action:West
State  123
Old Q Values:  [ -180.6     -6000.6         9.24372     0.     ]
New Q values:  [-1.80600e+02 -6.00060e+03  9.24372e+00  5.70600e+00]
Reward: 9  Episode Reward:  35
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[0.   0.   1.02 0.  ]
------
Step:6, Action:East
State  107
Old Q Values:  [0.   0.   1.02 0.  ]
New Q values:  [0.       0.       2.581116 0.      ]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x. .x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-1.80600e+02 -6.00060e+03  9.24372e+00  5.70600e+00]
------
Step:7, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -6.00000000e-01  1.86600000e+00]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  1.86600000e+00]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x. .x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600e+02 -8.39844e+03 -1.80600e+02  1.37916e+00]
------
Step:8, Action:West
State  138
Old Q Values:  [-1.80600e+02 -8.39844e+03 -1.80600e+02  1.37916e+00]
New Q values:  [-1.80600e+02 -8.39844e+03 -1.80600e+02  5.11464e-01]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x.g.x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  1.86600000e+00]
------
Step:9, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  1.86600000e+00]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  9.20734800e-01]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x. .x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[0.       0.       2.581116 0.      ]
------
Step:10, Action:East
State  106
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [ 0.          0.         -0.32377956  0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x. .x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  9.20734800e-01]
------
Step:11, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  9.20734800e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  5.42628720e-01]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x. .x
x.g x
xxxxx
Step:12, Action:North
State  107
Old Q Values:  [0.       0.       2.581116 0.      ]
New Q values:  [-179.8256652    0.           2.581116     0.       ]
Reward: -301  Episode Reward:  -272
xxxxx
xa  x
x. .x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-179.8256652    0.           2.581116     0.       ]
------
Step:13, Action:East
State  107
Old Q Values:  [-179.8256652    0.           2.581116     0.       ]
New Q values:  [-179.8256652     0.            0.59523502    0.        ]
Reward: -1  Episode Reward:  -273
xxxxx
x a x
x. .x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  5.42628720e-01]
------
Step:14, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01  5.42628720e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01 -3.82948512e-01]
Reward: -1  Episode Reward:  -274
xxxxx
xa  x
x. .x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ 0.          0.         -0.32377956  0.        ]
------
Step:15, Action:North
State  107
Old Q Values:  [-179.8256652     0.            0.59523502    0.        ]
New Q values:  [-252.35169558    0.            0.59523502    0.        ]
Reward: -301  Episode Reward:  -575
xxxxx
xa  x
x. .x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    0.            0.59523502    0.        ]
------
Step:16, Action:East
State  107
Old Q Values:  [-252.35169558    0.            0.59523502    0.        ]
New Q values:  [-252.35169558    0.           -0.47679055    0.        ]
Reward: -1  Episode Reward:  -576
xxxxx
x a x
x.g.x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03 -4.26252000e-01 -3.82948512e-01]
------
Step:17, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01 -3.82948512e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01 -7.53179405e-01]
Reward: -1  Episode Reward:  -577
xxxxx
xa  x
x. .x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    0.           -0.47679055    0.        ]
------
Step:18, Action:South
State  110
Old Q Values:  [-180.6    -0.6     7.02    0.  ]
New Q values:  [-180.6     5.16    7.02    0.  ]
Reward: 9  Episode Reward:  -568
xxxxx
x   x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:19, Action:North
State  180
Old Q Values:  [-5994.9963     0.         0.         0.    ]
New Q values:  [-2398.59852     0.          0.          0.     ]
Reward: -1  Episode Reward:  -569
xxxxx
xa  x
xg .x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ 0.          0.         -0.32377956  0.        ]
------
Step:20, Action:North
State  104
Old Q Values:  [-6180.6     0.      0.      0. ]
New Q values:  [-8652.84     0.       0.       0.  ]
Reward: -10301  Episode Reward:  -10870
xxxxx
xg  x
x  .x
x.  x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[5.4 0.  0.  0. ]
------
Step:1, Action:North
State  260
Old Q Values:  [5.4 0.  0.  0. ]
New Q values:  [-5992.44     0.       0.       0.  ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x ..x
xg..x
x ..x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.46400e-01 -3.75858e-01  0.00000e+00 -1.80600e+02]
------
Step:1, Action:North
State  180
Old Q Values:  [-2398.59852     0.          0.          0.     ]
New Q values:  [-6951.687708     0.           0.           0.      ]
Reward: -9991  Episode Reward:  -9991
xxxxx
xg .x
x ..x
x...x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600e+02 -8.39844e+03 -1.80600e+02  5.11464e-01]
------
Step:1, Action:West
State  136
Old Q Values:  [-6180.6       0.        0.    -8396.172]
New Q values:  [-6180.6        0.         0.     -3353.0688]
Reward: 9  Episode Reward:  9
xxxxx
x.agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:2, Action:North
State  123
Old Q Values:  [-1.80600e+02 -6.00060e+03  9.24372e+00  5.70600e+00]
New Q values:  [-2.50066884e+02 -6.00060000e+03  9.24372000e+00  5.70600000e+00]
Reward: -301  Episode Reward:  -292
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.50066884e+02 -6.00060000e+03  9.24372000e+00  5.70600000e+00]
------
Step:3, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -4.26252000e-01 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -6.17061600e-01 -7.53179405e-01]
Reward: -1  Episode Reward:  -293
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600e+02 -8.39844e+03 -1.80600e+02  5.11464e-01]
------
Step:4, Action:West
State  136
Old Q Values:  [-6180.6        0.         0.     -3353.0688]
New Q values:  [-6180.6         0.          0.      -7340.20752]
Reward: -10001  Episode Reward:  -10294
xxxxx
x.g x
x.. x
x...x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         0.          0.      -7340.20752]
------
Step:1, Action:South
State  138
Old Q Values:  [-1.80600e+02 -8.39844e+03 -1.80600e+02  5.11464e-01]
New Q values:  [-1.806000e+02 -3.352356e+03 -1.806000e+02  5.114640e-01]
Reward: 9  Episode Reward:  9
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[5.4 0.  0.  0. ]
------
Step:2, Action:North
State  208
Old Q Values:  [-5.9946000e+03  3.9111984e+00  0.0000000e+00  0.0000000e+00]
New Q values:  [-2398.2865608     3.9111984     0.            0.       ]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.806000e+02 -3.352356e+03 -1.806000e+02  5.114640e-01]
------
Step:3, Action:West
State  136
Old Q Values:  [-6180.6         0.          0.      -7340.20752]
New Q values:  [-6180.6          0.           0.       -2936.683008]
Reward: -1  Episode Reward:  7
xxxxx
x.agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:4, Action:North
State  123
Old Q Values:  [-2.50066884e+02 -6.00060000e+03  9.24372000e+00  5.70600000e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  9.24372000e+00  5.70600000e+00]
Reward: -301  Episode Reward:  -294
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  9.24372000e+00  5.70600000e+00]
------
Step:5, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -6.17061600e-01 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -6.93385440e-01 -7.53179405e-01]
Reward: -1  Episode Reward:  -295
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.806000e+02 -3.352356e+03 -1.806000e+02  5.114640e-01]
------
Step:6, Action:West
State  138
Old Q Values:  [-1.806000e+02 -3.352356e+03 -1.806000e+02  5.114640e-01]
New Q values:  [-1.8060000e+02 -3.3523560e+03 -1.8060000e+02  2.3777016e+00]
Reward: -1  Episode Reward:  -296
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  9.24372000e+00  5.70600000e+00]
------
Step:7, Action:East
State  121
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [    0.      0.  -6000.6     0. ]
Reward: -10001  Episode Reward:  -10297
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[3.79776 0.      0.      0.     ]
------
Step:1, Action:North
State  272
Old Q Values:  [3.79776 0.      0.      0.     ]
New Q values:  [6.919104 0.       0.       0.      ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6  0.   0.   0. ]
------
Step:2, Action:East
State  195
Old Q Values:  [7.4844 0.     0.     0.    ]
New Q values:  [7.4844 0.     7.668  0.    ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.56 0.   0.   0.  ]
------
Step:3, Action:North
State  210
Old Q Values:  [7.56 0.   0.   0.  ]
New Q values:  [8.424 0.    0.    0.   ]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6    0.     0.     0. ]
------
Step:4, Action:South
State  130
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [-180.6       1.9272    0.        0.    ]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:5, Action:North
State  208
Old Q Values:  [-2398.2865608     3.9111984     0.            0.       ]
New Q values:  [-959.33646432    3.9111984     0.            0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6       1.9272    0.        0.    ]
------
Step:6, Action:South
State  136
Old Q Values:  [-6180.6          0.           0.       -2936.683008]
New Q values:  [-6.18060000e+03  5.73359520e-01  0.00000000e+00 -2.93668301e+03]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x. ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-959.33646432    3.9111984     0.            0.        ]
------
Step:7, Action:South
State  208
Old Q Values:  [-959.33646432    3.9111984     0.            0.        ]
New Q values:  [-959.33646432    7.70852736    0.            0.        ]
Reward: 9  Episode Reward:  33
xxxxx
xg. x
x.  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.48016000e+00]
------
Step:8, Action:West
State  288
Old Q Values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.48016000e+00]
New Q values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.46779520e+00]
Reward: -1  Episode Reward:  32
xxxxx
x.. x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[6.919104 0.       0.       0.      ]
------
Step:9, Action:North
State  272
Old Q Values:  [6.919104 0.       0.       0.      ]
New Q values:  [-5996.50623576     0.             0.             0.        ]
Reward: -10001  Episode Reward:  -9969
xxxxx
x.. x
x.g x
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.8060000e+02 -3.3523560e+03 -1.8060000e+02  2.3777016e+00]
------
Step:1, Action:West
State  138
Old Q Values:  [-1.8060000e+02 -3.3523560e+03 -1.8060000e+02  2.3777016e+00]
New Q values:  [ -180.6        -3352.356       -180.6            9.12419664]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  9.24372000e+00  5.70600000e+00]
------
Step:2, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -6.93385440e-01 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03  1.85990482e+00 -7.53179405e-01]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3352.356       -180.6            9.12419664]
------
Step:3, Action:West
State  138
Old Q Values:  [ -180.6        -3352.356       -180.6            9.12419664]
New Q values:  [ -180.6       -3352.356      -180.6           3.6076501]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  1.85990482e+00 -7.53179405e-01]
------
Step:4, Action:East
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  9.24372000e+00  5.70600000e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  4.17978303e+00  5.70600000e+00]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6       -3352.356      -180.6           3.6076501]
------
Step:5, Action:West
State  138
Old Q Values:  [ -180.6       -3352.356      -180.6           3.6076501]
New Q values:  [-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  2.55486004e+00]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  4.17978303e+00  5.70600000e+00]
------
Step:6, Action:West
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  4.17978303e+00  5.70600000e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  4.17978303e+00  7.68240000e+00]
Reward: 9  Episode Reward:  14
xxxxx
xa  x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    0.           -0.47679055    0.        ]
------
Step:7, Action:South
State  109
Old Q Values:  [-1.8060e+02 -4.0200e-02 -6.0006e+03  0.0000e+00]
New Q values:  [-1.80600e+02  5.42784e+00 -6.00060e+03  0.00000e+00]
Reward: 9  Episode Reward:  23
xxxxx
x  gx
xa..x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.46400e-01 -3.75858e-01  0.00000e+00 -1.80600e+02]
------
Step:8, Action:North
State  181
Old Q Values:  [ 1.46400e-01 -3.75858e-01  0.00000e+00 -1.80600e+02]
New Q values:  [   1.086912   -0.375858    0.       -180.6     ]
Reward: -1  Episode Reward:  22
xxxxx
xa gx
x ..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600e+02  5.42784e+00 -6.00060e+03  0.00000e+00]
------
Step:9, Action:South
State  111
Old Q Values:  [0.  5.4 5.4 0. ]
New Q values:  [0.        1.8860736 5.4       0.       ]
Reward: -1  Episode Reward:  21
xxxxx
x   x
xa.gx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   1.086912   -0.375858    0.       -180.6     ]
------
Step:10, Action:North
State  181
Old Q Values:  [   1.086912   -0.375858    0.       -180.6     ]
New Q values:  [   1.4631168   -0.375858     0.        -180.6      ]
Reward: -1  Episode Reward:  20
xxxxx
xa gx
x ..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600e+02  5.42784e+00 -6.00060e+03  0.00000e+00]
------
Step:11, Action:South
State  111
Old Q Values:  [0.        1.8860736 5.4       0.       ]
New Q values:  [0.         0.59336448 5.4        0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x   x
xa.gx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   1.4631168   -0.375858     0.        -180.6      ]
------
Step:12, Action:North
State  183
Old Q Values:  [-0.6  0.   0.   0. ]
New Q values:  [0.78 0.   0.   0.  ]
Reward: -1  Episode Reward:  18
xxxxx
xa  x
x ..x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         0.59336448 5.4        0.        ]
------
Step:13, Action:East
State  111
Old Q Values:  [0.         0.59336448 5.4        0.        ]
New Q values:  [0.         0.59336448 2.11797144 0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  1.85990482e+00 -7.53179405e-01]
------
Step:14, Action:East
State  126
Old Q Values:  [0.      0.      0.46452 0.     ]
New Q values:  [0.         0.         0.35226601 0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  2.55486004e+00]
------
Step:15, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  2.55486004e+00]
New Q values:  [-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  2.72666402e+00]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x .gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  4.17978303e+00  7.68240000e+00]
------
Step:16, Action:West
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  4.17978303e+00  7.68240000e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  4.17978303e+00  3.10835143e+00]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x ..x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         0.59336448 2.11797144 0.        ]
------
Step:17, Action:East
State  111
Old Q Values:  [0.         0.59336448 2.11797144 0.        ]
New Q values:  [0.         0.59336448 1.50112349 0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x .gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  4.17978303e+00  3.10835143e+00]
------
Step:18, Action:East
State  121
Old Q Values:  [    0.      0.  -6000.6     0. ]
New Q values:  [    0.             0.         -8400.66799214     0.        ]
Reward: -10001  Episode Reward:  -9988
xxxxx
x  gx
x ..x
x. .x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600e+02  5.42784e+00 -6.00060e+03  0.00000e+00]
------
Step:1, Action:South
State  109
Old Q Values:  [-1.80600e+02  5.42784e+00 -6.00060e+03  0.00000e+00]
New Q values:  [ -180.6            8.01007104 -6000.6            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   1.4631168   -0.375858     0.        -180.6      ]
------
Step:2, Action:North
State  181
Old Q Values:  [   1.4631168   -0.375858     0.        -180.6      ]
New Q values:  [   0.43558377   -0.375858      0.         -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
xa. x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         0.59336448 1.50112349 0.        ]
------
Step:3, Action:East
State  111
Old Q Values:  [0.         0.59336448 1.50112349 0.        ]
New Q values:  [0.         0.59336448 6.55842084 0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x a x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  1.85990482e+00 -7.53179405e-01]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03  1.85990482e+00 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03  9.61961131e-01 -7.53179405e-01]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  2.72666402e+00]
------
Step:5, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  2.72666402e+00]
New Q values:  [-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  7.79253946e-01]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  9.61961131e-01 -7.53179405e-01]
------
Step:6, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03  9.61961131e-01 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03  1.85606362e-02 -7.53179405e-01]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  7.79253946e-01]
------
Step:7, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -3.35235600e+03 -1.80600000e+02  7.79253946e-01]
New Q values:  [-1.80600000e+02 -3.35235600e+03 -1.80600000e+02 -2.82730231e-01]
Reward: -1  Episode Reward:  13
xxxxx
x a x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  1.85606362e-02 -7.53179405e-01]
------
Step:8, Action:East
State  120
Old Q Values:  [-6.17898e+03  0.00000e+00  0.00000e+00  5.40000e+00]
New Q values:  [-6.17898000e+03  0.00000000e+00 -4.27992144e-01  5.40000000e+00]
Reward: -1  Episode Reward:  12
xxxxx
xg ax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  5.73359520e-01  0.00000000e+00 -2.93668301e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [-1.80600000e+02 -3.35235600e+03 -1.80600000e+02 -2.82730231e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -2.82730231e-01]
Reward: 9  Episode Reward:  21
xxxxx
x   x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[5.4 0.  0.  0. ]
------
Step:10, Action:North
State  216
Old Q Values:  [5.4 0.  0.  0. ]
New Q values:  [1.73200786 0.         0.         0.        ]
Reward: -1  Episode Reward:  20
xxxxx
xg ax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  5.73359520e-01  0.00000000e+00 -2.93668301e+03]
------
Step:11, Action:South
State  136
Old Q Values:  [-6.18060000e+03  5.73359520e-01  0.00000000e+00 -2.93668301e+03]
New Q values:  [-6.18060000e+03  1.48946165e-01  0.00000000e+00 -2.93668301e+03]
Reward: -1  Episode Reward:  19
xxxxx
x g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1.73200786 0.         0.         0.        ]
------
Step:12, Action:North
State  216
Old Q Values:  [1.73200786 0.         0.         0.        ]
New Q values:  [-5999.86251301     0.             0.             0.        ]
Reward: -10001  Episode Reward:  -9982
xxxxx
x  gx
x . x
x...x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576     0.             0.             0.        ]
------
Step:1, Action:South
State  272
Old Q Values:  [-5996.50623576     0.             0.             0.        ]
New Q values:  [-5996.50623576  -180.6            0.             0.        ]
Reward: -301  Episode Reward:  -301
xxxxx
x.g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6            0.             0.        ]
------
Step:2, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6            0.             0.        ]
New Q values:  [-5996.50623576  -180.6            6.14033856     0.        ]
Reward: 9  Episode Reward:  -292
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.46779520e+00]
------
Step:3, Action:West
State  288
Old Q Values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.46779520e+00]
New Q values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.22921965e+00]
Reward: -1  Episode Reward:  -293
xxxxx
x.g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6            6.14033856     0.        ]
------
Step:4, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6            6.14033856     0.        ]
New Q values:  [-5.99650624e+03 -1.80600000e+02  2.52490132e+00  0.00000000e+00]
Reward: -1  Episode Reward:  -294
xxxxx
x...x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.22921965e+00]
------
Step:5, Action:West
State  288
Old Q Values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  2.22921965e+00]
New Q values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  1.04915825e+00]
Reward: -1  Episode Reward:  -295
xxxxx
x...x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5.99650624e+03 -1.80600000e+02  2.52490132e+00  0.00000000e+00]
------
Step:6, Action:East
State  272
Old Q Values:  [-5.99650624e+03 -1.80600000e+02  2.52490132e+00  0.00000000e+00]
New Q values:  [-5.99650624e+03 -1.80600000e+02  7.24708004e-01  0.00000000e+00]
Reward: -1  Episode Reward:  -296
xxxxx
x...x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  1.04915825e+00]
------
Step:7, Action:West
State  288
Old Q Values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  1.04915825e+00]
New Q values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  3.70757030e-02]
Reward: -1  Episode Reward:  -297
xxxxx
x...x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5.99650624e+03 -1.80600000e+02  7.24708004e-01  0.00000000e+00]
------
Step:8, Action:East
State  272
Old Q Values:  [-5.99650624e+03 -1.80600000e+02  7.24708004e-01  0.00000000e+00]
New Q values:  [-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  0.00000000e+00]
Reward: -1  Episode Reward:  -298
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  3.70757030e-02]
------
Step:9, Action:West
State  288
Old Q Values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02  3.70757030e-02]
New Q values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02 -5.85169719e-01]
Reward: -1  Episode Reward:  -299
xxxxx
x.g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  0.00000000e+00]
------
Step:10, Action:West
State  272
Old Q Values:  [-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  0.00000000e+00]
New Q values:  [-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  5.40000000e+00]
Reward: 9  Episode Reward:  -290
xxxxx
x...x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[-0.497304  0.        0.        0.      ]
------
Step:11, Action:South
State  261
Old Q Values:  [-0.497304  0.        0.        0.      ]
New Q values:  [  -0.497304 -180.6         0.          0.      ]
Reward: -301  Episode Reward:  -591
xxxxx
x...x
x. .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  -0.497304 -180.6         0.          0.      ]
------
Step:12, Action:East
State  260
Old Q Values:  [-5992.44     0.       0.       0.  ]
New Q values:  [-5992.44     0.   -6000.6      0.  ]
Reward: -10001  Episode Reward:  -10592
xxxxx
x...x
x. .x
xg  x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[5.4 0.  0.  0. ]
------
Step:1, Action:North
State  193
Old Q Values:  [5.4 0.  0.  0. ]
New Q values:  [7.56 0.   0.   0.  ]
Reward: 9  Episode Reward:  9
xxxxx
x.agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8400.66799214     0.        ]
------
Step:2, Action:North
State  120
Old Q Values:  [-6.17898000e+03  0.00000000e+00 -4.27992144e-01  5.40000000e+00]
New Q values:  [-8.65057200e+03  0.00000000e+00 -4.27992144e-01  5.40000000e+00]
Reward: -10301  Episode Reward:  -10292
xxxxx
x.g.x
x.  x
x...x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  4.17978303e+00  3.10835143e+00]
------
Step:1, Action:East
State  121
Old Q Values:  [    0.             0.         -8400.66799214     0.        ]
New Q values:  [    0.             0.         -9354.82251301     0.        ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -1.80600000e+02 -1.80600000e+02 -5.85169719e-01]
------
Step:1, Action:West
State  288
Old Q Values:  [-6.00037586e+03 -1.80600000e+02 -1.80600000e+02 -5.85169719e-01]
New Q values:  [-6000.375858    -180.6         -180.6            6.78593211]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  5.40000000e+00]
------
Step:2, Action:West
State  272
Old Q Values:  [-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  5.40000000e+00]
New Q values:  [-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  7.56000000e+00]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5992.44     0.   -6000.6      0.  ]
------
Step:3, Action:South
State  257
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [   0.  -180.6    0.     0. ]
Reward: -301  Episode Reward:  -283
xxxxx
x. .x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[   0.  -180.6    0.     0. ]
------
Step:4, Action:North
State  261
Old Q Values:  [  -0.497304 -180.6         0.          0.      ]
New Q values:  [   5.4350784 -180.6          0.           0.       ]
Reward: 9  Episode Reward:  -274
xxxxx
x. .x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[0.78 0.   0.   0.  ]
------
Step:5, Action:North
State  183
Old Q Values:  [0.78 0.   0.   0.  ]
New Q values:  [7.67952625 0.         0.         0.        ]
Reward: 9  Episode Reward:  -265
xxxxx
xa .x
x ..x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         0.59336448 6.55842084 0.        ]
------
Step:6, Action:East
State  111
Old Q Values:  [0.         0.59336448 6.55842084 0.        ]
New Q values:  [0.         0.59336448 2.02336834 0.        ]
Reward: -1  Episode Reward:  -266
xxxxx
x a.x
x ..x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[-180.6    0.     0.     0. ]
------
Step:7, Action:South
State  119
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  5.4 0.  0. ]
Reward: 9  Episode Reward:  -257
xxxxx
x  .x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[0. 0. 0. 0.]
------
Step:8, Action:North
State  197
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [1.02 0.   0.   0.  ]
Reward: -1  Episode Reward:  -258
xxxxx
x a.x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SE
[0.  5.4 0.  0. ]
------
Step:9, Action:South
State  119
Old Q Values:  [0.  5.4 0.  0. ]
New Q values:  [0.   1.56 0.   0.  ]
Reward: -1  Episode Reward:  -259
xxxxx
x  .x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[0. 0. 0. 0.]
------
Step:10, Action:North
State  198
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-0.6  0.   0.   0. ]
Reward: -1  Episode Reward:  -260
xxxxx
x a.x
x  .x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[-180.6    0.     0.     0. ]
------
Step:11, Action:South
State  118
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [ -180.6 -6000.6     0.      0. ]
Reward: -10001  Episode Reward:  -10261
xxxxx
x  .x
x g.x
x   x
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   5.4350784 -180.6          0.           0.       ]
------
Step:1, Action:North
State  261
Old Q Values:  [   5.4350784 -180.6          0.           0.       ]
New Q values:  [   7.70470649 -180.6           0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   0.43558377   -0.375858      0.         -180.6       ]
------
Step:2, Action:North
State  183
Old Q Values:  [7.67952625 0.         0.         0.        ]
New Q values:  [9.078821 0.       0.       0.      ]
Reward: 9  Episode Reward:  18
xxxxx
xa .x
x ..x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         0.59336448 2.02336834 0.        ]
------
Step:3, Action:East
State  110
Old Q Values:  [-180.6     5.16    7.02    0.  ]
New Q values:  [-180.6           5.16          2.21356819    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x a.x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  1.85606362e-02 -7.53179405e-01]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03  1.85606362e-02 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03  5.32260519e+00 -7.53179405e-01]
Reward: 9  Episode Reward:  26
xxxxx
x  ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -2.82730231e-01]
------
Step:5, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -2.82730231e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  8.83689463e-01]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  5.32260519e+00 -7.53179405e-01]
------
Step:6, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03  5.32260519e+00 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03  1.79414891e+00 -7.53179405e-01]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  8.83689463e-01]
------
Step:7, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  8.83689463e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  2.91720459e-01]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  1.79414891e+00 -7.53179405e-01]
------
Step:8, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03  1.79414891e+00 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03  2.05175703e-01 -7.53179405e-01]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  2.91720459e-01]
------
Step:9, Action:West
State  136
Old Q Values:  [-6.18060000e+03  1.48946165e-01  0.00000000e+00 -2.93668301e+03]
New Q values:  [-6.18060000e+03  1.48946165e-01  0.00000000e+00 -1.17365320e+03]
Reward: -1  Episode Reward:  21
xxxxx
xga x
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-8.65057200e+03  0.00000000e+00 -4.27992144e-01  5.40000000e+00]
------
Step:10, Action:South
State  124
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  5.4 0.  0. ]
Reward: 9  Episode Reward:  30
xxxxx
x g x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[0. 0. 0. 0.]
------
Step:11, Action:East
State  200
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  0.  5.4 0. ]
Reward: 9  Episode Reward:  39
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301     0.             0.             0.        ]
------
Step:12, Action:South
State  216
Old Q Values:  [-5999.86251301     0.             0.             0.        ]
New Q values:  [-5999.86251301     7.43577963     0.             0.        ]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x  gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6            6.78593211]
------
Step:13, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6            6.78593211]
New Q values:  [-6000.375858    -180.6         -180.6        60010.22037284]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x   x
x agx
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.01007104 -6000.6            0.        ]
------
Step:1, Action:South
State  111
Old Q Values:  [0.         0.59336448 2.02336834 0.        ]
New Q values:  [0.         5.76802092 2.02336834 0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   0.43558377   -0.375858      0.         -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [   0.43558377   -0.375858      0.         -180.6       ]
New Q values:  [   1.97725482   -0.375858      0.         -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
xag.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.01007104 -6000.6            0.        ]
------
Step:3, Action:South
State  111
Old Q Values:  [0.         5.76802092 2.02336834 0.        ]
New Q values:  [0.         2.30038481 2.02336834 0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   1.97725482   -0.375858      0.         -180.6       ]
------
Step:4, Action:North
State  183
Old Q Values:  [9.078821 0.       0.       0.      ]
New Q values:  [3.72164384 0.         0.         0.        ]
Reward: -1  Episode Reward:  6
xxxxx
xa .x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.30038481 2.02336834 0.        ]
------
Step:5, Action:South
State  111
Old Q Values:  [0.         2.30038481 2.02336834 0.        ]
New Q values:  [0.         1.43664708 2.02336834 0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x  .x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[3.72164384 0.         0.         0.        ]
------
Step:6, Action:North
State  183
Old Q Values:  [3.72164384 0.         0.         0.        ]
New Q values:  [1.49566804 0.         0.         0.        ]
Reward: -1  Episode Reward:  4
xxxxx
xa .x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         1.43664708 2.02336834 0.        ]
------
Step:7, Action:East
State  111
Old Q Values:  [0.         1.43664708 2.02336834 0.        ]
New Q values:  [0.         1.43664708 1.46328224 0.        ]
Reward: -1  Episode Reward:  3
xxxxx
x a.x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  4.17978303e+00  3.10835143e+00]
------
Step:8, Action:East
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  4.17978303e+00  3.10835143e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  7.15942935e+00  3.10835143e+00]
Reward: 9  Episode Reward:  12
xxxxx
x  ax
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  2.91720459e-01]
------
Step:9, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  2.91720459e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -4.21759105e-01]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  2.05175703e-01 -7.53179405e-01]
------
Step:10, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03  2.05175703e-01 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -6.44457450e-01 -7.53179405e-01]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -4.21759105e-01]
------
Step:11, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -4.21759105e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -9.62040877e-01]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03 -6.44457450e-01 -7.53179405e-01]
------
Step:12, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -6.44457450e-01 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -1.14639524e+00 -7.53179405e-01]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -9.62040877e-01]
------
Step:13, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -9.62040877e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -1.21077017e+00]
Reward: -1  Episode Reward:  7
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03 -1.14639524e+00 -7.53179405e-01]
------
Step:14, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -1.14639524e+00 -7.53179405e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -1.14639524e+00  6.46728238e-01]
Reward: -1  Episode Reward:  6
xxxxx
xa  x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6           5.16          2.21356819    0.        ]
------
Step:15, Action:East
State  111
Old Q Values:  [0.         1.43664708 1.46328224 0.        ]
New Q values:  [0.         1.43664708 0.17933137 0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x a x
x g.x
x...x
xxxxx
Step:16, Action:West
State  120
Old Q Values:  [-8.65057200e+03  0.00000000e+00 -4.27992144e-01  5.40000000e+00]
New Q values:  [-8.65057200e+03  0.00000000e+00 -4.27992144e-01  3.96302131e+00]
Reward: -1  Episode Reward:  4
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.01007104 -6000.6            0.        ]
------
Step:17, Action:South
State  111
Old Q Values:  [0.         1.43664708 0.17933137 0.        ]
New Q values:  [ 0.         -0.02534117  0.17933137  0.        ]
Reward: -1  Episode Reward:  3
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[0. 0. 0. 0.]
------
Step:18, Action:North
State  181
Old Q Values:  [   1.97725482   -0.375858      0.         -180.6       ]
New Q values:  [   2.59392324   -0.375858      0.         -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.01007104 -6000.6            0.        ]
------
Step:19, Action:South
State  111
Old Q Values:  [ 0.         -0.02534117  0.17933137  0.        ]
New Q values:  [ 0.         -0.61013647  0.17933137  0.        ]
Reward: -1  Episode Reward:  1
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[0. 0. 0. 0.]
------
Step:20, Action:North
State  189
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-0.54620059  0.          0.          0.        ]
Reward: -1  Episode Reward:  0
xxxxx
xa  x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.         -0.61013647  0.17933137  0.        ]
------
Step:21, Action:East
State  109
Old Q Values:  [ -180.6            8.01007104 -6000.6            0.        ]
New Q values:  [ -180.6            8.01007104 -2400.84           0.        ]
Reward: -1  Episode Reward:  -1
xxxxx
x agx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9354.82251301     0.        ]
------
Step:22, Action:North
State  120
Old Q Values:  [-8.65057200e+03  0.00000000e+00 -4.27992144e-01  3.96302131e+00]
New Q values:  [-9.63963989e+03  0.00000000e+00 -4.27992144e-01  3.96302131e+00]
Reward: -10301  Episode Reward:  -10302
xxxxx
x g x
x ..x
x...x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-959.33646432    7.70852736    0.            0.        ]
------
Step:1, Action:South
State  208
Old Q Values:  [-959.33646432    7.70852736    0.            0.        ]
New Q values:  [ -959.33646432 18011.5495228      0.             0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6        60010.22037284]
------
Step:2, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6        60010.22037284]
New Q values:  [-6000.375858    -180.6         -180.6        24011.75614914]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  7.56000000e+00]
------
Step:3, Action:East
State  272
Old Q Values:  [-5.99650624e+03 -1.80600000e+02 -2.98994088e-01  7.56000000e+00]
New Q values:  [-5996.50623576  -180.6         7202.80724711     7.56      ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6        24011.75614914]
------
Step:4, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6        24011.75614914]
New Q values:  [-6000.375858    -180.6         -180.6        11764.94463379]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6         7202.80724711     7.56      ]
------
Step:5, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6         7202.80724711     7.56      ]
New Q values:  [-5996.50623576  -180.6         6410.00628898     7.56      ]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6        11764.94463379]
------
Step:6, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6        11764.94463379]
New Q values:  [-6000.375858    -180.6         -180.6         6628.37974021]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6         6410.00628898     7.56      ]
------
Step:7, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6         6410.00628898     7.56      ]
New Q values:  [-5996.50623576  -180.6         4551.91643765     7.56      ]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6         6628.37974021]
------
Step:8, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6         6628.37974021]
New Q values:  [-6000.375858    -180.6         -180.6         4016.32682738]
Reward: -1  Episode Reward:  12
xxxxx
xg..x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6         4551.91643765     7.56      ]
------
Step:9, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6         4551.91643765     7.56      ]
New Q values:  [-5996.50623576  -180.6         3025.06462328     7.56      ]
Reward: -1  Episode Reward:  11
xxxxx
x g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6         4016.32682738]
------
Step:10, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6         4016.32682738]
New Q values:  [-6000.375858    -180.6         -180.6         2513.45011793]
Reward: -1  Episode Reward:  10
xxxxx
xg..x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6         3025.06462328     7.56      ]
------
Step:11, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6         3025.06462328     7.56      ]
New Q values:  [-5996.50623576  -180.6         1963.46088469     7.56      ]
Reward: -1  Episode Reward:  9
xxxxx
x g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6         2513.45011793]
------
Step:12, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6         2513.45011793]
New Q values:  [-6000.375858    -180.6         -180.6         1593.81831258]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6         1963.46088469     7.56      ]
------
Step:13, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6         1963.46088469     7.56      ]
New Q values:  [-5996.50623576  -180.6         1262.92984765     7.56      ]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6         1593.81831258]
------
Step:14, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6         1593.81831258]
New Q values:  [-6000.375858    -180.6         -180.6         1015.80627933]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6         1262.92984765     7.56      ]
------
Step:15, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6         1262.92984765     7.56      ]
New Q values:  [-5996.50623576  -180.6          809.31382286     7.56      ]
Reward: -1  Episode Reward:  5
xxxxx
x g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6         1015.80627933]
------
Step:16, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6         1015.80627933]
New Q values:  [-6000.375858    -180.6         -180.6          648.51665859]
Reward: -1  Episode Reward:  4
xxxxx
xg..x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6          809.31382286     7.56      ]
------
Step:17, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6          809.31382286     7.56      ]
New Q values:  [-5996.50623576  -180.6          517.68052672     7.56      ]
Reward: -1  Episode Reward:  3
xxxxx
x ..x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6          648.51665859]
------
Step:18, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6          648.51665859]
New Q values:  [-6000.375858    -180.6         -180.6          414.11082145]
Reward: -1  Episode Reward:  2
xxxxx
x ..x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6          517.68052672     7.56      ]
------
Step:19, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6          517.68052672     7.56      ]
New Q values:  [-5996.50623576  -180.6          330.70545712     7.56      ]
Reward: -1  Episode Reward:  1
xxxxx
x g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6          414.11082145]
------
Step:20, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6          414.11082145]
New Q values:  [-6000.375858    -180.6         -180.6          264.25596572]
Reward: -1  Episode Reward:  0
xxxxx
x ..x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6          330.70545712     7.56      ]
------
Step:21, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6          330.70545712     7.56      ]
New Q values:  [-5996.50623576  -180.6          210.95897256     7.56      ]
Reward: -1  Episode Reward:  -1
xxxxx
x ..x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6         -180.6          264.25596572]
------
Step:22, Action:East
State  288
Old Q Values:  [-6000.375858    -180.6         -180.6          264.25596572]
New Q values:  [-6000.375858    -180.6        -6173.56321028   264.25596572]
Reward: -10301  Episode Reward:  -10302
xxxxx
x ..x
x.. x
x. gx
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.48946165e-01  0.00000000e+00 -1.17365320e+03]
------
Step:1, Action:South
State  136
Old Q Values:  [-6.18060000e+03  1.48946165e-01  0.00000000e+00 -1.17365320e+03]
New Q values:  [-6180.6            7.69031236     0.         -1173.6532032 ]
Reward: 9  Episode Reward:  9
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301     7.43577963     0.             0.        ]
------
Step:2, Action:South
State  208
Old Q Values:  [ -959.33646432 18011.5495228      0.             0.        ]
New Q values:  [-959.33646432 7289.29659883    0.            0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028   264.25596572]
------
Step:3, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028   264.25596572]
New Q values:  [-6000.375858    -180.6        -6173.56321028   174.39007806]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6          210.95897256     7.56      ]
------
Step:4, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6          210.95897256     7.56      ]
New Q values:  [-5996.50623576  -180.6          136.10061244     7.56      ]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028   174.39007806]
------
Step:5, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028   174.39007806]
New Q values:  [-6000.375858    -180.6        -6173.56321028   109.98621496]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5996.50623576  -180.6          136.10061244     7.56      ]
------
Step:6, Action:East
State  272
Old Q Values:  [-5996.50623576  -180.6          136.10061244     7.56      ]
New Q values:  [-5996.50623576  -180.6           86.83610946     7.56      ]
Reward: -1  Episode Reward:  24
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028   109.98621496]
------
Step:7, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028   109.98621496]
New Q values:  [-6000.375858    -180.6        -6173.56321028    45.50048598]
Reward: -1  Episode Reward:  23
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[7.02 0.   0.   0.  ]
------
Step:8, Action:North
State  272
Old Q Values:  [-5996.50623576  -180.6           86.83610946     7.56      ]
New Q values:  [-2391.87637166  -180.6           86.83610946     7.56      ]
Reward: 9  Episode Reward:  32
xxxxx
x.g x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[1.02      0.        4.4204088 0.       ]
------
Step:9, Action:East
State  193
Old Q Values:  [7.56 0.   0.   0.  ]
New Q values:  [   7.56          0.         2186.18897965    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-959.33646432 7289.29659883    0.            0.        ]
------
Step:10, Action:South
State  208
Old Q Values:  [-959.33646432 7289.29659883    0.            0.        ]
New Q values:  [-959.33646432 2928.76878533    0.            0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028    45.50048598]
------
Step:11, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028    45.50048598]
New Q values:  [-6000.375858    -180.6        -6173.56321028    43.65102723]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6           86.83610946     7.56      ]
------
Step:12, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6           86.83610946     7.56      ]
New Q values:  [-2391.87637166  -180.6           47.22975196     7.56      ]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028    43.65102723]
------
Step:13, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028    43.65102723]
New Q values:  [-6000.375858    -180.6        -6173.56321028    31.02933648]
Reward: -1  Episode Reward:  27
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6           47.22975196     7.56      ]
------
Step:14, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6           47.22975196     7.56      ]
New Q values:  [-2391.87637166  -180.6           27.60070173     7.56      ]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028    31.02933648]
------
Step:15, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028    31.02933648]
New Q values:  [-6000.375858    -180.6        -6173.56321028    20.09194511]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6           27.60070173     7.56      ]
------
Step:16, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6           27.60070173     7.56      ]
New Q values:  [-2391.87637166  -180.6           16.46786422     7.56      ]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028    20.09194511]
------
Step:17, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028    20.09194511]
New Q values:  [-6000.375858    -180.6        -6173.56321028    12.37713731]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6           16.46786422     7.56      ]
------
Step:18, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6           16.46786422     7.56      ]
New Q values:  [-2391.87637166  -180.6            9.70028688     7.56      ]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028    12.37713731]
------
Step:19, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028    12.37713731]
New Q values:  [-6000.375858    -180.6        -6173.56321028     7.26094099]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6            9.70028688     7.56      ]
------
Step:20, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6            9.70028688     7.56      ]
New Q values:  [-2391.87637166  -180.6            5.45839705     7.56      ]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858    -180.6        -6173.56321028     7.26094099]
------
Step:21, Action:West
State  288
Old Q Values:  [-6000.375858    -180.6        -6173.56321028     7.26094099]
New Q values:  [-6.00037586e+03 -1.80600000e+02 -6.17356321e+03  4.57237640e+00]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6            5.45839705     7.56      ]
------
Step:22, Action:West
State  272
Old Q Values:  [-2391.87637166  -180.6            5.45839705     7.56      ]
New Q values:  [-2391.87637166  -180.6            5.45839705     8.424     ]
Reward: 9  Episode Reward:  28
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[0. 0. 0. 0.]
------
Step:23, Action:East
State  256
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.     0.     1.9272 0.    ]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6            5.45839705     8.424     ]
------
Step:24, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6            5.45839705     8.424     ]
New Q values:  [-2391.87637166  -180.6            2.95507174     8.424     ]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -1.80600000e+02 -6.17356321e+03  4.57237640e+00]
------
Step:25, Action:West
State  288
Old Q Values:  [-6.00037586e+03 -1.80600000e+02 -6.17356321e+03  4.57237640e+00]
New Q values:  [-6.00037586e+03 -1.80600000e+02 -6.17356321e+03  3.75615056e+00]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6            2.95507174     8.424     ]
------
Step:26, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6            2.95507174     8.424     ]
New Q values:  [-2.39187637e+03 -1.80600000e+02  1.70887386e+00  8.42400000e+00]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x.  x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -1.80600000e+02 -6.17356321e+03  3.75615056e+00]
------
Step:27, Action:South
State  288
Old Q Values:  [-6.00037586e+03 -1.80600000e+02 -6.17356321e+03  3.75615056e+00]
New Q values:  [-6.00037586e+03 -6.25171315e+03 -6.17356321e+03  3.75615056e+00]
Reward: -10301  Episode Reward:  -10277
xxxxx
x.  x
x.  x
x  gx
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   7.70470649 -180.6           0.            0.        ]
------
Step:1, Action:North
State  261
Old Q Values:  [   7.70470649 -180.6           0.            0.        ]
New Q values:  [   9.26005957 -180.6           0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.59392324   -0.375858      0.         -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [   2.59392324   -0.375858      0.         -180.6       ]
New Q values:  [   8.84059061   -0.375858      0.         -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
xag.x
x . x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.01007104 -2400.84           0.        ]
------
Step:3, Action:South
State  103
Old Q Values:  [-180.6     1.02    0.      0.  ]
New Q values:  [-180.6           2.46017718    0.            0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   8.84059061   -0.375858      0.         -180.6       ]
------
Step:4, Action:North
State  183
Old Q Values:  [1.49566804 0.         0.         0.        ]
New Q values:  [0.73632037 0.         0.         0.        ]
Reward: -1  Episode Reward:  16
xxxxx
xa..x
x . x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           2.46017718    0.            0.        ]
------
Step:5, Action:South
State  103
Old Q Values:  [-180.6           2.46017718    0.            0.        ]
New Q values:  [-180.6           3.03624806    0.            0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   8.84059061   -0.375858      0.         -180.6       ]
------
Step:6, Action:North
State  180
Old Q Values:  [-6951.687708     0.           0.           0.      ]
New Q values:  [-2779.7270832     0.            0.            0.       ]
Reward: -1  Episode Reward:  14
xxxxx
xa..x
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6           5.16          2.21356819    0.        ]
------
Step:7, Action:East
State  111
Old Q Values:  [ 0.         -0.61013647  0.17933137  0.        ]
New Q values:  [ 0.         -0.61013647  5.47173255  0.        ]
Reward: 9  Episode Reward:  23
xxxxx
x a.x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ -180.6 -6000.6     0.      0. ]
------
Step:8, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -1.14639524e+00  6.46728238e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03  4.57821085e+00  6.46728238e-01]
Reward: 9  Episode Reward:  32
xxxxx
x  ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -1.21077017e+00]
------
Step:9, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -1.21077017e+00]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  2.89155186e-01]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  4.57821085e+00  6.46728238e-01]
------
Step:10, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03  4.57821085e+00  6.46728238e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03  1.31803090e+00  6.46728238e-01]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x . x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  2.89155186e-01]
------
Step:11, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  2.89155186e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -8.89286566e-02]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.39864988e+03  1.31803090e+00  6.46728238e-01]
------
Step:12, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03  1.31803090e+00  6.46728238e-01]
New Q values:  [-2.81736000e+02 -8.39864988e+03 -9.94662385e-02  6.46728238e-01]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -8.89286566e-02]
------
Step:13, Action:West
State  136
Old Q Values:  [-6180.6            7.69031236     0.         -1173.6532032 ]
New Q values:  [-6180.6            7.69031236     0.          -468.87237489]
Reward: -1  Episode Reward:  27
xxxxx
xga x
x . x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9.63963989e+03  0.00000000e+00 -4.27992144e-01  3.96302131e+00]
------
Step:14, Action:South
State  120
Old Q Values:  [-9.63963989e+03  0.00000000e+00 -4.27992144e-01  3.96302131e+00]
New Q values:  [-9.63963989e+03  7.02000000e+00 -4.27992144e-01  3.96302131e+00]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[0.  0.  5.4 0. ]
------
Step:15, Action:East
State  200
Old Q Values:  [0.  0.  5.4 0. ]
New Q values:  [0.         0.         3.79073389 0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301     7.43577963     0.             0.        ]
------
Step:16, Action:South
State  216
Old Q Values:  [-5999.86251301     7.43577963     0.             0.        ]
New Q values:  [-5999.86251301     9.50115702     0.             0.        ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x  gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6.00037586e+03 -6.25171315e+03 -6.17356321e+03  3.75615056e+00]
------
Step:17, Action:West
State  288
Old Q Values:  [-6.00037586e+03 -6.25171315e+03 -6.17356321e+03  3.75615056e+00]
New Q values:  [-6000.375858   -6251.71315483 -6173.56321028 60009.00846022]
Reward: 100009  Episode Reward:  100053
xxxxx
x  gx
x   x
x a x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9.63963989e+03  7.02000000e+00 -4.27992144e-01  3.96302131e+00]
------
Step:1, Action:South
State  122
Old Q Values:  [-2.81736000e+02 -8.39864988e+03 -9.94662385e-02  6.46728238e-01]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -9.94662385e-02  6.46728238e-01]
Reward: 9  Episode Reward:  9
xxxxx
x  .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[0. 0. 0. 0.]
------
Step:2, Action:North
State  196
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-0.40598153  0.          0.          0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -9.94662385e-02  6.46728238e-01]
------
Step:3, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -9.94662385e-02  6.46728238e-01]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -9.94662385e-02 -3.41308705e-01]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    0.           -0.47679055    0.        ]
------
Step:4, Action:South
State  111
Old Q Values:  [ 0.         -0.61013647  5.47173255  0.        ]
New Q values:  [0.         7.8081226  5.47173255 0.        ]
Reward: 9  Episode Reward:  16
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   8.84059061   -0.375858      0.         -180.6       ]
------
Step:5, Action:North
State  189
Old Q Values:  [-0.54620059  0.          0.          0.        ]
New Q values:  [1.52395654 0.         0.         0.        ]
Reward: -1  Episode Reward:  15
xxxxx
xa .x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         7.8081226  5.47173255 0.        ]
------
Step:6, Action:South
State  111
Old Q Values:  [0.         7.8081226  5.47173255 0.        ]
New Q values:  [0.         2.74414515 5.47173255 0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x  .x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[0.73632037 0.         0.         0.        ]
------
Step:7, Action:North
State  189
Old Q Values:  [1.52395654 0.         0.         0.        ]
New Q values:  [1.65110238 0.         0.         0.        ]
Reward: -1  Episode Reward:  13
xxxxx
xa .x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.74414515 5.47173255 0.        ]
------
Step:8, Action:East
State  111
Old Q Values:  [0.         2.74414515 5.47173255 0.        ]
New Q values:  [0.         2.74414515 3.73652182 0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x a.x
x  .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  7.15942935e+00  3.10835143e+00]
------
Step:9, Action:East
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  7.15942935e+00  3.10835143e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  8.23709314e+00  3.10835143e+00]
Reward: 9  Episode Reward:  21
xxxxx
x  ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -8.89286566e-02]
------
Step:10, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -8.89286566e-02]
New Q values:  [ -180.6        -1333.9224      -180.6            1.83555648]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x  .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  8.23709314e+00  3.10835143e+00]
------
Step:11, Action:East
State  126
Old Q Values:  [0.         0.         0.35226601 0.        ]
New Q values:  [0.         0.         0.09157335 0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1333.9224      -180.6            1.83555648]
------
Step:12, Action:West
State  138
Old Q Values:  [ -180.6        -1333.9224      -180.6            1.83555648]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  1.61694597e-01]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[0.         0.         0.09157335 0.        ]
------
Step:13, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -9.94662385e-02 -3.41308705e-01]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -5.91278116e-01 -3.41308705e-01]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  1.61694597e-01]
------
Step:14, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02  1.61694597e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -6.37714773e-01]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x g.x
x...x
xxxxx
Step:15, Action:South
State  120
Old Q Values:  [-9.63963989e+03  7.02000000e+00 -4.27992144e-01  3.96302131e+00]
New Q values:  [-9.63963989e+03 -5.99568600e+03 -4.27992144e-01  3.96302131e+00]
Reward: -10001  Episode Reward:  -9985
xxxxx
x g x
x  .x
x...x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6  0.   0.   0. ]
------
Step:1, Action:East
State  195
Old Q Values:  [7.4844 0.     7.668  0.    ]
New Q values:  [ 7.4844  0.     10.9944  0.    ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:2, Action:North
State  208
Old Q Values:  [-959.33646432 2928.76878533    0.            0.        ]
New Q values:  [-378.52590016 2928.76878533    0.            0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -6.37714773e-01]
------
Step:3, Action:West
State  136
Old Q Values:  [-6180.6            7.69031236     0.          -468.87237489]
New Q values:  [-6180.6            7.69031236     0.          -182.14894995]
Reward: 9  Episode Reward:  27
xxxxx
x.agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9354.82251301     0.        ]
------
Step:4, Action:North
State  120
Old Q Values:  [-9.63963989e+03 -5.99568600e+03 -4.27992144e-01  3.96302131e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03 -4.27992144e-01  3.96302131e+00]
Reward: -10301  Episode Reward:  -10274
xxxxx
x.g x
x.  x
x. .x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -5.91278116e-01 -3.41308705e-01]
------
Step:1, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -5.91278116e-01 -3.41308705e-01]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  6.81147652e+00]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6           5.16          2.21356819    0.        ]
------
Step:2, Action:East
State  110
Old Q Values:  [-180.6           5.16          2.21356819    0.        ]
New Q values:  [-180.6           5.16          2.32887023    0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  6.81147652e+00]
------
Step:3, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  6.81147652e+00]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  3.24554715e+00]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.74414515 3.73652182 0.        ]
------
Step:4, Action:East
State  110
Old Q Values:  [-180.6           5.16          2.32887023    0.        ]
New Q values:  [-180.6           5.16          1.30521224    0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  3.24554715e+00]
------
Step:5, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  3.24554715e+00]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  2.24621886e+00]
Reward: -1  Episode Reward:  5
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6           5.16          1.30521224    0.        ]
------
Step:6, Action:East
State  111
Old Q Values:  [0.         2.74414515 3.73652182 0.        ]
New Q values:  [0.         2.74414515 1.56847439 0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x a.x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  2.24621886e+00]
------
Step:7, Action:West
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03 -4.27992144e-01  3.96302131e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03 -4.27992144e-01  3.38822984e+00]
Reward: -1  Episode Reward:  3
xxxxx
xag.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.01007104 -2400.84           0.        ]
------
Step:8, Action:South
State  108
Old Q Values:  [-6180.6       7.839     0.        0.   ]
New Q values:  [-6180.6        8.5356     0.         0.    ]
Reward: 9  Episode Reward:  12
xxxxx
xg .x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2779.7270832     0.            0.            0.       ]
------
Step:9, Action:South
State  181
Old Q Values:  [   8.84059061   -0.375858      0.         -180.6       ]
New Q values:  [   8.84059061    2.02767467    0.         -180.6       ]
Reward: -1  Episode Reward:  11
xxxxx
x g.x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   9.26005957 -180.6           0.            0.        ]
------
Step:10, Action:North
State  261
Old Q Values:  [   9.26005957 -180.6           0.            0.        ]
New Q values:  [   5.75620101 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   8.84059061    2.02767467    0.         -180.6       ]
------
Step:11, Action:North
State  181
Old Q Values:  [   8.84059061    2.02767467    0.         -180.6       ]
New Q values:  [   3.75947979    2.02767467    0.         -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
xa .x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.74414515 1.56847439 0.        ]
------
Step:12, Action:South
State  109
Old Q Values:  [ -180.6            8.01007104 -2400.84           0.        ]
New Q values:  [ -180.6            3.73187235 -2400.84           0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   3.75947979    2.02767467    0.         -180.6       ]
------
Step:13, Action:North
State  181
Old Q Values:  [   3.75947979    2.02767467    0.         -180.6       ]
New Q values:  [   2.02335362    2.02767467    0.         -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xag.x
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            3.73187235 -2400.84           0.        ]
------
Step:14, Action:South
State  109
Old Q Values:  [ -180.6            3.73187235 -2400.84           0.        ]
New Q values:  [-1.80600000e+02  1.50105134e+00 -2.40084000e+03  0.00000000e+00]
Reward: -1  Episode Reward:  6
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.02335362    2.02767467    0.         -180.6       ]
------
Step:15, Action:South
State  181
Old Q Values:  [   2.02335362    2.02767467    0.         -180.6       ]
New Q values:  [   2.02335362    1.93793017    0.         -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   5.75620101 -180.6           0.            0.        ]
------
Step:16, Action:North
State  261
Old Q Values:  [   5.75620101 -180.6           0.            0.        ]
New Q values:  [   1.92337651 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x  .x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[0.73632037 0.         0.         0.        ]
------
Step:17, Action:North
State  181
Old Q Values:  [   2.02335362    1.93793017    0.         -180.6       ]
New Q values:  [   1.03258499    1.93793017    0.         -180.6       ]
Reward: -1  Episode Reward:  3
xxxxx
xa .x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.74414515 1.56847439 0.        ]
------
Step:18, Action:South
State  111
Old Q Values:  [0.         2.74414515 1.56847439 0.        ]
New Q values:  [0.         1.07903711 1.56847439 0.        ]
Reward: -1  Episode Reward:  2
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   1.03258499    1.93793017    0.         -180.6       ]
------
Step:19, Action:South
State  183
Old Q Values:  [0.73632037 0.         0.         0.        ]
New Q values:  [ 0.73632037 -0.02298705  0.          0.        ]
Reward: -1  Episode Reward:  1
xxxxx
x  .x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   1.92337651 -180.6           0.            0.        ]
------
Step:20, Action:North
State  260
Old Q Values:  [-5992.44     0.   -6000.6      0.  ]
New Q values:  [-2397.576     0.    -6000.6       0.   ]
Reward: -1  Episode Reward:  0
xxxxx
x  .x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:21, Action:North
State  183
Old Q Values:  [ 0.73632037 -0.02298705  0.          0.        ]
New Q values:  [ 0.16507046 -0.02298705  0.          0.        ]
Reward: -1  Episode Reward:  -1
xxxxx
xa .x
x ..x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         1.07903711 1.56847439 0.        ]
------
Step:22, Action:East
State  111
Old Q Values:  [0.         1.07903711 1.56847439 0.        ]
New Q values:  [0.         1.07903711 0.05486176 0.        ]
Reward: -1  Episode Reward:  -2
xxxxx
x a.x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[0.         0.         0.09157335 0.        ]
------
Step:23, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03 -4.27992144e-01  3.38822984e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  7.53589685e+00  3.38822984e+00]
Reward: 9  Episode Reward:  7
xxxxx
x gax
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6            7.69031236     0.          -182.14894995]
------
Step:24, Action:South
State  136
Old Q Values:  [-6180.6            7.69031236     0.          -182.14894995]
New Q values:  [-6180.6           11.32647205     0.          -182.14894995]
Reward: 9  Episode Reward:  16
xxxxx
xg  x
x .ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301     9.50115702     0.             0.        ]
------
Step:25, Action:South
State  208
Old Q Values:  [-378.52590016 2928.76878533    0.            0.        ]
New Q values:  [ -378.52590016 19179.6100522      0.             0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x   x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858   -6251.71315483 -6173.56321028 60009.00846022]
------
Step:26, Action:West
State  288
Old Q Values:  [-6000.375858   -6251.71315483 -6173.56321028 60009.00846022]
New Q values:  [-6000.375858   -6251.71315483 -6173.56321028 24011.53058409]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2.39187637e+03 -1.80600000e+02  1.70887386e+00  8.42400000e+00]
------
Step:27, Action:East
State  272
Old Q Values:  [-2.39187637e+03 -1.80600000e+02  1.70887386e+00  8.42400000e+00]
New Q values:  [-2391.87637166  -180.6         7203.54272477     8.424     ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6000.375858   -6251.71315483 -6173.56321028 24011.53058409]
------
Step:28, Action:North
State  288
Old Q Values:  [-6000.375858   -6251.71315483 -6173.56321028 24011.53058409]
New Q values:  [ 3353.13267246 -6251.71315483 -6173.56321028 24011.53058409]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -378.52590016 19179.6100522      0.             0.        ]
------
Step:29, Action:South
State  208
Old Q Values:  [ -378.52590016 19179.6100522      0.             0.        ]
New Q values:  [ -378.52590016 14874.70319611     0.             0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3353.13267246 -6251.71315483 -6173.56321028 24011.53058409]
------
Step:30, Action:West
State  288
Old Q Values:  [ 3353.13267246 -6251.71315483 -6173.56321028 24011.53058409]
New Q values:  [ 3353.13267246 -6251.71315483 -6173.56321028 11765.07505107]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6         7203.54272477     8.424     ]
------
Step:31, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6         7203.54272477     8.424     ]
New Q values:  [-2391.87637166  -180.6         6410.33960523     8.424     ]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3353.13267246 -6251.71315483 -6173.56321028 11765.07505107]
------
Step:32, Action:West
State  288
Old Q Values:  [ 3353.13267246 -6251.71315483 -6173.56321028 11765.07505107]
New Q values:  [ 3353.13267246 -6251.71315483 -6173.56321028  6628.531902  ]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6         6410.33960523     8.424     ]
------
Step:33, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6         6410.33960523     8.424     ]
New Q values:  [-2391.87637166  -180.6         4552.09541269     8.424     ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3353.13267246 -6251.71315483 -6173.56321028  6628.531902  ]
------
Step:34, Action:West
State  288
Old Q Values:  [ 3353.13267246 -6251.71315483 -6173.56321028  6628.531902  ]
New Q values:  [ 3353.13267246 -6251.71315483 -6173.56321028  4016.44138461]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6         4552.09541269     8.424     ]
------
Step:35, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6         4552.09541269     8.424     ]
New Q values:  [-2391.87637166  -180.6         3025.17058046     8.424     ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3353.13267246 -6251.71315483 -6173.56321028  4016.44138461]
------
Step:36, Action:North
State  288
Old Q Values:  [ 3353.13267246 -6251.71315483 -6173.56321028  4016.44138461]
New Q values:  [ 1343.18026898 -6251.71315483 -6173.56321028  4016.44138461]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:37, Action:North
State  208
Old Q Values:  [ -378.52590016 14874.70319611     0.             0.        ]
New Q values:  [ -152.2016745  14874.70319611     0.             0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -6.37714773e-01]
------
Step:38, Action:West
State  136
Old Q Values:  [-6180.6           11.32647205     0.          -182.14894995]
New Q values:  [-6180.6           11.32647205     0.           -71.19881093]
Reward: -1  Episode Reward:  22
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.00352671e+04 -5.99568600e+03  7.53589685e+00  3.38822984e+00]
------
Step:39, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -5.91278116e-01  2.24621886e+00]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  2.24621886e+00]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -6.37714773e-01]
------
Step:40, Action:West
State  136
Old Q Values:  [-6180.6           11.32647205     0.           -71.19881093]
New Q values:  [-6180.6           11.32647205     0.           -26.81875532]
Reward: -1  Episode Reward:  20
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.00352671e+04 -5.99568600e+03  7.53589685e+00  3.38822984e+00]
------
Step:41, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  7.53589685e+00  3.38822984e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  5.81230035e+00  3.38822984e+00]
Reward: -1  Episode Reward:  19
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           11.32647205     0.           -26.81875532]
------
Step:42, Action:South
State  136
Old Q Values:  [-6180.6           11.32647205     0.           -26.81875532]
New Q values:  [-6180.6         4466.34154765     0.           -26.81875532]
Reward: -1  Episode Reward:  18
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -152.2016745  14874.70319611     0.             0.        ]
------
Step:43, Action:South
State  208
Old Q Values:  [ -152.2016745  14874.70319611     0.             0.        ]
New Q values:  [-152.2016745  7154.21369382    0.            0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1343.18026898 -6251.71315483 -6173.56321028  4016.44138461]
------
Step:44, Action:West
State  288
Old Q Values:  [ 1343.18026898 -6251.71315483 -6173.56321028  4016.44138461]
New Q values:  [ 1343.18026898 -6251.71315483 -6173.56321028  2513.52772798]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6         3025.17058046     8.424     ]
------
Step:45, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6         3025.17058046     8.424     ]
New Q values:  [-2391.87637166  -180.6         1963.52655058     8.424     ]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1343.18026898 -6251.71315483 -6173.56321028  2513.52772798]
------
Step:46, Action:West
State  288
Old Q Values:  [ 1343.18026898 -6251.71315483 -6173.56321028  2513.52772798]
New Q values:  [ 1343.18026898 -6251.71315483 -6173.56321028  1593.86905636]
Reward: -1  Episode Reward:  14
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6         1963.52655058     8.424     ]
------
Step:47, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6         1963.52655058     8.424     ]
New Q values:  [-2391.87637166  -180.6         1262.97133714     8.424     ]
Reward: -1  Episode Reward:  13
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1343.18026898 -6251.71315483 -6173.56321028  1593.86905636]
------
Step:48, Action:West
State  288
Old Q Values:  [ 1343.18026898 -6251.71315483 -6173.56321028  1593.86905636]
New Q values:  [ 1343.18026898 -6251.71315483 -6173.56321028  1015.83902369]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6         1262.97133714     8.424     ]
------
Step:49, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6         1262.97133714     8.424     ]
New Q values:  [-2391.87637166  -180.6          907.54261555     8.424     ]
Reward: -1  Episode Reward:  11
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1343.18026898 -6251.71315483 -6173.56321028  1015.83902369]
------
Step:50, Action:North
State  288
Old Q Values:  [ 1343.18026898 -6251.71315483 -6173.56321028  1015.83902369]
New Q values:  [  539.19930759 -6251.71315483 -6173.56321028  1015.83902369]
Reward: -1  Episode Reward:  10
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:51, Action:North
State  208
Old Q Values:  [-152.2016745  7154.21369382    0.            0.        ]
New Q values:  [ -61.67198423 7154.21369382    0.            0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -6.37714773e-01]
------
Step:52, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -6.37714773e-01]
New Q values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -1.81220251e-01]
Reward: -1  Episode Reward:  8
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  2.24621886e+00]
------
Step:53, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  2.24621886e+00]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  6.22198678e-01]
Reward: -1  Episode Reward:  7
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         1.07903711 0.05486176 0.        ]
------
Step:54, Action:South
State  99
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [ 0.  -0.6  0.   0. ]
Reward: -1  Episode Reward:  6
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:55, Action:North
State  181
Old Q Values:  [   1.03258499    1.93793017    0.         -180.6       ]
New Q values:  [   0.2633494     1.93793017    0.         -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  1.50105134e+00 -2.40084000e+03  0.00000000e+00]
------
Step:56, Action:South
State  99
Old Q Values:  [ 0.  -0.6  0.   0. ]
New Q values:  [ 0.   -0.84  0.    0.  ]
Reward: -1  Episode Reward:  4
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:57, Action:North
State  180
Old Q Values:  [-2779.7270832     0.            0.            0.       ]
New Q values:  [-1110.94283328     0.             0.             0.        ]
Reward: -1  Episode Reward:  3
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6           5.16          1.30521224    0.        ]
------
Step:58, Action:East
State  110
Old Q Values:  [-180.6           5.16          1.30521224    0.        ]
New Q values:  [-1.80600000e+02  5.16000000e+00  1.08744499e-01  0.00000000e+00]
Reward: -1  Episode Reward:  2
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  6.22198678e-01]
------
Step:59, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  6.22198678e-01]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00 -2.74093955e-02]
Reward: -1  Episode Reward:  1
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         1.07903711 0.05486176 0.        ]
------
Step:60, Action:South
State  99
Old Q Values:  [ 0.   -0.84  0.    0.  ]
New Q values:  [ 0.    -0.936  0.     0.   ]
Reward: -1  Episode Reward:  0
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:61, Action:North
State  183
Old Q Values:  [ 0.16507046 -0.02298705  0.          0.        ]
New Q values:  [-0.21026068 -0.02298705  0.          0.        ]
Reward: -1  Episode Reward:  -1
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         1.07903711 0.05486176 0.        ]
------
Step:62, Action:South
State  111
Old Q Values:  [0.         1.07903711 0.05486176 0.        ]
New Q values:  [ 0.         -0.16838516  0.05486176  0.        ]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068 -0.02298705  0.          0.        ]
------
Step:63, Action:East
State  177
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [    0.            0.        60661.2566939     0.       ]
Reward: 100009  Episode Reward:  100007
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -1.81220251e-01]
------
Step:1, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.33392240e+03 -1.80600000e+02 -1.81220251e-01]
New Q values:  [ -180.6        -1333.9224      -180.6            5.31928908]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -1.02782568e+00 -2.74093955e-02]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00 -2.74093955e-02]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  6.93703624e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02  5.16000000e+00  1.08744499e-01  0.00000000e+00]
------
Step:3, Action:East
State  107
Old Q Values:  [-252.35169558    0.           -0.47679055    0.        ]
New Q values:  [-252.35169558    0.            1.29039465    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  6.93703624e+00]
------
Step:4, Action:West
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  8.23709314e+00  3.10835143e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  8.23709314e+00  1.03045897e+00]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    0.            1.29039465    0.        ]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558    0.            1.29039465    0.        ]
New Q values:  [-252.35169558    0.            1.99726873    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  6.93703624e+00]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  6.93703624e+00]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  3.72281450e+00]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02  5.16000000e+00  1.08744499e-01  0.00000000e+00]
------
Step:7, Action:East
State  108
Old Q Values:  [-6180.6        8.5356     0.         0.    ]
New Q values:  [-6.18060000e+03  8.53560000e+00  1.14369011e+00  0.00000000e+00]
Reward: -1  Episode Reward:  13
xxxxx
xga x
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.00352671e+04 -5.99568600e+03  5.81230035e+00  3.38822984e+00]
------
Step:8, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  5.81230035e+00  3.38822984e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  1.34162738e+03  3.38822984e+00]
Reward: -1  Episode Reward:  12
xxxxx
x gax
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         4466.34154765     0.           -26.81875532]
------
Step:9, Action:South
State  138
Old Q Values:  [ -180.6        -1333.9224      -180.6            5.31928908]
New Q values:  [-180.6        -525.31861289 -180.6           5.31928908]
Reward: 9  Episode Reward:  21
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301     9.50115702     0.             0.        ]
------
Step:10, Action:South
State  208
Old Q Values:  [ -61.67198423 7154.21369382    0.            0.        ]
New Q values:  [ -61.67198423 3171.83718464    0.            0.        ]
Reward: 9  Episode Reward:  30
xxxxx
x g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  539.19930759 -6251.71315483 -6173.56321028  1015.83902369]
------
Step:11, Action:West
State  288
Old Q Values:  [  539.19930759 -6251.71315483 -6173.56321028  1015.83902369]
New Q values:  [  539.19930759 -6251.71315483 -6173.56321028   677.99839414]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6          907.54261555     8.424     ]
------
Step:12, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6          907.54261555     8.424     ]
New Q values:  [-2391.87637166  -180.6          565.81656446     8.424     ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  539.19930759 -6251.71315483 -6173.56321028   677.99839414]
------
Step:13, Action:West
State  288
Old Q Values:  [  539.19930759 -6251.71315483 -6173.56321028   677.99839414]
New Q values:  [  539.19930759 -6251.71315483 -6173.56321028   440.344327  ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6          565.81656446     8.424     ]
------
Step:14, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6          565.81656446     8.424     ]
New Q values:  [-2391.87637166  -180.6          387.48641806     8.424     ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  539.19930759 -6251.71315483 -6173.56321028   440.344327  ]
------
Step:15, Action:North
State  288
Old Q Values:  [  539.19930759 -6251.71315483 -6173.56321028   440.344327  ]
New Q values:  [  217.60692304 -6251.71315483 -6173.56321028   440.344327  ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:16, Action:North
State  208
Old Q Values:  [ -61.67198423 3171.83718464    0.            0.        ]
New Q values:  [ -23.67300697 3171.83718464    0.            0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -525.31861289 -180.6           5.31928908]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6        -525.31861289 -180.6           5.31928908]
New Q values:  [-180.6        -525.31861289 -180.6           2.64455998]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  3.72281450e+00]
------
Step:18, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  3.72281450e+00]
New Q values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  8.89125799e-01]
Reward: -1  Episode Reward:  22
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ 0.          0.         -0.32377956  0.        ]
------
Step:19, Action:North
State  106
Old Q Values:  [ 0.          0.         -0.32377956  0.        ]
New Q values:  [-180.6           0.           -0.32377956    0.        ]
Reward: -301  Episode Reward:  -279
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-180.6           0.           -0.32377956    0.        ]
------
Step:20, Action:South
State  107
Old Q Values:  [-252.35169558    0.            1.99726873    0.        ]
New Q values:  [-252.35169558    5.4           1.99726873    0.        ]
Reward: 9  Episode Reward:  -270
xxxxx
x   x
xa. x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068 -0.02298705  0.          0.        ]
------
Step:21, Action:East
State  178
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  0.  5.4 0. ]
Reward: 9  Episode Reward:  -261
xxxxx
x   x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6  0.   0.   0. ]
------
Step:22, Action:South
State  202
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [    0.         -5884.35407458     0.             0.        ]
Reward: -10001  Episode Reward:  -10262
xxxxx
x   x
x   x
x.g x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068 -0.02298705  0.          0.        ]
------
Step:1, Action:East
State  181
Old Q Values:  [   0.2633494     1.93793017    0.         -180.6       ]
New Q values:  [ 2.63349400e-01  1.93793017e+00 -5.99327388e+03 -1.80600000e+02]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g.x
x. .x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  217.60692304 -6251.71315483 -6173.56321028   440.344327  ]
------
Step:1, Action:West
State  288
Old Q Values:  [  217.60692304 -6251.71315483 -6173.56321028   440.344327  ]
New Q values:  [  217.60692304 -6251.71315483 -6173.56321028   297.78365622]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2391.87637166  -180.6          387.48641806     8.424     ]
------
Step:2, Action:East
State  272
Old Q Values:  [-2391.87637166  -180.6          387.48641806     8.424     ]
New Q values:  [-2391.87637166  -180.6          243.72966409     8.424     ]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x ..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  217.60692304 -6251.71315483 -6173.56321028   297.78365622]
------
Step:3, Action:West
State  288
Old Q Values:  [  217.60692304 -6251.71315483 -6173.56321028   297.78365622]
New Q values:  [  217.60692304 -6251.71315483 -6173.56321028   120.61946249]
Reward: -1  Episode Reward:  7
xxxxx
x..gx
x ..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[7.02 0.   0.   0.  ]
------
Step:4, Action:North
State  272
Old Q Values:  [-2391.87637166  -180.6          243.72966409     8.424     ]
New Q values:  [-951.35054867 -180.6         243.72966409    8.424     ]
Reward: 9  Episode Reward:  16
xxxxx
x.g.x
x a.x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-0.40598153  0.          0.          0.        ]
------
Step:5, Action:South
State  192
Old Q Values:  [1.02      0.        4.4204088 0.       ]
New Q values:  [ 1.02       72.51889923  4.4204088   0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6         243.72966409    8.424     ]
------
Step:6, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6         243.72966409    8.424     ]
New Q values:  [-951.35054867 -180.6         162.17394255    8.424     ]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  217.60692304 -6251.71315483 -6173.56321028   120.61946249]
------
Step:7, Action:North
State  288
Old Q Values:  [  217.60692304 -6251.71315483 -6173.56321028   120.61946249]
New Q values:  [ 1043.99392461 -6251.71315483 -6173.56321028   120.61946249]
Reward: 9  Episode Reward:  23
xxxxx
x...x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -23.67300697 3171.83718464    0.            0.        ]
------
Step:8, Action:South
State  208
Old Q Values:  [ -23.67300697 3171.83718464    0.            0.        ]
New Q values:  [ -23.67300697 1581.33305124    0.            0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1043.99392461 -6251.71315483 -6173.56321028   120.61946249]
------
Step:9, Action:North
State  288
Old Q Values:  [ 1043.99392461 -6251.71315483 -6173.56321028   120.61946249]
New Q values:  [  891.39748521 -6251.71315483 -6173.56321028   120.61946249]
Reward: -1  Episode Reward:  21
xxxxx
xg..x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -23.67300697 1581.33305124    0.            0.        ]
------
Step:10, Action:South
State  208
Old Q Values:  [ -23.67300697 1581.33305124    0.            0.        ]
New Q values:  [-23.67300697 899.35246606   0.           0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  891.39748521 -6251.71315483 -6173.56321028   120.61946249]
------
Step:11, Action:North
State  288
Old Q Values:  [  891.39748521 -6251.71315483 -6173.56321028   120.61946249]
New Q values:  [  625.7647339  -6251.71315483 -6173.56321028   120.61946249]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-23.67300697 899.35246606   0.           0.        ]
------
Step:12, Action:South
State  208
Old Q Values:  [-23.67300697 899.35246606   0.           0.        ]
New Q values:  [-23.67300697 546.87040659   0.           0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  625.7647339  -6251.71315483 -6173.56321028   120.61946249]
------
Step:13, Action:West
State  288
Old Q Values:  [  625.7647339  -6251.71315483 -6173.56321028   120.61946249]
New Q values:  [  625.7647339  -6251.71315483 -6173.56321028    49.75378499]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[7.02 0.   0.   0.  ]
------
Step:14, Action:North
State  273
Old Q Values:  [7.02 0.   0.   0.  ]
New Q values:  [658.0646939   0.          0.          0.       ]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   7.56          0.         2186.18897965    0.        ]
------
Step:15, Action:North
State  193
Old Q Values:  [   7.56          0.         2186.18897965    0.        ]
New Q values:  [   8.424         0.         2186.18897965    0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x.agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:16, Action:North
State  115
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -276
xxxxx
x.a.x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6    0.     0.     0. ]
------
Step:17, Action:South
State  113
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [  0.        655.2566939   0.          0.       ]
Reward: -1  Episode Reward:  -277
xxxxx
x. gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   8.424         0.         2186.18897965    0.        ]
------
Step:18, Action:East
State  193
Old Q Values:  [   8.424         0.         2186.18897965    0.        ]
New Q values:  [    8.424          0.         -4962.06328616     0.        ]
Reward: -10001  Episode Reward:  -10278
xxxxx
x. .x
x  gx
x.  x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:1, Action:North
State  208
Old Q Values:  [-23.67300697 546.87040659   0.           0.        ]
New Q values:  [ -3.27583479 546.87040659   0.           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -525.31861289 -180.6           2.64455998]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -525.31861289 -180.6           2.64455998]
New Q values:  [-180.6        -525.31861289 -180.6           8.92895194]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  8.23709314e+00  1.03045897e+00]
------
Step:3, Action:East
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  8.23709314e+00  1.03045897e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  5.37352284e+00  1.03045897e+00]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -525.31861289 -180.6           8.92895194]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -525.31861289 -180.6           8.92895194]
New Q values:  [-180.6        -525.31861289 -180.6           3.23831851]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  8.89125799e-01]
------
Step:5, Action:West
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  1.34162738e+03  3.38822984e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  1.34162738e+03  6.75529193e+00]
Reward: 9  Episode Reward:  25
xxxxx
xag x
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:6, Action:North
State  105
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -276
xxxxx
xa gx
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6    0.     0.     0. ]
------
Step:7, Action:South
State  109
Old Q Values:  [-1.80600000e+02  1.50105134e+00 -2.40084000e+03  0.00000000e+00]
New Q values:  [ -180.6            6.58179959 -2400.84           0.        ]
Reward: 9  Episode Reward:  -267
xxxxx
x g x
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.63349400e-01  1.93793017e+00 -5.99327388e+03 -1.80600000e+02]
------
Step:8, Action:South
State  181
Old Q Values:  [ 2.63349400e-01  1.93793017e+00 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 2.63349400e-01  6.75218502e+00 -5.99327388e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  -258
xxxxx
x  gx
x . x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   1.92337651 -180.6           0.            0.        ]
------
Step:9, Action:North
State  261
Old Q Values:  [   1.92337651 -180.6           0.            0.        ]
New Q values:  [   2.19500611 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  -259
xxxxx
x g x
xa. x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.63349400e-01  6.75218502e+00 -5.99327388e+03 -1.80600000e+02]
------
Step:10, Action:South
State  189
Old Q Values:  [1.65110238 0.         0.         0.        ]
New Q values:  [1.65110238 0.05850183 0.         0.        ]
Reward: -1  Episode Reward:  -260
xxxxx
x   x
x g x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   2.19500611 -180.6           0.            0.        ]
------
Step:11, Action:North
State  261
Old Q Values:  [   2.19500611 -180.6           0.            0.        ]
New Q values:  [   2.30365795 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  -261
xxxxx
x   x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.63349400e-01  6.75218502e+00 -5.99327388e+03 -1.80600000e+02]
------
Step:12, Action:South
State  183
Old Q Values:  [-0.21026068 -0.02298705  0.          0.        ]
New Q values:  [-0.21026068  0.08190257  0.          0.        ]
Reward: -1  Episode Reward:  -262
xxxxx
x   x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   2.30365795 -180.6           0.            0.        ]
------
Step:13, Action:North
State  261
Old Q Values:  [   2.30365795 -180.6           0.            0.        ]
New Q values:  [   0.34603395 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  -263
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068  0.08190257  0.          0.        ]
------
Step:14, Action:South
State  183
Old Q Values:  [-0.21026068  0.08190257  0.          0.        ]
New Q values:  [-0.21026068 -0.46342879  0.          0.        ]
Reward: -1  Episode Reward:  -264
xxxxx
x   x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   0.34603395 -180.6           0.            0.        ]
------
Step:15, Action:North
State  261
Old Q Values:  [   0.34603395 -180.6           0.            0.        ]
New Q values:  [   1.56406909 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  -265
xxxxx
x   x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.63349400e-01  6.75218502e+00 -5.99327388e+03 -1.80600000e+02]
------
Step:16, Action:South
State  181
Old Q Values:  [ 2.63349400e-01  6.75218502e+00 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 2.63349400e-01  2.57009474e+00 -5.99327388e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -266
xxxxx
x  gx
x . x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   1.56406909 -180.6           0.            0.        ]
------
Step:17, Action:North
State  261
Old Q Values:  [   1.56406909 -180.6           0.            0.        ]
New Q values:  [   0.79665606 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  -267
xxxxx
x   x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.63349400e-01  2.57009474e+00 -5.99327388e+03 -1.80600000e+02]
------
Step:18, Action:South
State  189
Old Q Values:  [1.65110238 0.05850183 0.         0.        ]
New Q values:  [ 1.65110238 -0.33760245  0.          0.        ]
Reward: -1  Episode Reward:  -268
xxxxx
x   x
x g x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   0.79665606 -180.6           0.            0.        ]
------
Step:19, Action:North
State  261
Old Q Values:  [   0.79665606 -180.6           0.            0.        ]
New Q values:  [   0.48969084 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  -269
xxxxx
x   x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.63349400e-01  2.57009474e+00 -5.99327388e+03 -1.80600000e+02]
------
Step:20, Action:South
State  181
Old Q Values:  [ 2.63349400e-01  2.57009474e+00 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 2.63349400e-01  5.74945147e-01 -5.99327388e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -270
xxxxx
x  gx
x . x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   0.48969084 -180.6           0.            0.        ]
------
Step:21, Action:North
State  261
Old Q Values:  [   0.48969084 -180.6           0.            0.        ]
New Q values:  [  -0.23164012 -180.6           0.            0.        ]
Reward: -1  Episode Reward:  -271
xxxxx
x g x
xa. x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.63349400e-01  5.74945147e-01 -5.99327388e+03 -1.80600000e+02]
------
Step:22, Action:South
State  181
Old Q Values:  [ 2.63349400e-01  5.74945147e-01 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 2.63349400e-01 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -272
xxxxx
x  gx
x . x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  -0.23164012 -180.6           0.            0.        ]
------
Step:23, Action:East
State  261
Old Q Values:  [  -0.23164012 -180.6           0.            0.        ]
New Q values:  [  -0.23164012 -180.6         202.81940817    0.        ]
Reward: 9  Episode Reward:  -263
xxxxx
x   x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[658.0646939   0.          0.          0.       ]
------
Step:24, Action:North
State  273
Old Q Values:  [658.0646939   0.          0.          0.       ]
New Q values:  [60271.15307756     0.             0.             0.        ]
Reward: 100009  Episode Reward:  99746
xxxxx
x  gx
x a x
x   x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -3.27583479 546.87040659   0.           0.        ]
------
Step:1, Action:South
State  208
Old Q Values:  [ -3.27583479 546.87040659   0.           0.        ]
New Q values:  [ -3.27583479 411.87758281   0.           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  625.7647339  -6251.71315483 -6173.56321028    49.75378499]
------
Step:2, Action:North
State  288
Old Q Values:  [  625.7647339  -6251.71315483 -6173.56321028    49.75378499]
New Q values:  [  252.23309356 -6251.71315483 -6173.56321028    49.75378499]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:3, Action:North
State  208
Old Q Values:  [ -3.27583479 411.87758281   0.           0.        ]
New Q values:  [  5.06116164 411.87758281   0.           0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x .ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -525.31861289 -180.6           3.23831851]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -525.31861289 -180.6           3.23831851]
New Q values:  [-180.6        -525.31861289 -180.6           6.96206515]
Reward: 9  Episode Reward:  26
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  8.89125799e-01]
------
Step:5, Action:West
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  5.37352284e+00  1.03045897e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  5.37352284e+00  1.43218359e+00]
Reward: -1  Episode Reward:  25
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    5.4           1.99726873    0.        ]
------
Step:6, Action:South
State  111
Old Q Values:  [ 0.         -0.16838516  0.05486176  0.        ]
New Q values:  [0.         5.33264594 0.05486176 0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068 -0.46342879  0.          0.        ]
------
Step:7, Action:East
State  185
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  0.  5.4 0. ]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:8, Action:North
State  203
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [1.01205685 0.         0.         0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  5.37352284e+00  1.43218359e+00]
------
Step:9, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03 -1.02782568e+00  8.89125799e-01]
New Q values:  [-2.81736000e+02 -3.35405995e+03  1.07748927e+00  8.89125799e-01]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -525.31861289 -180.6           6.96206515]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        -525.31861289 -180.6           6.96206515]
New Q values:  [-180.6        -525.31861289 -180.6           2.50807284]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  1.07748927e+00  8.89125799e-01]
------
Step:11, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  1.34162738e+03  6.75529193e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  1.87595342e+03  6.75529193e+00]
Reward: -1  Episode Reward:  39
xxxxx
x gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         4466.34154765     0.           -26.81875532]
------
Step:12, Action:South
State  138
Old Q Values:  [-180.6        -525.31861289 -180.6           2.50807284]
New Q values:  [-180.6        -207.87709805 -180.6           2.50807284]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301     9.50115702     0.             0.        ]
------
Step:13, Action:South
State  218
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [ 0.         75.06992807  0.          0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  252.23309356 -6251.71315483 -6173.56321028    49.75378499]
------
Step:14, Action:North
State  288
Old Q Values:  [  252.23309356 -6251.71315483 -6173.56321028    49.75378499]
New Q values:  [  103.14358453 -6251.71315483 -6173.56321028    49.75378499]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301     9.50115702     0.             0.        ]
------
Step:15, Action:South
State  216
Old Q Values:  [-5999.86251301     9.50115702     0.             0.        ]
New Q values:  [-5999.86251301    34.14353817     0.             0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  103.14358453 -6251.71315483 -6173.56321028    49.75378499]
------
Step:16, Action:West
State  288
Old Q Values:  [  103.14358453 -6251.71315483 -6173.56321028    49.75378499]
New Q values:  [  103.14358453 -6251.71315483 -6173.56321028 18106.64743727]
Reward: 9  Episode Reward:  44
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[60271.15307756     0.             0.             0.        ]
------
Step:17, Action:North
State  273
Old Q Values:  [60271.15307756     0.             0.             0.        ]
New Q values:  [24107.86123102     0.             0.             0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:18, Action:North
State  200
Old Q Values:  [0.         0.         3.79073389 0.        ]
New Q values:  [-0.27675322  0.          3.79073389  0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  1.07748927e+00  8.89125799e-01]
------
Step:19, Action:East
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  5.37352284e+00  1.43218359e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  2.30183099e+00  1.43218359e+00]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -207.87709805 -180.6           2.50807284]
------
Step:20, Action:West
State  138
Old Q Values:  [-180.6        -207.87709805 -180.6           2.50807284]
New Q values:  [-180.6        -207.87709805 -180.6           0.72647592]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  1.07748927e+00  8.89125799e-01]
------
Step:21, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03  1.07748927e+00  8.89125799e-01]
New Q values:  [-2.81736000e+02 -3.35405995e+03  4.89384841e-02  8.89125799e-01]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -207.87709805 -180.6           0.72647592]
------
Step:22, Action:West
State  138
Old Q Values:  [-180.6        -207.87709805 -180.6           0.72647592]
New Q values:  [-1.80600000e+02 -2.07877098e+02 -1.80600000e+02 -4.26718934e-02]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  4.89384841e-02  8.89125799e-01]
------
Step:23, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03  4.89384841e-02  8.89125799e-01]
New Q values:  [-2.81736000e+02 -3.35405995e+03  4.89384841e-02  1.37565032e+00]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    5.4           1.99726873    0.        ]
------
Step:24, Action:South
State  107
Old Q Values:  [-252.35169558    5.4           1.99726873    0.        ]
New Q values:  [-252.35169558    3.18          1.99726873    0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[0.  0.  5.4 0. ]
------
Step:25, Action:West
State  185
Old Q Values:  [0.  0.  5.4 0. ]
New Q values:  [   0.      0.      5.4  -178.98]
Reward: -301  Episode Reward:  -265
xxxxx
x g x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   0.      0.      5.4  -178.98]
------
Step:26, Action:East
State  185
Old Q Values:  [   0.      0.      5.4  -178.98]
New Q values:  [    0.             0.         -5997.30277983  -178.98      ]
Reward: -10001  Episode Reward:  -10266
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6         162.17394255    8.424     ]
------
Step:1, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6         162.17394255    8.424     ]
New Q values:  [-951.35054867 -180.6        5502.2638082     8.424     ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  103.14358453 -6251.71315483 -6173.56321028 18106.64743727]
------
Step:2, Action:West
State  288
Old Q Values:  [  103.14358453 -6251.71315483 -6173.56321028 18106.64743727]
New Q values:  [  103.14358453 -6251.71315483 -6173.56321028  8892.73811737]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6        5502.2638082     8.424     ]
------
Step:3, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6        5502.2638082     8.424     ]
New Q values:  [-951.35054867 -180.6        4868.12695849    8.424     ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  103.14358453 -6251.71315483 -6173.56321028  8892.73811737]
------
Step:4, Action:West
State  288
Old Q Values:  [  103.14358453 -6251.71315483 -6173.56321028  8892.73811737]
New Q values:  [  103.14358453 -6251.71315483 -6173.56321028  5016.93333449]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6        4868.12695849    8.424     ]
------
Step:5, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6        4868.12695849    8.424     ]
New Q values:  [-951.35054867 -180.6        3451.73078374    8.424     ]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  103.14358453 -6251.71315483 -6173.56321028  5016.93333449]
------
Step:6, Action:North
State  288
Old Q Values:  [  103.14358453 -6251.71315483 -6173.56321028  5016.93333449]
New Q values:  [   49.18463381 -6251.71315483 -6173.56321028  5016.93333449]
Reward: 9  Episode Reward:  14
xxxxx
x ..x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:7, Action:North
State  208
Old Q Values:  [  5.06116164 411.87758281   0.           0.        ]
New Q values:  [  7.41166309 411.87758281   0.           0.        ]
Reward: 9  Episode Reward:  23
xxxxx
x .ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -2.07877098e+02 -1.80600000e+02 -4.26718934e-02]
------
Step:8, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -2.07877098e+02 -1.80600000e+02 -4.26718934e-02]
New Q values:  [-180.6        -207.87709805 -180.6           5.79562634]
Reward: 9  Episode Reward:  32
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  4.89384841e-02  1.37565032e+00]
------
Step:9, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03  4.89384841e-02  1.37565032e+00]
New Q values:  [-2.81736000e+02 -3.35405995e+03  4.89384841e-02 -4.97398722e-02]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-180.6           0.           -0.32377956    0.        ]
------
Step:10, Action:South
State  106
Old Q Values:  [-180.6           0.           -0.32377956    0.        ]
New Q values:  [-1.8060000e+02 -6.0006000e+03 -3.2377956e-01  0.0000000e+00]
Reward: -10001  Episode Reward:  -9970
xxxxx
xa  x
x.. x
xg  x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -207.87709805 -180.6           5.79562634]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -207.87709805 -180.6           5.79562634]
New Q values:  [-180.6        -207.87709805 -180.6           8.40879983]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  2.30183099e+00  1.43218359e+00]
------
Step:2, Action:East
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  2.30183099e+00  1.43218359e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  2.84337234e+00  1.43218359e+00]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -207.87709805 -180.6           8.40879983]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6        -207.87709805 -180.6           8.40879983]
New Q values:  [-180.6        -207.87709805 -180.6           3.61653164]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  2.84337234e+00  1.43218359e+00]
------
Step:4, Action:East
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  2.84337234e+00  1.43218359e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  1.62230843e+00  1.43218359e+00]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -207.87709805 -180.6           3.61653164]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6        -207.87709805 -180.6           3.61653164]
New Q values:  [-180.6        -207.87709805 -180.6           0.8612942 ]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  4.89384841e-02 -4.97398722e-02]
------
Step:6, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  1.87595342e+03  6.75529193e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  2.08968383e+03  6.75529193e+00]
Reward: -1  Episode Reward:  4
xxxxx
x.gax
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         4466.34154765     0.           -26.81875532]
------
Step:7, Action:South
State  138
Old Q Values:  [-180.6        -207.87709805 -180.6           0.8612942 ]
New Q values:  [-180.6          45.81243562 -180.6           0.8612942 ]
Reward: 9  Episode Reward:  13
xxxxx
x.  x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  7.41166309 411.87758281   0.           0.        ]
------
Step:8, Action:South
State  208
Old Q Values:  [  7.41166309 411.87758281   0.           0.        ]
New Q values:  [   7.41166309 1675.23103347    0.            0.        ]
Reward: 9  Episode Reward:  22
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   49.18463381 -6251.71315483 -6173.56321028  5016.93333449]
------
Step:9, Action:West
State  288
Old Q Values:  [   49.18463381 -6251.71315483 -6173.56321028  5016.93333449]
New Q values:  [   49.18463381 -6251.71315483 -6173.56321028  3041.69256892]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6        3451.73078374    8.424     ]
------
Step:10, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6        3451.73078374    8.424     ]
New Q values:  [-951.35054867 -180.6        2292.60008417    8.424     ]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   49.18463381 -6251.71315483 -6173.56321028  3041.69256892]
------
Step:11, Action:West
State  288
Old Q Values:  [   49.18463381 -6251.71315483 -6173.56321028  3041.69256892]
New Q values:  [   49.18463381 -6251.71315483 -6173.56321028  1903.85705282]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6        2292.60008417    8.424     ]
------
Step:12, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6        2292.60008417    8.424     ]
New Q values:  [-951.35054867 -180.6        1487.59714952    8.424     ]
Reward: -1  Episode Reward:  18
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   49.18463381 -6251.71315483 -6173.56321028  1903.85705282]
------
Step:13, Action:West
State  288
Old Q Values:  [   49.18463381 -6251.71315483 -6173.56321028  1903.85705282]
New Q values:  [   49.18463381 -6251.71315483 -6173.56321028  7993.30119044]
Reward: -1  Episode Reward:  17
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[24107.86123102     0.             0.             0.        ]
------
Step:14, Action:North
State  273
Old Q Values:  [24107.86123102     0.             0.             0.        ]
New Q values:  [9651.07169241    0.            0.            0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[    8.424          0.         -4962.06328616     0.        ]
------
Step:15, Action:North
State  195
Old Q Values:  [ 7.4844  0.     10.9944  0.    ]
New Q values:  [ 2.88045253  0.         10.9944      0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.77853638e+02 -6.00060000e+03  1.62230843e+00  1.43218359e+00]
------
Step:16, Action:East
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  1.62230843e+00  1.43218359e+00]
New Q values:  [-2.77853638e+02 -6.00060000e+03  1.37926541e+01  1.43218359e+00]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          45.81243562 -180.6           0.8612942 ]
------
Step:17, Action:West
State  136
Old Q Values:  [-6180.6         4466.34154765     0.           -26.81875532]
New Q values:  [-6180.6         4466.34154765     0.           -11.32750213]
Reward: -1  Episode Reward:  23
xxxxx
x.agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9354.82251301     0.        ]
------
Step:18, Action:North
State  123
Old Q Values:  [-2.77853638e+02 -6.00060000e+03  1.37926541e+01  1.43218359e+00]
New Q values:  [-2.87603659e+02 -6.00060000e+03  1.37926541e+01  1.43218359e+00]
Reward: -301  Episode Reward:  -278
xxxxx
x.a x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.87603659e+02 -6.00060000e+03  1.37926541e+01  1.43218359e+00]
------
Step:19, Action:East
State  121
Old Q Values:  [    0.             0.         -9354.82251301     0.        ]
New Q values:  [    0.             0.         -8402.62654091     0.        ]
Reward: -10001  Episode Reward:  -10279
xxxxx
x. gx
x.  x
x.  x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 2.88045253  0.         10.9944      0.        ]
------
Step:1, Action:East
State  193
Old Q Values:  [    8.424          0.         -4962.06328616     0.        ]
New Q values:  [    8.424          0.         -7476.85600442     0.        ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  4.89384841e-02 -4.97398722e-02]
------
Step:1, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03  4.89384841e-02 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -3.35405995e+03  1.91633061e+01 -4.97398722e-02]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          45.81243562 -180.6           0.8612942 ]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6          45.81243562 -180.6           0.8612942 ]
New Q values:  [-180.6          26.25217425 -180.6           0.8612942 ]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.424 0.    0.    0.   ]
------
Step:3, Action:North
State  210
Old Q Values:  [8.424 0.    0.    0.   ]
New Q values:  [10.64525227  0.          0.          0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          26.25217425 -180.6           0.8612942 ]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6          26.25217425 -180.6           0.8612942 ]
New Q values:  [-180.6          13.09444538 -180.6           0.8612942 ]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10.64525227  0.          0.          0.        ]
------
Step:5, Action:North
State  210
Old Q Values:  [10.64525227  0.          0.          0.        ]
New Q values:  [7.58643452 0.         0.         0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          13.09444538 -180.6           0.8612942 ]
------
Step:6, Action:South
State  138
Old Q Values:  [-180.6          13.09444538 -180.6           0.8612942 ]
New Q values:  [-180.6           6.91370851 -180.6           0.8612942 ]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.58643452 0.         0.         0.        ]
------
Step:7, Action:North
State  210
Old Q Values:  [7.58643452 0.         0.         0.        ]
New Q values:  [4.50868636 0.         0.         0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           6.91370851 -180.6           0.8612942 ]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6           6.91370851 -180.6           0.8612942 ]
New Q values:  [-180.6           3.51808931 -180.6           0.8612942 ]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.50868636 0.         0.         0.        ]
------
Step:9, Action:North
State  210
Old Q Values:  [4.50868636 0.         0.         0.        ]
New Q values:  [2.25890134 0.         0.         0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           3.51808931 -180.6           0.8612942 ]
------
Step:10, Action:South
State  138
Old Q Values:  [-180.6           3.51808931 -180.6           0.8612942 ]
New Q values:  [-180.6          11.05029718 -180.6           0.8612942 ]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x.gax
x...x
xxxxx
Step:11, Action:East
State  216
Old Q Values:  [-5999.86251301    34.14353817     0.             0.        ]
New Q values:  [-5999.86251301    34.14353817 -6170.35693855     0.        ]
Reward: -10301  Episode Reward:  -10291
xxxxx
x.  x
x. gx
x...x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  -0.23164012 -180.6         202.81940817    0.        ]
------
Step:1, Action:East
State  261
Old Q Values:  [  -0.23164012 -180.6         202.81940817    0.        ]
New Q values:  [-2.31640119e-01 -1.80600000e+02 -5.46719309e+03  0.00000000e+00]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x...x
x g x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[    8.424          0.         -7476.85600442     0.        ]
------
Step:1, Action:North
State  193
Old Q Values:  [    8.424          0.         -7476.85600442     0.        ]
New Q values:  [   12.90739622     0.         -7476.85600442     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.87603659e+02 -6.00060000e+03  1.37926541e+01  1.43218359e+00]
------
Step:2, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03  1.91633061e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -3.35405995e+03  1.03804116e+01 -4.97398722e-02]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          11.05029718 -180.6           0.8612942 ]
------
Step:3, Action:South
State  136
Old Q Values:  [-6180.6         4466.34154765     0.           -11.32750213]
New Q values:  [-6180.6         1802.17968051     0.           -11.32750213]
Reward: 9  Episode Reward:  17
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301    34.14353817 -6170.35693855     0.        ]
------
Step:4, Action:South
State  216
Old Q Values:  [-5999.86251301    34.14353817 -6170.35693855     0.        ]
New Q values:  [-5999.86251301  2417.0477724  -6170.35693855     0.        ]
Reward: 9  Episode Reward:  26
xxxxx
xg  x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   49.18463381 -6251.71315483 -6173.56321028  7993.30119044]
------
Step:5, Action:West
State  288
Old Q Values:  [   49.18463381 -6251.71315483 -6173.56321028  7993.30119044]
New Q values:  [   49.18463381 -6251.71315483 -6173.56321028  3648.99962103]
Reward: 9  Episode Reward:  35
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6        1487.59714952    8.424     ]
------
Step:6, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6        1487.59714952    8.424     ]
New Q values:  [-951.35054867 -180.6        1689.13874611    8.424     ]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   49.18463381 -6251.71315483 -6173.56321028  3648.99962103]
------
Step:7, Action:West
State  288
Old Q Values:  [   49.18463381 -6251.71315483 -6173.56321028  3648.99962103]
New Q values:  [   49.18463381 -6251.71315483 -6173.56321028  1965.74147225]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6        1689.13874611    8.424     ]
------
Step:8, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6        1689.13874611    8.424     ]
New Q values:  [-951.35054867 -180.6        1264.77794012    8.424     ]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   49.18463381 -6251.71315483 -6173.56321028  1965.74147225]
------
Step:9, Action:West
State  288
Old Q Values:  [   49.18463381 -6251.71315483 -6173.56321028  1965.74147225]
New Q values:  [   49.18463381 -6251.71315483 -6173.56321028 -4834.87002907]
Reward: -10001  Episode Reward:  -9969
xxxxx
x.  x
x.  x
x.g x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  1.03804116e+01 -4.97398722e-02]
------
Step:1, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03  1.03804116e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -3.35405995e+03  1.28672538e+01 -4.97398722e-02]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          11.05029718 -180.6           0.8612942 ]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6          11.05029718 -180.6           0.8612942 ]
New Q values:  [-180.6         734.93445059 -180.6           0.8612942 ]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301  2417.0477724  -6170.35693855     0.        ]
------
Step:3, Action:South
State  216
Old Q Values:  [-5999.86251301  2417.0477724  -6170.35693855     0.        ]
New Q values:  [-5999.86251301   986.9744991  -6170.35693855     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   49.18463381 -6251.71315483 -6173.56321028 -4834.87002907]
------
Step:4, Action:North
State  288
Old Q Values:  [   49.18463381 -6251.71315483 -6173.56321028 -4834.87002907]
New Q values:  [  521.64316357 -6251.71315483 -6173.56321028 -4834.87002907]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   7.41166309 1675.23103347    0.            0.        ]
------
Step:5, Action:South
State  216
Old Q Values:  [-5999.86251301   986.9744991  -6170.35693855     0.        ]
New Q values:  [-5999.86251301   550.68274871 -6170.35693855     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  521.64316357 -6251.71315483 -6173.56321028 -4834.87002907]
------
Step:6, Action:North
State  288
Old Q Values:  [  521.64316357 -6251.71315483 -6173.56321028 -4834.87002907]
New Q values:  [  710.62657547 -6251.71315483 -6173.56321028 -4834.87002907]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   7.41166309 1675.23103347    0.            0.        ]
------
Step:7, Action:South
State  216
Old Q Values:  [-5999.86251301   550.68274871 -6170.35693855     0.        ]
New Q values:  [-5999.86251301   432.86107212 -6170.35693855     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  710.62657547 -6251.71315483 -6173.56321028 -4834.87002907]
------
Step:8, Action:North
State  288
Old Q Values:  [  710.62657547 -6251.71315483 -6173.56321028 -4834.87002907]
New Q values:  [-5213.78005977 -6251.71315483 -6173.56321028 -4834.87002907]
Reward: -10001  Episode Reward:  -9978
xxxxx
x.  x
x .gx
x.. x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         734.93445059 -180.6           0.8612942 ]
------
Step:1, Action:South
State  136
Old Q Values:  [-6180.6         1802.17968051     0.           -11.32750213]
New Q values:  [-6180.6          856.13019384     0.           -11.32750213]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301   432.86107212 -6170.35693855     0.        ]
------
Step:2, Action:South
State  208
Old Q Values:  [   7.41166309 1675.23103347    0.            0.        ]
New Q values:  [   7.41166309 -774.96859533    0.            0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5213.78005977 -6251.71315483 -6173.56321028 -4834.87002907]
------
Step:3, Action:West
State  288
Old Q Values:  [-5213.78005977 -6251.71315483 -6173.56321028 -4834.87002907]
New Q values:  [-5213.78005977 -6251.71315483 -6173.56321028   966.7734961 ]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[9651.07169241    0.            0.            0.        ]
------
Step:4, Action:North
State  273
Old Q Values:  [9651.07169241    0.            0.            0.        ]
New Q values:  [3863.70089583    0.            0.            0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   12.90739622     0.         -7476.85600442     0.        ]
------
Step:5, Action:North
State  192
Old Q Values:  [ 1.02       72.51889923  4.4204088   0.        ]
New Q values:  [-5.36728685e+03  7.25188992e+01  4.42040880e+00  0.00000000e+00]
Reward: -9991  Episode Reward:  -9965
xxxxx
x.g x
x.  x
x.  x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -3.35405995e+03  1.28672538e+01 -4.97398722e-02]
------
Step:1, Action:East
State  123
Old Q Values:  [-2.87603659e+02 -6.00060000e+03  1.37926541e+01  1.43218359e+00]
New Q values:  [-2.87603659e+02 -6.00060000e+03  2.31397397e+02  1.43218359e+00]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         734.93445059 -180.6           0.8612942 ]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6         734.93445059 -180.6           0.8612942 ]
New Q values:  [-180.6         734.93445059 -180.6           3.60469382]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.g.x
x...x
xxxxx
Step:3, Action:South
State  122
Old Q Values:  [-2.81736000e+02 -3.35405995e+03  1.28672538e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -7.33836380e+03  1.28672538e+01 -4.97398722e-02]
Reward: -10001  Episode Reward:  -9993
xxxxx
x.a x
x. .x
x.g.x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[-5.36728685e+03  7.25188992e+01  4.42040880e+00  0.00000000e+00]
------
Step:1, Action:South
State  196
Old Q Values:  [-0.40598153  0.          0.          0.        ]
New Q values:  [-0.40598153  5.4         0.          0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[0. 0. 0. 0.]
------
Step:2, Action:North
State  276
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [1.02 0.   0.   0.  ]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
xga.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-0.40598153  5.4         0.          0.        ]
------
Step:3, Action:South
State  194
Old Q Values:  [-0.6  0.   0.   0. ]
New Q values:  [-0.6   -0.294  0.     0.   ]
Reward: -1  Episode Reward:  7
xxxxx
x. .x
x. .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[1.02 0.   0.   0.  ]
------
Step:4, Action:North
State  276
Old Q Values:  [1.02 0.   0.   0.  ]
New Q values:  [-0.192  0.     0.     0.   ]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x.a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6   -0.294  0.     0.   ]
------
Step:5, Action:East
State  194
Old Q Values:  [-0.6   -0.294  0.     0.   ]
New Q values:  [-0.6       -0.294      6.0776704  0.       ]
Reward: 9  Episode Reward:  15
xxxxx
x. .x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.25890134 0.         0.         0.        ]
------
Step:6, Action:North
State  210
Old Q Values:  [2.25890134 0.         0.         0.        ]
New Q values:  [226.78389571   0.           0.           0.        ]
Reward: 9  Episode Reward:  24
xxxxx
x. ax
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         734.93445059 -180.6           3.60469382]
------
Step:7, Action:South
State  138
Old Q Values:  [-180.6         734.93445059 -180.6           3.60469382]
New Q values:  [-180.6         423.23210187 -180.6           3.60469382]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301   432.86107212 -6170.35693855     0.        ]
------
Step:8, Action:South
State  210
Old Q Values:  [226.78389571   0.           0.           0.        ]
New Q values:  [226.78389571 295.43204883   0.           0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x.  x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5213.78005977 -6251.71315483 -6173.56321028   966.7734961 ]
------
Step:9, Action:West
State  288
Old Q Values:  [-5213.78005977 -6251.71315483 -6173.56321028   966.7734961 ]
New Q values:  [-5213.78005977 -6251.71315483 -6173.56321028   765.54278047]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6        1264.77794012    8.424     ]
------
Step:10, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6        1264.77794012    8.424     ]
New Q values:  [-951.35054867 -180.6         734.97401019    8.424     ]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5213.78005977 -6251.71315483 -6173.56321028   765.54278047]
------
Step:11, Action:West
State  288
Old Q Values:  [-5213.78005977 -6251.71315483 -6173.56321028   765.54278047]
New Q values:  [-5213.78005977 -6251.71315483 -6173.56321028   526.10931525]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6         734.97401019    8.424     ]
------
Step:12, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6         734.97401019    8.424     ]
New Q values:  [-951.35054867 -180.6         451.22239865    8.424     ]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5213.78005977 -6251.71315483 -6173.56321028   526.10931525]
------
Step:13, Action:West
State  288
Old Q Values:  [-5213.78005977 -6251.71315483 -6173.56321028   526.10931525]
New Q values:  [-5213.78005977 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -10001  Episode Reward:  -9973
xxxxx
x.  x
x.  x
x.g x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[-2.31640119e-01 -1.80600000e+02 -5.46719309e+03  0.00000000e+00]
------
Step:1, Action:West
State  261
Old Q Values:  [-2.31640119e-01 -1.80600000e+02 -5.46719309e+03  0.00000000e+00]
New Q values:  [-2.31640119e-01 -1.80600000e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -301  Episode Reward:  -301
xxxxx
x...x
x...x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[-2.31640119e-01 -1.80600000e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:2, Action:North
State  261
Old Q Values:  [-2.31640119e-01 -1.80600000e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 5.38634877e+00 -1.80600000e+02 -5.46719309e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  -292
xxxxx
x...x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.63349400e-01 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
------
Step:3, Action:North
State  181
Old Q Values:  [ 2.63349400e-01 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 6.41621418e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  -283
xxxxx
xa..x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           3.03624806    0.            0.        ]
------
Step:4, Action:South
State  110
Old Q Values:  [-1.80600000e+02  5.16000000e+00  1.08744499e-01  0.00000000e+00]
New Q values:  [-1.80600000e+02 -5.99853600e+03  1.08744499e-01  0.00000000e+00]
Reward: -10001  Episode Reward:  -10284
xxxxx
x ..x
xg..x
x  .x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1110.94283328     0.             0.             0.        ]
------
Step:1, Action:South
State  180
Old Q Values:  [-1110.94283328     0.             0.             0.        ]
New Q values:  [-1110.94283328     5.4            0.             0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576     0.    -6000.6       0.   ]
------
Step:2, Action:South
State  260
Old Q Values:  [-2397.576     0.    -6000.6       0.   ]
New Q values:  [-2397.576  -180.6   -6000.6       0.   ]
Reward: -301  Episode Reward:  -292
xxxxx
xg..x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576  -180.6   -6000.6       0.   ]
------
Step:3, Action:West
State  260
Old Q Values:  [-2397.576  -180.6   -6000.6       0.   ]
New Q values:  [-2397.576  -180.6   -6000.6    -180.6  ]
Reward: -301  Episode Reward:  -593
xxxxx
x ..x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576  -180.6   -6000.6    -180.6  ]
------
Step:4, Action:South
State  261
Old Q Values:  [ 5.38634877e+00 -1.80600000e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 5.38634877e+00 -2.51224095e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -301  Episode Reward:  -894
xxxxx
x ..x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5.38634877e+00 -2.51224095e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:5, Action:North
State  261
Old Q Values:  [ 5.38634877e+00 -2.51224095e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 3.47940376e+00 -2.51224095e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -895
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.41621418e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
------
Step:6, Action:North
State  181
Old Q Values:  [ 6.41621418e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 3.56627945e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -896
xxxxx
xa..x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         5.33264594 0.05486176 0.        ]
------
Step:7, Action:South
State  111
Old Q Values:  [0.         5.33264594 0.05486176 0.        ]
New Q values:  [0.         2.60294221 0.05486176 0.        ]
Reward: -1  Episode Reward:  -897
xxxxx
x ..x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 3.56627945e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
------
Step:8, Action:North
State  181
Old Q Values:  [ 3.56627945e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 2.80105166e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -898
xxxxx
xa.gx
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            6.58179959 -2400.84           0.        ]
------
Step:9, Action:South
State  109
Old Q Values:  [ -180.6            6.58179959 -2400.84           0.        ]
New Q values:  [ -180.6            2.87303533 -2400.84           0.        ]
Reward: -1  Episode Reward:  -899
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.80105166e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
------
Step:10, Action:North
State  181
Old Q Values:  [ 2.80105166e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 1.30130333e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -900
xxxxx
xa..x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.60294221 0.05486176 0.        ]
------
Step:11, Action:South
State  109
Old Q Values:  [ -180.6            2.87303533 -2400.84           0.        ]
New Q values:  [-1.80600000e+02  9.39605131e-01 -2.40084000e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -901
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.30130333e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
------
Step:12, Action:North
State  181
Old Q Values:  [ 1.30130333e+00 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 7.01403994e-01 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -902
xxxxx
xa..x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.60294221 0.05486176 0.        ]
------
Step:13, Action:South
State  109
Old Q Values:  [-1.80600000e+02  9.39605131e-01 -2.40084000e+03  0.00000000e+00]
New Q values:  [-1.80600000e+02 -1.37367496e-02 -2.40084000e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -903
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 7.01403994e-01 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
------
Step:14, Action:North
State  180
Old Q Values:  [-1110.94283328     5.4            0.             0.        ]
New Q values:  [-6.44241645e+03  5.40000000e+00  0.00000000e+00  0.00000000e+00]
Reward: -10001  Episode Reward:  -10904
xxxxx
xg..x
x ..x
x ..x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[226.78389571 295.43204883   0.           0.        ]
------
Step:1, Action:North
State  208
Old Q Values:  [   7.41166309 -774.96859533    0.            0.        ]
New Q values:  [ 135.3342958  -774.96859533    0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         423.23210187 -180.6           3.60469382]
------
Step:2, Action:West
State  136
Old Q Values:  [-6180.6          856.13019384     0.           -11.32750213]
New Q values:  [-6.18060000e+03  8.56130194e+02  0.00000000e+00  8.68999149e-01]
Reward: 9  Episode Reward:  18
xxxxx
x.agx
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8402.62654091     0.        ]
------
Step:3, Action:North
State  123
Old Q Values:  [-2.87603659e+02 -6.00060000e+03  2.31397397e+02  1.43218359e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  2.31397397e+02  1.43218359e+00]
Reward: -301  Episode Reward:  -283
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  2.31397397e+02  1.43218359e+00]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -7.33836380e+03  1.28672538e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -7.33836380e+03  1.31516532e+02 -4.97398722e-02]
Reward: -1  Episode Reward:  -284
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         423.23210187 -180.6           3.60469382]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6         423.23210187 -180.6           3.60469382]
New Q values:  [-180.6         257.3224554  -180.6           3.60469382]
Reward: -1  Episode Reward:  -285
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[226.78389571 295.43204883   0.           0.        ]
------
Step:6, Action:South
State  210
Old Q Values:  [226.78389571 295.43204883   0.           0.        ]
New Q values:  [  226.78389571 -1446.5611984      0.             0.        ]
Reward: -1  Episode Reward:  -286
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5213.78005977 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:7, Action:North
State  288
Old Q Values:  [-5213.78005977 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [-2018.07685519 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  -287
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  226.78389571 -1446.5611984      0.             0.        ]
------
Step:8, Action:North
State  210
Old Q Values:  [  226.78389571 -1446.5611984      0.             0.        ]
New Q values:  [  167.3102949 -1446.5611984     0.            0.       ]
Reward: -1  Episode Reward:  -288
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         257.3224554  -180.6           3.60469382]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6         257.3224554  -180.6           3.60469382]
New Q values:  [-180.6         152.52207063 -180.6           3.60469382]
Reward: -1  Episode Reward:  -289
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  167.3102949 -1446.5611984     0.            0.       ]
------
Step:10, Action:North
State  210
Old Q Values:  [  167.3102949 -1446.5611984     0.            0.       ]
New Q values:  [  112.08073915 -1446.5611984      0.             0.        ]
Reward: -1  Episode Reward:  -290
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         152.52207063 -180.6           3.60469382]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6         152.52207063 -180.6           3.60469382]
New Q values:  [-1.80600000e+02 -5.89899088e+03 -1.80600000e+02  3.60469382e+00]
Reward: -10001  Episode Reward:  -10291
xxxxx
x.  x
x..gx
x.. x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -5.89899088e+03 -1.80600000e+02  3.60469382e+00]
------
Step:1, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -5.89899088e+03 -1.80600000e+02  3.60469382e+00]
New Q values:  [ -180.6        -5898.99088301  -180.6           46.29683715]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -7.33836380e+03  1.31516532e+02 -4.97398722e-02]
------
Step:2, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -7.33836380e+03  1.31516532e+02 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -7.33836380e+03  6.58956640e+01 -4.97398722e-02]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5898.99088301  -180.6           46.29683715]
------
Step:3, Action:West
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6           46.29683715]
New Q values:  [ -180.6        -5898.99088301  -180.6           37.68743405]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -7.33836380e+03  6.58956640e+01 -4.97398722e-02]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -7.33836380e+03  6.58956640e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -7.33836380e+03  3.70644958e+01 -4.97398722e-02]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5898.99088301  -180.6           37.68743405]
------
Step:5, Action:West
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6           37.68743405]
New Q values:  [ -180.6        -5898.99088301  -180.6           25.59432236]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -7.33836380e+03  3.70644958e+01 -4.97398722e-02]
------
Step:6, Action:East
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  2.31397397e+02  1.43218359e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  9.96372554e+01  1.43218359e+00]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5898.99088301  -180.6           25.59432236]
------
Step:7, Action:West
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6           25.59432236]
New Q values:  [ -180.6        -5898.99088301  -180.6           39.52890557]
Reward: -1  Episode Reward:  3
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  9.96372554e+01  1.43218359e+00]
------
Step:8, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -7.33836380e+03  3.70644958e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -7.33836380e+03  2.60844700e+01 -4.97398722e-02]
Reward: -1  Episode Reward:  2
xxxxx
x. ax
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5898.99088301  -180.6           39.52890557]
------
Step:9, Action:West
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6           39.52890557]
New Q values:  [ -180.6        -5898.99088301  -180.6           45.10273886]
Reward: -1  Episode Reward:  1
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  9.96372554e+01  1.43218359e+00]
------
Step:10, Action:East
State  121
Old Q Values:  [    0.             0.         -8402.62654091     0.        ]
New Q values:  [    0.             0.         -9104.81155821     0.        ]
Reward: -10001  Episode Reward:  -10000
xxxxx
x. gx
x...x
x.. x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5898.99088301  -180.6           45.10273886]
------
Step:1, Action:West
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6           45.10273886]
New Q values:  [ -180.6        -5898.99088301  -180.6           31.26643654]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -7.33836380e+03  2.60844700e+01 -4.97398722e-02]
------
Step:2, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -7.33836380e+03  2.60844700e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -7.33836380e+03  1.92137190e+01 -4.97398722e-02]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5898.99088301  -180.6           31.26643654]
------
Step:3, Action:West
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6           31.26643654]
New Q values:  [ -180.6        -5898.99088301  -180.6           17.6706903 ]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -7.33836380e+03  1.92137190e+01 -4.97398722e-02]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -7.33836380e+03  1.92137190e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -7.33836380e+03  1.23866947e+01 -4.97398722e-02]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5898.99088301  -180.6           17.6706903 ]
------
Step:5, Action:West
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6           17.6706903 ]
New Q values:  [ -180.6        -5898.99088301  -180.6           10.18428452]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -7.33836380e+03  1.23866947e+01 -4.97398722e-02]
------
Step:6, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  2.08968383e+03  6.75529193e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  1.09211259e+03  6.75529193e+00]
Reward: -1  Episode Reward:  4
xxxxx
x.gax
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  8.56130194e+02  0.00000000e+00  8.68999149e-01]
------
Step:7, Action:South
State  136
Old Q Values:  [-6.18060000e+03  8.56130194e+02  0.00000000e+00  8.68999149e-01]
New Q values:  [-6.18060000e+03  3.88452366e+02  0.00000000e+00  8.68999149e-01]
Reward: 9  Episode Reward:  13
xxxxx
x. gx
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 135.3342958  -774.96859533    0.            0.        ]
------
Step:8, Action:West
State  208
Old Q Values:  [ 135.3342958  -774.96859533    0.            0.        ]
New Q values:  [ 135.3342958  -774.96859533    0.            9.27221887]
Reward: 9  Episode Reward:  22
xxxxx
x.  x
x.agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   12.90739622     0.         -7476.85600442     0.        ]
------
Step:9, Action:North
State  195
Old Q Values:  [ 2.88045253  0.         10.9944      0.        ]
New Q values:  [30.44335764  0.         10.9944      0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  9.96372554e+01  1.43218359e+00]
------
Step:10, Action:East
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  9.96372554e+01  1.43218359e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  4.23101875e+01  1.43218359e+00]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5898.99088301  -180.6           10.18428452]
------
Step:11, Action:West
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6           10.18428452]
New Q values:  [ -180.6        -5898.99088301  -180.6            7.18972221]
Reward: -1  Episode Reward:  19
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -7.33836380e+03  1.23866947e+01 -4.97398722e-02]
------
Step:12, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  1.09211259e+03  6.75529193e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  5.52780746e+02  6.75529193e+00]
Reward: -1  Episode Reward:  18
xxxxx
x.gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.88452366e+02  0.00000000e+00  8.68999149e-01]
------
Step:13, Action:South
State  138
Old Q Values:  [ -180.6        -5898.99088301  -180.6            7.18972221]
New Q values:  [ -180.6        -2319.59606446  -180.6            7.18972221]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 135.3342958  -774.96859533    0.            9.27221887]
------
Step:14, Action:North
State  208
Old Q Values:  [ 135.3342958  -774.96859533    0.            9.27221887]
New Q values:  [ 170.0694282  -774.96859533    0.            9.27221887]
Reward: -1  Episode Reward:  16
xxxxx
x.gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.88452366e+02  0.00000000e+00  8.68999149e-01]
------
Step:15, Action:South
State  136
Old Q Values:  [-6.18060000e+03  3.88452366e+02  0.00000000e+00  8.68999149e-01]
New Q values:  [-6.18060000e+03  2.84639268e+02  0.00000000e+00  8.68999149e-01]
Reward: -1  Episode Reward:  15
xxxxx
xg  x
x. ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301   432.86107212 -6170.35693855     0.        ]
------
Step:16, Action:South
State  208
Old Q Values:  [ 170.0694282  -774.96859533    0.            9.27221887]
New Q values:  [ 170.0694282  -910.01049469    0.            9.27221887]
Reward: 9  Episode Reward:  24
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2018.07685519 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:17, Action:North
State  288
Old Q Values:  [-2018.07685519 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [ -756.80991362 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 170.0694282  -910.01049469    0.            9.27221887]
------
Step:18, Action:North
State  210
Old Q Values:  [  112.08073915 -1446.5611984      0.             0.        ]
New Q values:  [   46.38921232 -1446.5611984      0.             0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -2319.59606446  -180.6            7.18972221]
------
Step:19, Action:West
State  130
Old Q Values:  [-180.6       1.9272    0.        0.    ]
New Q values:  [-180.6       1.9272    0.       -0.6   ]
Reward: -1  Episode Reward:  21
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[0. 0. 0. 0.]
------
Step:20, Action:North
State  114
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-180.6    0.     0.     0. ]
Reward: -301  Episode Reward:  -280
xxxxx
x.a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6    0.     0.     0. ]
------
Step:21, Action:South
State  122
Old Q Values:  [-2.81736000e+02 -7.33836380e+03  1.23866947e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.23866947e+01 -4.97398722e-02]
Reward: -10001  Episode Reward:  -10281
xxxxx
x.  x
x.g x
x.  x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   46.38921232 -1446.5611984      0.             0.        ]
------
Step:1, Action:North
State  210
Old Q Values:  [   46.38921232 -1446.5611984      0.             0.        ]
New Q values:  [   26.11260159 -1446.5611984      0.             0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -2319.59606446  -180.6            7.18972221]
------
Step:2, Action:West
State  138
Old Q Values:  [ -180.6        -2319.59606446  -180.6            7.18972221]
New Q values:  [ -180.6        -2319.59606446  -180.6           11.99189729]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.23866947e+01 -4.97398722e-02]
------
Step:3, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  5.52780746e+02  6.75529193e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  3.05904079e+02  6.75529193e+00]
Reward: -1  Episode Reward:  17
xxxxx
xg ax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  2.84639268e+02  0.00000000e+00  8.68999149e-01]
------
Step:4, Action:South
State  138
Old Q Values:  [ -180.6        -2319.59606446  -180.6           11.99189729]
New Q values:  [-180.6        -877.41759733 -180.6          11.99189729]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
xg.ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 170.0694282  -910.01049469    0.            9.27221887]
------
Step:5, Action:North
State  208
Old Q Values:  [ 170.0694282  -910.01049469    0.            9.27221887]
New Q values:  [  71.02534047 -910.01049469    0.            9.27221887]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -877.41759733 -180.6          11.99189729]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6        -877.41759733 -180.6          11.99189729]
New Q values:  [-180.6        -877.41759733 -180.6           7.91276732]
Reward: -1  Episode Reward:  14
xxxxx
x.a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.23866947e+01 -4.97398722e-02]
------
Step:7, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  3.05904079e+02  6.75529193e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  2.07153412e+02  6.75529193e+00]
Reward: -1  Episode Reward:  13
xxxxx
xg ax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  2.84639268e+02  0.00000000e+00  8.68999149e-01]
------
Step:8, Action:South
State  136
Old Q Values:  [-6.18060000e+03  2.84639268e+02  0.00000000e+00  8.68999149e-01]
New Q values:  [-6.18060000e+03  1.34563309e+02  0.00000000e+00  8.68999149e-01]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  71.02534047 -910.01049469    0.            9.27221887]
------
Step:9, Action:North
State  208
Old Q Values:  [  71.02534047 -910.01049469    0.            9.27221887]
New Q values:  [  30.18396638 -910.01049469    0.            9.27221887]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -877.41759733 -180.6           7.91276732]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        -877.41759733 -180.6           7.91276732]
New Q values:  [-180.6        -877.41759733 -180.6           6.28111533]
Reward: -1  Episode Reward:  10
xxxxx
x.a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.23866947e+01 -4.97398722e-02]
------
Step:11, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.23866947e+01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03  6.23901247e+00 -4.97398722e-02]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -877.41759733 -180.6           6.28111533]
------
Step:12, Action:West
State  138
Old Q Values:  [-180.6        -877.41759733 -180.6           6.28111533]
New Q values:  [-180.6        -877.41759733 -180.6           3.78414987]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  6.23901247e+00 -4.97398722e-02]
------
Step:13, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  6.23901247e+00 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.03084995e+00 -4.97398722e-02]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -877.41759733 -180.6           3.78414987]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6        -877.41759733 -180.6           3.78414987]
New Q values:  [-180.6        -877.41759733 -180.6           1.82291493]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.03084995e+00 -4.97398722e-02]
------
Step:15, Action:East
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  4.23101875e+01  1.43218359e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  1.68709495e+01  1.43218359e+00]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -877.41759733 -180.6           1.82291493]
------
Step:16, Action:West
State  138
Old Q Values:  [-180.6        -877.41759733 -180.6           1.82291493]
New Q values:  [-180.6        -877.41759733 -180.6           5.19045082]
Reward: -1  Episode Reward:  4
xxxxx
x.a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  1.68709495e+01  1.43218359e+00]
------
Step:17, Action:East
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  1.68709495e+01  1.43218359e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  7.70551504e+00  1.43218359e+00]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -877.41759733 -180.6           5.19045082]
------
Step:18, Action:West
State  138
Old Q Values:  [-180.6        -877.41759733 -180.6           5.19045082]
New Q values:  [-180.6        -877.41759733 -180.6           3.78783484]
Reward: -1  Episode Reward:  2
xxxxx
x.a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  7.70551504e+00  1.43218359e+00]
------
Step:19, Action:East
State  121
Old Q Values:  [    0.             0.         -9104.81155821     0.        ]
New Q values:  [    0.             0.         -9602.15563046     0.        ]
Reward: -10001  Episode Reward:  -9999
xxxxx
x. gx
x.. x
x. .x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.60294221 0.05486176 0.        ]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6           3.03624806    0.            0.        ]
New Q values:  [-180.6           6.61449922    0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068 -0.46342879  0.          0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [ 7.01403994e-01 -3.70021941e-01 -5.99327388e+03 -1.80600000e+02]
New Q values:  [ 7.01403994e-01 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[0. 0. 0. 0.]
------
Step:3, Action:North
State  200
Old Q Values:  [-0.27675322  0.          3.79073389  0.        ]
New Q values:  [6.1985537  0.         3.79073389 0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x a.x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.03084995e+00 -4.97398722e-02]
------
Step:4, Action:East
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  2.07153412e+02  6.75529193e+00]
New Q values:  [-1.00352671e+04 -5.99568600e+03  1.28630358e+02  6.75529193e+00]
Reward: 9  Episode Reward:  36
xxxxx
x gax
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.34563309e+02  0.00000000e+00  8.68999149e-01]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6        -877.41759733 -180.6           3.78783484]
New Q values:  [-180.6        -221.70871729 -180.6           3.78783484]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301   432.86107212 -6170.35693855     0.        ]
------
Step:6, Action:South
State  216
Old Q Values:  [-5999.86251301   432.86107212 -6170.35693855     0.        ]
New Q values:  [-5999.86251301   -48.49854524 -6170.35693855     0.        ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -756.80991362 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:7, Action:North
State  288
Old Q Values:  [ -756.80991362 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [ -303.32396545 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5999.86251301   -48.49854524 -6170.35693855     0.        ]
------
Step:8, Action:West
State  216
Old Q Values:  [-5999.86251301   -48.49854524 -6170.35693855     0.        ]
New Q values:  [-5.99986251e+03 -4.84985452e+01 -6.17035694e+03  1.25956611e+00]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[6.1985537  0.         3.79073389 0.        ]
------
Step:9, Action:North
State  200
Old Q Values:  [6.1985537  0.         3.79073389 0.        ]
New Q values:  [40.46852877  0.          3.79073389  0.        ]
Reward: -1  Episode Reward:  41
xxxxx
xga x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.00352671e+04 -5.99568600e+03  1.28630358e+02  6.75529193e+00]
------
Step:10, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.03084995e+00 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.74869043e+00 -4.97398722e-02]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           3.78783484]
------
Step:11, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           3.78783484]
New Q values:  [-180.6        -221.70871729 -180.6           1.43974107]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.74869043e+00 -4.97398722e-02]
------
Step:12, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.74869043e+00 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03  5.31398493e-01 -4.97398722e-02]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           1.43974107]
------
Step:13, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           1.43974107]
New Q values:  [-180.6        -221.70871729 -180.6           2.28755094]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  7.70551504e+00  1.43218359e+00]
------
Step:14, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  5.31398493e-01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03  2.98824679e-01 -4.97398722e-02]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           2.28755094]
------
Step:15, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           2.28755094]
New Q values:  [-180.6        -221.70871729 -180.6           0.40466778]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  2.98824679e-01 -4.97398722e-02]
------
Step:16, Action:East
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  7.70551504e+00  1.43218359e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  2.60360635e+00  1.43218359e+00]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           0.40466778]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           0.40466778]
New Q values:  [-180.6        -221.70871729 -180.6           0.34294902]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  2.60360635e+00  1.43218359e+00]
------
Step:18, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  2.98824679e-01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -3.77585423e-01 -4.97398722e-02]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           0.34294902]
------
Step:19, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           0.34294902]
New Q values:  [-180.6        -221.70871729 -180.6           0.31826151]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  2.60360635e+00  1.43218359e+00]
------
Step:20, Action:East
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  2.60360635e+00  1.43218359e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  5.36920994e-01  1.43218359e+00]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           0.31826151]
------
Step:21, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           0.31826151]
New Q values:  [-180.6        -221.70871729 -180.6          -0.48761736]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -3.77585423e-01 -4.97398722e-02]
------
Step:22, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -3.77585423e-01 -4.97398722e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -3.77585423e-01 -6.19895949e-01]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.8060000e+02 -6.0006000e+03 -3.2377956e-01  0.0000000e+00]
------
Step:23, Action:West
State  104
Old Q Values:  [-8652.84     0.       0.       0.  ]
New Q values:  [-8652.84     0.       0.   -6180.6 ]
Reward: -10301  Episode Reward:  -10273
xxxxx
xg  x
x   x
x.. x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6         451.22239865    8.424     ]
------
Step:1, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6         451.22239865    8.424     ]
New Q values:  [-951.35054867 -180.6          94.89176983    8.424     ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -303.32396545 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:2, Action:North
State  288
Old Q Values:  [ -303.32396545 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [ -108.0958057  -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   26.11260159 -1446.5611984      0.             0.        ]
------
Step:3, Action:North
State  210
Old Q Values:  [   26.11260159 -1446.5611984      0.             0.        ]
New Q values:  [   16.42320064 -1446.5611984      0.             0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6       1.9272    0.       -0.6   ]
------
Step:4, Action:South
State  130
Old Q Values:  [-180.6       1.9272    0.       -0.6   ]
New Q values:  [-180.6           5.09784019    0.           -0.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   16.42320064 -1446.5611984      0.             0.        ]
------
Step:5, Action:North
State  208
Old Q Values:  [  30.18396638 -910.01049469    0.            9.27221887]
New Q values:  [  13.00293861 -910.01049469    0.            9.27221887]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           5.09784019    0.           -0.6       ]
------
Step:6, Action:East
State  128
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [    0.      0.  -6180.6     0. ]
Reward: -10301  Episode Reward:  -10276
xxxxx
x..gx
x.  x
x.  x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02 -1.37367496e-02 -2.40084000e+03  0.00000000e+00]
------
Step:1, Action:West
State  109
Old Q Values:  [-1.80600000e+02 -1.37367496e-02 -2.40084000e+03  0.00000000e+00]
New Q values:  [-1.80600000e+02 -1.37367496e-02 -2.40084000e+03 -1.80600000e+02]
Reward: -301  Episode Reward:  -301
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02 -1.37367496e-02 -2.40084000e+03 -1.80600000e+02]
------
Step:2, Action:South
State  109
Old Q Values:  [-1.80600000e+02 -1.37367496e-02 -2.40084000e+03 -1.80600000e+02]
New Q values:  [ -180.6           5.6049265 -2400.84       -180.6      ]
Reward: 9  Episode Reward:  -292
xxxxx
x .gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 7.01403994e-01 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
------
Step:3, Action:North
State  181
Old Q Values:  [ 7.01403994e-01 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
New Q values:  [ 4.61444261e-01 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -293
xxxxx
xa. x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.60294221 0.05486176 0.        ]
------
Step:4, Action:South
State  111
Old Q Values:  [0.         2.60294221 0.05486176 0.        ]
New Q values:  [0.         0.57961016 0.05486176 0.        ]
Reward: -1  Episode Reward:  -294
xxxxx
x . x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 4.61444261e-01 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
------
Step:5, Action:North
State  181
Old Q Values:  [ 4.61444261e-01 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
New Q values:  [ 1.26605565e+00 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -295
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6           5.6049265 -2400.84       -180.6      ]
------
Step:6, Action:South
State  108
Old Q Values:  [-6.18060000e+03  8.53560000e+00  1.14369011e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  4.43424000e+00  1.14369011e+00  0.00000000e+00]
Reward: -1  Episode Reward:  -296
xxxxx
xg. x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-6.44241645e+03  5.40000000e+00  0.00000000e+00  0.00000000e+00]
------
Step:7, Action:South
State  180
Old Q Values:  [-6.44241645e+03  5.40000000e+00  0.00000000e+00  0.00000000e+00]
New Q values:  [-6442.41645331   -46.62           0.             0.        ]
Reward: 9  Episode Reward:  -287
xxxxx
x . x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576  -180.6   -6000.6    -180.6  ]
------
Step:8, Action:South
State  261
Old Q Values:  [ 3.47940376e+00 -2.51224095e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 3.47940376e+00 -2.80045817e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -301  Episode Reward:  -588
xxxxx
x . x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3.47940376e+00 -2.80045817e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:9, Action:North
State  261
Old Q Values:  [ 3.47940376e+00 -2.80045817e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 7.91761505e-01 -2.80045817e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -589
xxxxx
x . x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068 -0.46342879  0.          0.        ]
------
Step:10, Action:East
State  183
Old Q Values:  [-0.21026068 -0.46342879  0.          0.        ]
New Q values:  [-0.21026068 -0.46342879 14.53300729  0.        ]
Reward: 9  Episode Reward:  -580
xxxxx
x . x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[30.44335764  0.         10.9944      0.        ]
------
Step:11, Action:North
State  201
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [5.82965508 0.         0.         0.        ]
Reward: 9  Episode Reward:  -571
xxxxx
x a x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  5.36920994e-01  1.43218359e+00]
------
Step:12, Action:West
State  125
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.         0.         0.         1.08147795]
Reward: -1  Episode Reward:  -572
xxxxx
xa gx
x  .x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6           5.6049265 -2400.84       -180.6      ]
------
Step:13, Action:South
State  109
Old Q Values:  [ -180.6           5.6049265 -2400.84       -180.6      ]
New Q values:  [-1.80600000e+02  2.13730131e+00 -2.40084000e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -573
xxxxx
x g x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 1.65110238 -0.33760245  0.          0.        ]
------
Step:14, Action:North
State  189
Old Q Values:  [ 1.65110238 -0.33760245  0.          0.        ]
New Q values:  [ 0.234324   -0.33760245  0.          0.        ]
Reward: -1  Episode Reward:  -574
xxxxx
xa  x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         0.57961016 0.05486176 0.        ]
------
Step:15, Action:South
State  110
Old Q Values:  [-1.80600000e+02 -5.99853600e+03  1.08744499e-01  0.00000000e+00]
New Q values:  [-1.80600000e+02 -8.40001440e+03  1.08744499e-01  0.00000000e+00]
Reward: -10001  Episode Reward:  -10575
xxxxx
x   x
xg .x
x ..x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6          -0.48761736]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6          -0.48761736]
New Q values:  [-180.6        -221.70871729 -180.6           5.63460813]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  5.36920994e-01  1.43218359e+00]
------
Step:2, Action:West
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  5.36920994e-01  1.43218359e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  5.36920994e-01  6.92687344e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    3.18          1.99726873    0.        ]
------
Step:3, Action:South
State  111
Old Q Values:  [0.         0.57961016 0.05486176 0.        ]
New Q values:  [0.         5.70214127 0.05486176 0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 0.234324   -0.33760245  0.          0.        ]
------
Step:4, Action:North
State  189
Old Q Values:  [ 0.234324   -0.33760245  0.          0.        ]
New Q values:  [ 1.20437198 -0.33760245  0.          0.        ]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         5.70214127 0.05486176 0.        ]
------
Step:5, Action:South
State  109
Old Q Values:  [-1.80600000e+02  2.13730131e+00 -2.40084000e+03 -1.80600000e+02]
New Q values:  [-1.8060000e+02  6.1623212e-01 -2.4008400e+03 -1.8060000e+02]
Reward: -1  Episode Reward:  25
xxxxx
x g x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 1.20437198 -0.33760245  0.          0.        ]
------
Step:6, Action:North
State  188
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-5999.269728     0.           0.           0.      ]
Reward: -10001  Episode Reward:  -9976
xxxxx
xg  x
x  .x
x...x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           5.63460813]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           5.63460813]
New Q values:  [-180.6        -221.70871729 -180.6           9.73190528]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.26222244e+02 -6.00060000e+03  5.36920994e-01  6.92687344e+00]
------
Step:2, Action:West
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  5.36920994e-01  6.92687344e+00]
New Q values:  [-2.26222244e+02 -6.00060000e+03  5.36920994e-01  9.12474937e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    3.18          1.99726873    0.        ]
------
Step:3, Action:South
State  111
Old Q Values:  [0.         5.70214127 0.05486176 0.        ]
New Q values:  [0.         8.0606732  0.05486176 0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa.gx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.26605565e+00 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
------
Step:4, Action:North
State  181
Old Q Values:  [ 1.26605565e+00 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
New Q values:  [ 2.32462422e+00 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
x g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         8.0606732  0.05486176 0.        ]
------
Step:5, Action:South
State  109
Old Q Values:  [-1.8060000e+02  6.1623212e-01 -2.4008400e+03 -1.8060000e+02]
New Q values:  [-1.80600000e+02  3.43880114e-01 -2.40084000e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  25
xxxxx
x g x
xa..x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.32462422e+00 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
------
Step:6, Action:North
State  180
Old Q Values:  [-6442.41645331   -46.62           0.             0.        ]
New Q values:  [-8576.23630932   -46.62           0.             0.        ]
Reward: -10001  Episode Reward:  -9976
xxxxx
xg  x
x ..x
x. .x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -3.77585423e-01 -6.19895949e-01]
------
Step:1, Action:East
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  5.36920994e-01  9.12474937e+00]
New Q values:  [ -226.22224449 -6000.6            8.53433998     9.12474937]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           9.73190528]
------
Step:2, Action:West
State  136
Old Q Values:  [-6.18060000e+03  1.34563309e+02  0.00000000e+00  8.68999149e-01]
New Q values:  [-6.18060000e+03  1.34563309e+02  0.00000000e+00 -2.52400340e-01]
Reward: -1  Episode Reward:  8
xxxxx
x.agx
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9602.15563046     0.        ]
------
Step:3, Action:North
State  120
Old Q Values:  [-1.00352671e+04 -5.99568600e+03  1.28630358e+02  6.75529193e+00]
New Q values:  [-1.01561177e+04 -5.99568600e+03  1.28630358e+02  6.75529193e+00]
Reward: -10301  Episode Reward:  -10293
xxxxx
x.g x
x. .x
x...x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068 -0.46342879 14.53300729  0.        ]
------
Step:1, Action:East
State  181
Old Q Values:  [ 2.32462422e+00 -3.70021941e-01 -2.39190955e+03 -1.80600000e+02]
New Q values:  [ 2.32462422e+00 -3.70021941e-01 -6.92960815e+03 -1.80600000e+02]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g.x
x. .x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 7.91761505e-01 -2.80045817e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:1, Action:North
State  261
Old Q Values:  [ 7.91761505e-01 -2.80045817e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [   10.07660679  -280.04581702 -5467.19309188  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.21026068 -0.46342879 14.53300729  0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [-0.21026068 -0.46342879 14.53300729  0.        ]
New Q values:  [-0.21026068 -0.46342879 13.03650404  0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6       -0.294      6.0776704  0.       ]
------
Step:3, Action:East
State  192
Old Q Values:  [-5.36728685e+03  7.25188992e+01  4.42040880e+00  0.00000000e+00]
New Q values:  [-5.36728685e+03  7.25188992e+01  5.06904510e+00  0.00000000e+00]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  13.00293861 -910.01049469    0.            9.27221887]
------
Step:4, Action:North
State  210
Old Q Values:  [   16.42320064 -1446.5611984      0.             0.        ]
New Q values:  [   13.49863231 -1446.5611984      0.             0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           5.09784019    0.           -0.6       ]
------
Step:5, Action:South
State  130
Old Q Values:  [-180.6           5.09784019    0.           -0.6       ]
New Q values:  [-180.6           5.34001766    0.           -0.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  13.00293861 -910.01049469    0.            9.27221887]
------
Step:6, Action:North
State  210
Old Q Values:  [   13.49863231 -1446.5611984      0.             0.        ]
New Q values:  [    6.40145822 -1446.5611984      0.             0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           5.34001766    0.           -0.6       ]
------
Step:7, Action:South
State  130
Old Q Values:  [-180.6           5.34001766    0.           -0.6       ]
New Q values:  [-180.6           3.45644453    0.           -0.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    6.40145822 -1446.5611984      0.             0.        ]
------
Step:8, Action:North
State  210
Old Q Values:  [    6.40145822 -1446.5611984      0.             0.        ]
New Q values:  [    2.99751665 -1446.5611984      0.             0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           3.45644453    0.           -0.6       ]
------
Step:9, Action:South
State  130
Old Q Values:  [-180.6           3.45644453    0.           -0.6       ]
New Q values:  [-180.6          4.6834594    0.          -0.6      ]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  13.00293861 -910.01049469    0.            9.27221887]
------
Step:10, Action:North
State  208
Old Q Values:  [  13.00293861 -910.01049469    0.            9.27221887]
New Q values:  [   6.00621326 -910.01049469    0.            9.27221887]
Reward: -1  Episode Reward:  20
xxxxx
x..ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6          4.6834594    0.          -0.6      ]
------
Step:11, Action:South
State  130
Old Q Values:  [-180.6          4.6834594    0.          -0.6      ]
New Q values:  [-180.6           2.17263875    0.           -0.6       ]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.99751665 -1446.5611984      0.             0.        ]
------
Step:12, Action:North
State  208
Old Q Values:  [   6.00621326 -910.01049469    0.            9.27221887]
New Q values:  [   2.45427693 -910.01049469    0.            9.27221887]
Reward: -1  Episode Reward:  18
xxxxx
x..ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           2.17263875    0.           -0.6       ]
------
Step:13, Action:South
State  130
Old Q Values:  [-180.6           2.17263875    0.           -0.6       ]
New Q values:  [-180.6           3.05072116    0.           -0.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   2.45427693 -910.01049469    0.            9.27221887]
------
Step:14, Action:North
State  216
Old Q Values:  [-5.99986251e+03 -4.84985452e+01 -6.17035694e+03  1.25956611e+00]
New Q values:  [-2.36017601e+03 -4.84985452e+01 -6.17035694e+03  1.25956611e+00]
Reward: -1  Episode Reward:  16
xxxxx
x.gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.34563309e+02  0.00000000e+00 -2.52400340e-01]
------
Step:15, Action:South
State  136
Old Q Values:  [-6.18060000e+03  1.34563309e+02  0.00000000e+00 -2.52400340e-01]
New Q values:  [-6.18060000e+03  5.36031936e+01  0.00000000e+00 -2.52400340e-01]
Reward: -1  Episode Reward:  15
xxxxx
xg. x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-2.36017601e+03 -4.84985452e+01 -6.17035694e+03  1.25956611e+00]
------
Step:16, Action:West
State  208
Old Q Values:  [   2.45427693 -910.01049469    0.            9.27221887]
New Q values:  [   2.45427693 -910.01049469    0.           24.86455731]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
xga x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[-5.36728685e+03  7.25188992e+01  5.06904510e+00  0.00000000e+00]
------
Step:17, Action:South
State  192
Old Q Values:  [-5.36728685e+03  7.25188992e+01  5.06904510e+00  0.00000000e+00]
New Q values:  [-5.36728685e+03  6.28750906e+01  5.06904510e+00  0.00000000e+00]
Reward: 9  Episode Reward:  23
xxxxx
xg. x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6          94.89176983    8.424     ]
------
Step:18, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6          94.89176983    8.424     ]
New Q values:  [-951.35054867 -180.6          10.92796622    8.424     ]
Reward: 9  Episode Reward:  32
xxxxx
x.. x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -108.0958057  -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:19, Action:North
State  288
Old Q Values:  [ -108.0958057  -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [  -36.37895509 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  31
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   2.45427693 -910.01049469    0.           24.86455731]
------
Step:20, Action:West
State  208
Old Q Values:  [   2.45427693 -910.01049469    0.           24.86455731]
New Q values:  [   2.45427693 -910.01049469    0.           28.20835012]
Reward: -1  Episode Reward:  30
xxxxx
x.. x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[-5.36728685e+03  6.28750906e+01  5.06904510e+00  0.00000000e+00]
------
Step:21, Action:South
State  194
Old Q Values:  [-0.6       -0.294      6.0776704  0.       ]
New Q values:  [-0.6         2.56078987  6.0776704   0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x.. x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6          10.92796622    8.424     ]
------
Step:22, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6          10.92796622    8.424     ]
New Q values:  [-951.35054867 -180.6          -7.14250004    8.424     ]
Reward: -1  Episode Reward:  28
xxxxx
x.. x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  -36.37895509 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:23, Action:North
State  288
Old Q Values:  [  -36.37895509 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [  -14.25232704 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  27
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.99751665 -1446.5611984      0.             0.        ]
------
Step:24, Action:North
State  208
Old Q Values:  [   2.45427693 -910.01049469    0.           28.20835012]
New Q values:  [   1.29692712 -910.01049469    0.           28.20835012]
Reward: -1  Episode Reward:  26
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           3.05072116    0.           -0.6       ]
------
Step:25, Action:South
State  130
Old Q Values:  [-180.6           3.05072116    0.           -0.6       ]
New Q values:  [-180.6          9.0827935    0.          -0.6      ]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   1.29692712 -910.01049469    0.           28.20835012]
------
Step:26, Action:North
State  208
Old Q Values:  [   1.29692712 -910.01049469    0.           28.20835012]
New Q values:  [   2.6436089  -910.01049469    0.           28.20835012]
Reward: -1  Episode Reward:  24
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6          9.0827935    0.          -0.6      ]
------
Step:27, Action:South
State  130
Old Q Values:  [-180.6          9.0827935    0.          -0.6      ]
New Q values:  [-180.6          11.49562243    0.           -0.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   2.6436089  -910.01049469    0.           28.20835012]
------
Step:28, Action:North
State  208
Old Q Values:  [   2.6436089  -910.01049469    0.           28.20835012]
New Q values:  [   3.90613029 -910.01049469    0.           28.20835012]
Reward: -1  Episode Reward:  22
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6          11.49562243    0.           -0.6       ]
------
Step:29, Action:East
State  128
Old Q Values:  [    0.      0.  -6180.6     0. ]
New Q values:  [    0.       0.   -8652.84     0.  ]
Reward: -10301  Episode Reward:  -10279
xxxxx
x..gx
x   x
x   x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-8576.23630932   -46.62           0.             0.        ]
------
Step:1, Action:East
State  181
Old Q Values:  [ 2.32462422e+00 -3.70021941e-01 -6.92960815e+03 -1.80600000e+02]
New Q values:  [ 2.32462422e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-0.40598153  5.4         0.          0.        ]
------
Step:2, Action:South
State  196
Old Q Values:  [-0.40598153  5.4         0.          0.        ]
New Q values:  [-0.40598153  7.56        0.          0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-0.192  0.     0.     0.   ]
------
Step:3, Action:South
State  276
Old Q Values:  [-0.192  0.     0.     0.   ]
New Q values:  [  -0.192 -180.6      0.       0.   ]
Reward: -301  Episode Reward:  -283
xxxxx
x g.x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  -0.192 -180.6      0.       0.   ]
------
Step:4, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6          -7.14250004    8.424     ]
New Q values:  [-951.35054867 -180.6          -1.73269813    8.424     ]
Reward: 9  Episode Reward:  -274
xxxxx
xg..x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  -14.25232704 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:5, Action:North
State  288
Old Q Values:  [  -14.25232704 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [    8.16157422 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  -265
xxxxx
x g.x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   3.90613029 -910.01049469    0.           28.20835012]
------
Step:6, Action:West
State  208
Old Q Values:  [   3.90613029 -910.01049469    0.           28.20835012]
New Q values:  [   3.90613029 -910.01049469    0.           14.55555891]
Reward: -1  Episode Reward:  -266
xxxxx
x .gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   12.90739622     0.         -7476.85600442     0.        ]
------
Step:7, Action:North
State  196
Old Q Values:  [-0.40598153  7.56        0.          0.        ]
New Q values:  [-5994.76239261     7.56           0.             0.        ]
Reward: -9991  Episode Reward:  -10257
xxxxx
x g.x
x   x
x.  x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6          -1.73269813    8.424     ]
------
Step:1, Action:West
State  276
Old Q Values:  [  -0.192 -180.6      0.       0.   ]
New Q values:  [  -0.192 -180.6      0.     -48.78 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576  -180.6   -6000.6    -180.6  ]
------
Step:2, Action:South
State  261
Old Q Values:  [   10.07660679  -280.04581702 -5467.19309188  -180.6       ]
New Q values:  [   10.07660679  -289.59534477 -5467.19309188  -180.6       ]
Reward: -301  Episode Reward:  -292
xxxxx
x...x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   10.07660679  -289.59534477 -5467.19309188  -180.6       ]
------
Step:3, Action:North
State  261
Old Q Values:  [   10.07660679  -289.59534477 -5467.19309188  -180.6       ]
New Q values:  [   10.12802998  -289.59534477 -5467.19309188  -180.6       ]
Reward: 9  Episode Reward:  -283
xxxxx
x...x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.32462422e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
------
Step:4, Action:North
State  181
Old Q Values:  [ 2.32462422e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
New Q values:  [ 6.43301372e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  -274
xxxxx
xa.gx
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  3.43880114e-01 -2.40084000e+03 -1.80600000e+02]
------
Step:5, Action:South
State  103
Old Q Values:  [-180.6           6.61449922    0.            0.        ]
New Q values:  [-180.6           3.97570381    0.            0.        ]
Reward: -1  Episode Reward:  -275
xxxxx
x ..x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.43301372e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
------
Step:6, Action:North
State  181
Old Q Values:  [ 6.43301372e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
New Q values:  [ 3.16591663e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -276
xxxxx
xa..x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           3.97570381    0.            0.        ]
------
Step:7, Action:South
State  103
Old Q Values:  [-180.6           3.97570381    0.            0.        ]
New Q values:  [-180.6           1.94005651    0.            0.        ]
Reward: -1  Episode Reward:  -277
xxxxx
x ..x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 3.16591663e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
------
Step:8, Action:North
State  181
Old Q Values:  [ 3.16591663e+00 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
New Q values:  [ 7.69530687e-01 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -278
xxxxx
xa.gx
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  3.43880114e-01 -2.40084000e+03 -1.80600000e+02]
------
Step:9, Action:South
State  103
Old Q Values:  [-180.6           1.94005651    0.            0.        ]
New Q values:  [-180.6           0.40688181    0.            0.        ]
Reward: -1  Episode Reward:  -279
xxxxx
x ..x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 7.69530687e-01 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
------
Step:10, Action:North
State  181
Old Q Values:  [ 7.69530687e-01 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
New Q values:  [-1.70123182e-01 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -280
xxxxx
xa..x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           0.40688181    0.            0.        ]
------
Step:11, Action:South
State  102
Old Q Values:  [-180.6    0.    -0.6    0. ]
New Q values:  [-1.8060e+02 -6.0006e+03 -6.0000e-01  0.0000e+00]
Reward: -10001  Episode Reward:  -10281
xxxxx
x ..x
xg .x
x  .x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.99751665 -1446.5611984      0.             0.        ]
------
Step:1, Action:North
State  208
Old Q Values:  [   3.90613029 -910.01049469    0.           14.55555891]
New Q values:  [   9.8820237  -910.01049469    0.           14.55555891]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           9.73190528]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           9.73190528]
New Q values:  [-180.6        -221.70871729 -180.6           9.17948649]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -3.77585423e-01 -6.19895949e-01]
------
Step:3, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  1.28630358e+02  6.75529193e+00]
New Q values:  [-1.01561177e+04 -5.99568600e+03  6.69331011e+01  6.75529193e+00]
Reward: -1  Episode Reward:  17
xxxxx
xg ax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  5.36031936e+01  0.00000000e+00 -2.52400340e-01]
------
Step:4, Action:South
State  136
Old Q Values:  [-6.18060000e+03  5.36031936e+01  0.00000000e+00 -2.52400340e-01]
New Q values:  [-6.18060000e+03  2.52079451e+01  0.00000000e+00 -2.52400340e-01]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   9.8820237  -910.01049469    0.           14.55555891]
------
Step:5, Action:West
State  208
Old Q Values:  [   9.8820237  -910.01049469    0.           14.55555891]
New Q values:  [    9.8820237   -910.01049469     0.         -5969.91524924]
Reward: -9991  Episode Reward:  -9975
xxxxx
x.  x
x.g x
x. .x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    9.8820237   -910.01049469     0.         -5969.91524924]
------
Step:1, Action:North
State  210
Old Q Values:  [    2.99751665 -1446.5611984      0.             0.        ]
New Q values:  [    9.35285261 -1446.5611984      0.             0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           9.17948649]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           9.17948649]
New Q values:  [-180.6        -221.70871729 -180.6           8.95851897]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -3.77585423e-01 -6.19895949e-01]
------
Step:3, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -3.77585423e-01 -6.19895949e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.93652152e+00 -6.19895949e-01]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           8.95851897]
------
Step:4, Action:West
State  136
Old Q Values:  [-6.18060000e+03  2.52079451e+01  0.00000000e+00 -2.52400340e-01]
New Q values:  [-6180.6           25.20794511     0.         -5980.6210298 ]
Reward: -10001  Episode Reward:  -9984
xxxxx
x.g x
x . x
x...x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[-1.70123182e-01 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
------
Step:1, Action:North
State  183
Old Q Values:  [-0.21026068 -0.46342879 13.03650404  0.        ]
New Q values:  [ 5.43796027 -0.46342879 13.03650404  0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           0.40688181    0.            0.        ]
------
Step:2, Action:South
State  110
Old Q Values:  [-1.80600000e+02 -8.40001440e+03  1.08744499e-01  0.00000000e+00]
New Q values:  [-1.80600000e+02 -3.36060576e+03  1.08744499e-01  0.00000000e+00]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:3, Action:North
State  183
Old Q Values:  [ 5.43796027 -0.46342879 13.03650404  0.        ]
New Q values:  [ 1.69724865 -0.46342879 13.03650404  0.        ]
Reward: -1  Episode Reward:  7
xxxxx
xa..x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           0.40688181    0.            0.        ]
------
Step:4, Action:South
State  110
Old Q Values:  [-1.80600000e+02 -3.36060576e+03  1.08744499e-01  0.00000000e+00]
New Q values:  [-1.80600000e+02 -1.34484230e+03  1.08744499e-01  0.00000000e+00]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:5, Action:North
State  180
Old Q Values:  [-8576.23630932   -46.62           0.             0.        ]
New Q values:  [-3431.06190038   -46.62           0.             0.        ]
Reward: -1  Episode Reward:  5
xxxxx
xa..x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -1.34484230e+03  1.08744499e-01  0.00000000e+00]
------
Step:6, Action:East
State  110
Old Q Values:  [-1.80600000e+02 -1.34484230e+03  1.08744499e-01  0.00000000e+00]
New Q values:  [ -180.6       -1344.842304      5.4709698     0.       ]
Reward: 9  Episode Reward:  14
xxxxx
x a.x
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[0.         0.         0.09157335 0.        ]
------
Step:7, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.93652152e+00 -6.19895949e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  8.86216430e+00 -6.19895949e-01]
Reward: 9  Episode Reward:  23
xxxxx
x  ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           8.95851897]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           8.95851897]
New Q values:  [-180.6        -221.70871729 -180.6           5.64205688]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.86216430e+00 -6.19895949e-01]
------
Step:9, Action:East
State  126
Old Q Values:  [0.         0.         0.09157335 0.        ]
New Q values:  [0.        0.        1.1292464 0.       ]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           5.64205688]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           5.64205688]
New Q values:  [-180.6        -221.70871729 -180.6           4.31547204]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.86216430e+00 -6.19895949e-01]
------
Step:11, Action:East
State  123
Old Q Values:  [ -226.22224449 -6000.6            8.53433998     9.12474937]
New Q values:  [-2.26222244e+02 -6.00060000e+03  4.10837761e+00  9.12474937e+00]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           4.31547204]
------
Step:12, Action:West
State  136
Old Q Values:  [-6180.6           25.20794511     0.         -5980.6210298 ]
New Q values:  [-6180.6           25.20794511     0.         -2392.84841192]
Reward: -1  Episode Reward:  18
xxxxx
x agx
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9602.15563046     0.        ]
------
Step:13, Action:North
State  123
Old Q Values:  [-2.26222244e+02 -6.00060000e+03  4.10837761e+00  9.12474937e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  4.10837761e+00  9.12474937e+00]
Reward: -301  Episode Reward:  -283
xxxxx
x a x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  4.10837761e+00  9.12474937e+00]
------
Step:14, Action:West
State  121
Old Q Values:  [    0.             0.         -9602.15563046     0.        ]
New Q values:  [ 0.00000000e+00  0.00000000e+00 -9.60215563e+03 -4.96835966e-01]
Reward: -1  Episode Reward:  -284
xxxxx
xa gx
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  3.43880114e-01 -2.40084000e+03 -1.80600000e+02]
------
Step:15, Action:South
State  109
Old Q Values:  [-1.80600000e+02  3.43880114e-01 -2.40084000e+03 -1.80600000e+02]
New Q values:  [-1.8060000e+02 -1.0113636e-01 -2.4008400e+03 -1.8060000e+02]
Reward: -1  Episode Reward:  -285
xxxxx
x g x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 1.20437198 -0.33760245  0.          0.        ]
------
Step:16, Action:North
State  189
Old Q Values:  [ 1.20437198 -0.33760245  0.          0.        ]
New Q values:  [-0.14859212 -0.33760245  0.          0.        ]
Reward: -1  Episode Reward:  -286
xxxxx
xa gx
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.8060000e+02 -1.0113636e-01 -2.4008400e+03 -1.8060000e+02]
------
Step:17, Action:South
State  109
Old Q Values:  [-1.8060000e+02 -1.0113636e-01 -2.4008400e+03 -1.8060000e+02]
New Q values:  [-1.80600000e+02 -6.40454544e-01 -2.40084000e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -287
xxxxx
x g x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-0.14859212 -0.33760245  0.          0.        ]
------
Step:18, Action:East
State  189
Old Q Values:  [-0.14859212 -0.33760245  0.          0.        ]
New Q values:  [-1.48592116e-01 -3.37602450e-01 -5.98845944e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -10288
xxxxx
x   x
x g.x
x...x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.86216430e+00 -6.19895949e-01]
------
Step:1, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  6.69331011e+01  6.75529193e+00]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.97356240e+01  6.75529193e+00]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           25.20794511     0.         -2392.84841192]
------
Step:2, Action:South
State  136
Old Q Values:  [-6180.6           25.20794511     0.         -2392.84841192]
New Q values:  [-6180.6           15.86104788     0.         -2392.84841192]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-2.36017601e+03 -4.84985452e+01 -6.17035694e+03  1.25956611e+00]
------
Step:3, Action:West
State  216
Old Q Values:  [-2.36017601e+03 -4.84985452e+01 -6.17035694e+03  1.25956611e+00]
New Q values:  [-2360.17601238   -48.49854524 -6170.35693855    18.04438507]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[40.46852877  0.          3.79073389  0.        ]
------
Step:4, Action:North
State  200
Old Q Values:  [40.46852877  0.          3.79073389  0.        ]
New Q values:  [18.2460608   0.          3.79073389  0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x.a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.86216430e+00 -6.19895949e-01]
------
Step:5, Action:East
State  114
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [-180.6           0.            0.69464161    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           4.31547204]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           4.31547204]
New Q values:  [-180.6        -221.70871729 -180.6           3.78483811]
Reward: -1  Episode Reward:  24
xxxxx
x.a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.86216430e+00 -6.19895949e-01]
------
Step:7, Action:East
State  114
Old Q Values:  [-180.6           0.            0.69464161    0.        ]
New Q values:  [-180.6           0.            0.81330808    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           3.78483811]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           3.78483811]
New Q values:  [-180.6        -221.70871729 -180.6           1.15792767]
Reward: -1  Episode Reward:  22
xxxxx
x.a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            0.81330808    0.        ]
------
Step:9, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  8.86216430e+00 -6.19895949e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.29224402e+00 -6.19895949e-01]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           1.15792767]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           1.15792767]
New Q values:  [-180.6        -221.70871729 -180.6           2.60059588]
Reward: -1  Episode Reward:  20
xxxxx
x.a x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  4.10837761e+00  9.12474937e+00]
------
Step:11, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.29224402e+00 -6.19895949e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.29224402e+00  7.57024358e+00]
Reward: 9  Episode Reward:  29
xxxxx
xa  x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         8.0606732  0.05486176 0.        ]
------
Step:12, Action:South
State  107
Old Q Values:  [-252.35169558    3.18          1.99726873    0.        ]
New Q values:  [-252.35169558    0.672         1.99726873    0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xa  x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[0. 0. 0. 0.]
------
Step:13, Action:North
State  190
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [1.04129094 0.         0.         0.        ]
Reward: -1  Episode Reward:  27
xxxxx
xa  x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6       -1344.842304      5.4709698     0.       ]
------
Step:14, Action:East
State  110
Old Q Values:  [ -180.6       -1344.842304      5.4709698     0.       ]
New Q values:  [ -180.6      -1344.842304     3.859461     0.      ]
Reward: -1  Episode Reward:  26
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.29224402e+00  7.57024358e+00]
------
Step:15, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.29224402e+00  7.57024358e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.29224402e+00  3.58593573e+00]
Reward: -1  Episode Reward:  25
xxxxx
xa  x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6      -1344.842304     3.859461     0.      ]
------
Step:16, Action:East
State  107
Old Q Values:  [-252.35169558    0.672         1.99726873    0.        ]
New Q values:  [-252.35169558    0.672         1.27468821    0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.29224402e+00  3.58593573e+00]
------
Step:17, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.29224402e+00  3.58593573e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.29224402e+00  1.99221259e+00]
Reward: -1  Episode Reward:  23
xxxxx
xa  x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6      -1344.842304     3.859461     0.      ]
------
Step:18, Action:East
State  110
Old Q Values:  [ -180.6      -1344.842304     3.859461     0.      ]
New Q values:  [ -180.6       -1344.842304      1.9314576     0.       ]
Reward: -1  Episode Reward:  22
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.29224402e+00  1.99221259e+00]
------
Step:19, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.29224402e+00  1.99221259e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.99221259e+00]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           2.60059588]
------
Step:20, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           2.60059588]
New Q values:  [-180.6        -221.70871729 -180.6           1.03790213]
Reward: -1  Episode Reward:  20
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.99221259e+00]
------
Step:21, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.97356240e+01  6.75529193e+00]
New Q values:  [-10156.11771313  -5995.686          39.73562398  -5996.56761123]
Reward: -10001  Episode Reward:  -9981
xxxxx
xg  x
x   x
x...x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02 -6.40454544e-01 -2.40084000e+03 -1.80600000e+02]
------
Step:1, Action:South
State  111
Old Q Values:  [0.         8.0606732  0.05486176 0.        ]
New Q values:  [0.         8.57323233 0.05486176 0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[-1.70123182e-01 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
------
Step:2, Action:North
State  183
Old Q Values:  [ 1.69724865 -0.46342879 13.03650404  0.        ]
New Q values:  [ 2.65086916 -0.46342879 13.03650404  0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xa. x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         8.57323233 0.05486176 0.        ]
------
Step:3, Action:South
State  111
Old Q Values:  [0.         8.57323233 0.05486176 0.        ]
New Q values:  [0.         6.74024414 0.05486176 0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x . x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2.65086916 -0.46342879 13.03650404  0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [-1.70123182e-01 -3.70021941e-01 -2.76482326e+03 -1.80600000e+02]
New Q values:  [-1.70123182e-01 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
Reward: -9991  Episode Reward:  -9984
xxxxx
x . x
x g.x
x...x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[    8.16157422 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:1, Action:North
State  288
Old Q Values:  [    8.16157422 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [   11.6292368  -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    9.8820237   -910.01049469     0.         -5969.91524924]
------
Step:2, Action:North
State  208
Old Q Values:  [    9.8820237   -910.01049469     0.         -5969.91524924]
New Q values:  [   14.11112384  -910.01049469     0.         -5969.91524924]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           15.86104788     0.         -2392.84841192]
------
Step:3, Action:South
State  136
Old Q Values:  [-6180.6           15.86104788     0.         -2392.84841192]
New Q values:  [-6180.6            9.9777563      0.         -2392.84841192]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   14.11112384  -910.01049469     0.         -5969.91524924]
------
Step:4, Action:North
State  208
Old Q Values:  [   14.11112384  -910.01049469     0.         -5969.91524924]
New Q values:  [    8.49313627  -910.01049469     0.         -5969.91524924]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6          11.49562243    0.           -0.6       ]
------
Step:5, Action:South
State  130
Old Q Values:  [-180.6          11.49562243    0.           -0.6       ]
New Q values:  [-180.6           6.80410476    0.           -0.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    9.35285261 -1446.5611984      0.             0.        ]
------
Step:6, Action:North
State  210
Old Q Values:  [    9.35285261 -1446.5611984      0.             0.        ]
New Q values:  [    3.45251168 -1446.5611984      0.             0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           1.03790213]
------
Step:7, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           1.03790213]
New Q values:  [-180.6        -221.70871729 -180.6           8.55258566]
Reward: 9  Episode Reward:  23
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  4.10837761e+00  9.12474937e+00]
------
Step:8, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  4.10837761e+00  9.12474937e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  4.10837761e+00  9.43230621e+00]
Reward: 9  Episode Reward:  32
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    0.672         1.27468821    0.        ]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558    0.672         1.27468821    0.        ]
New Q values:  [-252.35169558    0.672         0.50753906    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.99221259e+00]
------
Step:10, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  4.10837761e+00  9.43230621e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  4.10837761e+00  3.37452249e+00]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    0.672         0.50753906    0.        ]
------
Step:11, Action:South
State  107
Old Q Values:  [-252.35169558    0.672         0.50753906    0.        ]
New Q values:  [-252.35169558    5.6688        0.50753906    0.        ]
Reward: 9  Episode Reward:  39
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[    0.             0.         -5997.30277983  -178.98      ]
------
Step:12, Action:North
State  181
Old Q Values:  [-1.70123182e-01 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
New Q values:  [ 1.03259073e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    5.6688        0.50753906    0.        ]
------
Step:13, Action:South
State  105
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [-180.6          -0.29022278    0.            0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.03259073e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
------
Step:14, Action:North
State  181
Old Q Values:  [ 1.03259073e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
New Q values:  [ 1.51367629e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    5.6688        0.50753906    0.        ]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558    5.6688        0.50753906    0.        ]
New Q values:  [-252.35169558    5.57847121    0.50753906    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa. x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2.65086916 -0.46342879 13.03650404  0.        ]
------
Step:16, Action:East
State  187
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  0.  5.4 0. ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458     0.             0.        ]
------
Step:17, Action:North
State  203
Old Q Values:  [1.01205685 0.         0.         0.        ]
New Q values:  [1.03733602 0.         0.         0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  4.10837761e+00  3.37452249e+00]
------
Step:18, Action:East
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  4.10837761e+00  3.37452249e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  3.60912674e+00  3.37452249e+00]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           8.55258566]
------
Step:19, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           8.55258566]
New Q values:  [-180.6        -221.70871729 -180.6           3.41869804]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.99221259e+00]
------
Step:20, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.99221259e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.87042640e+00]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    5.57847121    0.50753906    0.        ]
------
Step:21, Action:South
State  107
Old Q Values:  [-252.35169558    5.57847121    0.50753906    0.        ]
New Q values:  [-252.35169558    1.63138848    0.50753906    0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[    0.             0.         -5997.30277983  -178.98      ]
------
Step:22, Action:North
State  187
Old Q Values:  [0.  0.  5.4 0. ]
New Q values:  [-0.11058345  0.          5.4         0.        ]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    1.63138848    0.50753906    0.        ]
------
Step:23, Action:South
State  107
Old Q Values:  [-252.35169558    1.63138848    0.50753906    0.        ]
New Q values:  [-252.35169558    1.67255539    0.50753906    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.          5.4         0.        ]
------
Step:24, Action:East
State  187
Old Q Values:  [-0.11058345  0.          5.4         0.        ]
New Q values:  [-0.11058345  0.          1.56        0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458     0.             0.        ]
------
Step:25, Action:North
State  203
Old Q Values:  [1.03733602 0.         0.         0.        ]
New Q values:  [0.89767243 0.         0.         0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  3.60912674e+00  3.37452249e+00]
------
Step:26, Action:East
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  3.60912674e+00  3.37452249e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.37452249e+00]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           3.41869804]
------
Step:27, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           3.41869804]
New Q values:  [-180.6        -221.70871729 -180.6           1.32860714]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.87042640e+00]
------
Step:28, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686          39.73562398  -5996.56761123]
New Q values:  [-10156.11771313  -5995.686          39.73562398  -2399.22704449]
Reward: -1  Episode Reward:  32
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6          -0.29022278    0.            0.        ]
------
Step:29, Action:East
State  104
Old Q Values:  [-8652.84     0.       0.   -6180.6 ]
New Q values:  [-8652.84     0.   -6000.6  -6180.6 ]
Reward: -10001  Episode Reward:  -9969
xxxxx
xg  x
x   x
x.  x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576  -180.6   -6000.6    -180.6  ]
------
Step:1, Action:South
State  260
Old Q Values:  [-2397.576  -180.6   -6000.6    -180.6  ]
New Q values:  [-2397.576  -307.02  -6000.6    -180.6  ]
Reward: -301  Episode Reward:  -301
xxxxx
x ..x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576  -307.02  -6000.6    -180.6  ]
------
Step:2, Action:West
State  260
Old Q Values:  [-2397.576  -307.02  -6000.6    -180.6  ]
New Q values:  [-2397.576  -307.02  -6000.6   -6307.02 ]
Reward: -10301  Episode Reward:  -10602
xxxxx
x ..x
x...x
xg..x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.37452249e+00]
------
Step:1, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.37452249e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  7.25157561e+00]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    1.67255539    0.50753906    0.        ]
------
Step:2, Action:South
State  111
Old Q Values:  [0.         6.74024414 0.05486176 0.        ]
New Q values:  [0.         8.55020054 0.05486176 0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.51367629e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
------
Step:3, Action:North
State  189
Old Q Values:  [-1.48592116e-01 -3.37602450e-01 -5.98845944e+03  0.00000000e+00]
New Q values:  [ 1.90562332e+00 -3.37602450e-01 -5.98845944e+03  0.00000000e+00]
Reward: -1  Episode Reward:  17
xxxxx
xa .x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         8.55020054 0.05486176 0.        ]
------
Step:4, Action:South
State  111
Old Q Values:  [0.         8.55020054 0.05486176 0.        ]
New Q values:  [0.         3.2741831  0.05486176 0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.51367629e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
------
Step:5, Action:North
State  189
Old Q Values:  [ 1.90562332e+00 -3.37602450e-01 -5.98845944e+03  0.00000000e+00]
New Q values:  [-2.98870365e-02 -3.37602450e-01 -5.98845944e+03  0.00000000e+00]
Reward: -1  Episode Reward:  15
xxxxx
xa gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02 -6.40454544e-01 -2.40084000e+03 -1.80600000e+02]
------
Step:6, Action:South
State  111
Old Q Values:  [0.         3.2741831  0.05486176 0.        ]
New Q values:  [0.         1.16377613 0.05486176 0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.51367629e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
------
Step:7, Action:North
State  183
Old Q Values:  [ 2.65086916 -0.46342879 13.03650404  0.        ]
New Q values:  [ 0.8094805  -0.46342879 13.03650404  0.        ]
Reward: -1  Episode Reward:  13
xxxxx
xa .x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         1.16377613 0.05486176 0.        ]
------
Step:8, Action:South
State  111
Old Q Values:  [0.         1.16377613 0.05486176 0.        ]
New Q values:  [0.         0.31961334 0.05486176 0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.51367629e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
------
Step:9, Action:North
State  183
Old Q Values:  [ 0.8094805  -0.46342879 13.03650404  0.        ]
New Q values:  [-0.1803238  -0.46342879 13.03650404  0.        ]
Reward: -1  Episode Reward:  11
xxxxx
xa .x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         0.31961334 0.05486176 0.        ]
------
Step:10, Action:South
State  111
Old Q Values:  [0.         0.31961334 0.05486176 0.        ]
New Q values:  [0.         3.43879655 0.05486176 0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x  .x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238  -0.46342879 13.03650404  0.        ]
------
Step:11, Action:East
State  183
Old Q Values:  [-0.1803238  -0.46342879 13.03650404  0.        ]
New Q values:  [-0.1803238  -0.46342879 10.88390334  0.        ]
Reward: 9  Episode Reward:  19
xxxxx
x  .x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[0.89767243 0.         0.         0.        ]
------
Step:12, Action:North
State  201
Old Q Values:  [5.82965508 0.         0.         0.        ]
New Q values:  [3.90733471 0.         0.         0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x a.x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  7.25157561e+00]
------
Step:13, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  7.25157561e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  2.42269479e+00]
Reward: -1  Episode Reward:  17
xxxxx
xa .x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           0.40688181    0.            0.        ]
------
Step:14, Action:South
State  103
Old Q Values:  [-180.6           0.40688181    0.            0.        ]
New Q values:  [-180.6           2.82792373    0.            0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x  .x
xa  x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238  -0.46342879 10.88390334  0.        ]
------
Step:15, Action:East
State  190
Old Q Values:  [1.04129094 0.         0.         0.        ]
New Q values:  [ 1.04129094  0.         -0.6         0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x  .x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-0.6  0.   0.   0. ]
------
Step:16, Action:South
State  196
Old Q Values:  [-5994.76239261     7.56           0.             0.        ]
New Q values:  [-5994.76239261     8.424          0.             0.        ]
Reward: 9  Episode Reward:  24
xxxxx
x  .x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  -0.192 -180.6      0.     -48.78 ]
------
Step:17, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6          -1.73269813    8.424     ]
New Q values:  [-951.35054867 -180.6           8.19569179    8.424     ]
Reward: 9  Episode Reward:  33
xxxxx
xg .x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   11.6292368  -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:18, Action:North
State  288
Old Q Values:  [   11.6292368  -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [    6.5996356  -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    8.49313627  -910.01049469     0.         -5969.91524924]
------
Step:19, Action:North
State  216
Old Q Values:  [-2360.17601238   -48.49854524 -6170.35693855    18.04438507]
New Q values:  [ -938.27182281   -48.49854524 -6170.35693855    18.04438507]
Reward: 9  Episode Reward:  41
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           1.32860714]
------
Step:20, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           1.32860714]
New Q values:  [-180.6        -221.70871729 -180.6           0.65825129]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  2.42269479e+00]
------
Step:21, Action:West
State  121
Old Q Values:  [ 0.00000000e+00  0.00000000e+00 -9.60215563e+03 -4.96835966e-01]
New Q values:  [ 0.00000000e+00  0.00000000e+00 -9.60215563e+03 -7.98734386e-01]
Reward: -1  Episode Reward:  39
xxxxx
xa gx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6          -0.29022278    0.            0.        ]
------
Step:22, Action:East
State  105
Old Q Values:  [-180.6          -0.29022278    0.            0.        ]
New Q values:  [-1.80600000e+02 -2.90222782e-01 -5.98867931e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -9962
xxxxx
x g x
x   x
x.  x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           0.65825129]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           0.65825129]
New Q values:  [-180.6        -221.70871729 -180.6           6.39010895]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  2.42269479e+00]
------
Step:2, Action:West
State  121
Old Q Values:  [ 0.00000000e+00  0.00000000e+00 -9.60215563e+03 -7.98734386e-01]
New Q values:  [ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa gx
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02 -6.40454544e-01 -2.40084000e+03 -1.80600000e+02]
------
Step:3, Action:South
State  111
Old Q Values:  [0.         3.43879655 0.05486176 0.        ]
New Q values:  [0.         7.22962151 0.05486176 0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.51367629e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
------
Step:4, Action:North
State  181
Old Q Values:  [ 1.51367629e+00 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
New Q values:  [-1.86665847e-01 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  26
xxxxx
xa gx
x ..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02 -6.40454544e-01 -2.40084000e+03 -1.80600000e+02]
------
Step:5, Action:South
State  109
Old Q Values:  [-1.80600000e+02 -6.40454544e-01 -2.40084000e+03 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -9.12181572e-01 -2.40084000e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  25
xxxxx
x g x
xa..x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[-1.86665847e-01 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
------
Step:6, Action:North
State  181
Old Q Values:  [-1.86665847e-01 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
New Q values:  [-9.48320810e-01 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  24
xxxxx
xa gx
x ..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02 -9.12181572e-01 -2.40084000e+03 -1.80600000e+02]
------
Step:7, Action:South
State  111
Old Q Values:  [0.         7.22962151 0.05486176 0.        ]
New Q values:  [0.         2.18084202 0.05486176 0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[-9.48320810e-01 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
------
Step:8, Action:South
State  183
Old Q Values:  [-0.1803238  -0.46342879 10.88390334  0.        ]
New Q values:  [-0.1803238   8.25303748 10.88390334  0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   10.12802998  -289.59534477 -5467.19309188  -180.6       ]
------
Step:9, Action:North
State  261
Old Q Values:  [   10.12802998  -289.59534477 -5467.19309188  -180.6       ]
New Q values:  [    6.716383    -289.59534477 -5467.19309188  -180.6       ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   8.25303748 10.88390334  0.        ]
------
Step:10, Action:East
State  182
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.  0.  5.4 0. ]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-0.6  0.   0.   0. ]
------
Step:11, Action:South
State  198
Old Q Values:  [-0.6  0.   0.   0. ]
New Q values:  [-6.0000e-01 -5.9946e+03  0.0000e+00  0.0000e+00]
Reward: -9991  Episode Reward:  -9951
xxxxx
x   x
x  .x
x g x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   12.90739622     0.         -7476.85600442     0.        ]
------
Step:1, Action:North
State  192
Old Q Values:  [-5.36728685e+03  6.28750906e+01  5.06904510e+00  0.00000000e+00]
New Q values:  [-8.12959405e+03  6.28750906e+01  5.06904510e+00  0.00000000e+00]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.g x
x. .x
x...x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -221.70871729 -180.6           6.39010895]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           6.39010895]
New Q values:  [-180.6        -221.70871729 -180.6           8.68285202]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  2.42269479e+00]
------
Step:2, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  2.42269479e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  7.02333052e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         2.18084202 0.05486176 0.        ]
------
Step:3, Action:South
State  111
Old Q Values:  [0.         2.18084202 0.05486176 0.        ]
New Q values:  [0.         9.53750781 0.05486176 0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   8.25303748 10.88390334  0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [-0.1803238   8.25303748 10.88390334  0.        ]
New Q values:  [-0.1803238   8.25303748 10.02286307  0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[0.89767243 0.         0.         0.        ]
------
Step:5, Action:North
State  198
Old Q Values:  [-6.0000e-01 -5.9946e+03  0.0000e+00  0.0000e+00]
New Q values:  [-2.7887208e-01 -5.9946000e+03  0.0000000e+00  0.0000000e+00]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x  .x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.87042640e+00]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.49707637e+00  1.87042640e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.49707637e+00  7.27607841e-01]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x  .x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6       -1344.842304      1.9314576     0.       ]
------
Step:7, Action:East
State  110
Old Q Values:  [ -180.6       -1344.842304      1.9314576     0.       ]
New Q values:  [-1.80600000e+02 -1.34484230e+03  6.21705953e-01  0.00000000e+00]
Reward: -1  Episode Reward:  33
xxxxx
x a x
xg .x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.49707637e+00  7.27607841e-01]
------
Step:8, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686          39.73562398  -2399.22704449]
New Q values:  [-10156.11771313  -5995.686          18.28757648  -2399.22704449]
Reward: -1  Episode Reward:  32
xxxxx
xg ax
x  .x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6            9.9777563      0.         -2392.84841192]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6        -221.70871729 -180.6           8.68285202]
New Q values:  [-180.6         -77.8701714  -180.6           8.68285202]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -938.27182281   -48.49854524 -6170.35693855    18.04438507]
------
Step:10, Action:West
State  216
Old Q Values:  [ -938.27182281   -48.49854524 -6170.35693855    18.04438507]
New Q values:  [ -938.27182281   -48.49854524 -6170.35693855    12.09157227]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[18.2460608   0.          3.79073389  0.        ]
------
Step:11, Action:North
State  200
Old Q Values:  [18.2460608   0.          3.79073389  0.        ]
New Q values:  [7.14754723 0.         3.79073389 0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.49707637e+00  7.27607841e-01]
------
Step:12, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.49707637e+00  7.27607841e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  2.60368615e+00  7.27607841e-01]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -77.8701714  -180.6           8.68285202]
------
Step:13, Action:West
State  138
Old Q Values:  [-180.6         -77.8701714  -180.6           8.68285202]
New Q values:  [-180.6         -77.8701714  -180.6           3.65424665]
Reward: -1  Episode Reward:  37
xxxxx
x a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  2.60368615e+00  7.27607841e-01]
------
Step:14, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  2.60368615e+00  7.27607841e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.53774846e+00  7.27607841e-01]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -77.8701714  -180.6           3.65424665]
------
Step:15, Action:West
State  138
Old Q Values:  [-180.6         -77.8701714  -180.6           3.65424665]
New Q values:  [-180.6        -77.8701714 -180.6          1.3230232]
Reward: -1  Episode Reward:  35
xxxxx
x a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.53774846e+00  7.27607841e-01]
------
Step:16, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.53774846e+00  7.27607841e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.12006343e-01  7.27607841e-01]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -77.8701714 -180.6          1.3230232]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6        -77.8701714 -180.6          1.3230232]
New Q values:  [-1.80600000e+02 -7.78701714e+01 -1.80600000e+02  1.47491632e-01]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.12006343e-01  7.27607841e-01]
------
Step:18, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  7.02333052e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  2.71109883e+00]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    1.67255539    0.50753906    0.        ]
------
Step:19, Action:South
State  107
Old Q Values:  [-252.35169558    1.67255539    0.50753906    0.        ]
New Q values:  [-252.35169558    0.53702216    0.50753906    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa  x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.          1.56        0.        ]
------
Step:20, Action:East
State  190
Old Q Values:  [ 1.04129094  0.         -0.6         0.        ]
New Q values:  [ 1.04129094  0.         -0.84        0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458     0.             0.        ]
------
Step:21, Action:North
State  200
Old Q Values:  [7.14754723 0.         3.79073389 0.        ]
New Q values:  [2.47730124 0.         3.79073389 0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.12006343e-01  7.27607841e-01]
------
Step:22, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.12006343e-01  7.27607841e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.12006343e-01 -1.22445078e-01]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -1.34484230e+03  6.21705953e-01  0.00000000e+00]
------
Step:23, Action:East
State  107
Old Q Values:  [-252.35169558    0.53702216    0.50753906    0.        ]
New Q values:  [-252.35169558    0.53702216   -0.27338247    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.12006343e-01 -1.22445078e-01]
------
Step:24, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.12006343e-01 -1.22445078e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -3.90949973e-01 -1.22445078e-01]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -7.78701714e+01 -1.80600000e+02  1.47491632e-01]
------
Step:25, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -7.78701714e+01 -1.80600000e+02  1.47491632e-01]
New Q values:  [-180.6        -77.8701714 -180.6          0.2723263]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  2.71109883e+00]
------
Step:26, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  2.71109883e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.45546178e-01]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    0.53702216   -0.27338247    0.        ]
------
Step:27, Action:South
State  107
Old Q Values:  [-252.35169558    0.53702216   -0.27338247    0.        ]
New Q values:  [-2.52351696e+02  8.28088630e-02 -2.73382472e-01  0.00000000e+00]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xa  x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.          1.56        0.        ]
------
Step:28, Action:East
State  187
Old Q Values:  [-0.11058345  0.          1.56        0.        ]
New Q values:  [-0.11058345  0.          0.29330173  0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[0.89767243 0.         0.         0.        ]
------
Step:29, Action:North
State  201
Old Q Values:  [3.90733471 0.         0.         0.        ]
New Q values:  [1.52371192 0.         0.         0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x a x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.45546178e-01]
------
Step:30, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -3.90949973e-01 -1.22445078e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01 -1.22445078e-01]
Reward: -1  Episode Reward:  20
xxxxx
x  ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -77.8701714 -180.6          0.2723263]
------
Step:31, Action:West
State  136
Old Q Values:  [-6180.6            9.9777563      0.         -2392.84841192]
New Q values:  [-6180.6            9.9777563      0.         -6952.25309182]
Reward: -10001  Episode Reward:  -9981
xxxxx
x g x
x   x
x.. x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -77.8701714 -180.6          0.2723263]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -77.8701714 -180.6          0.2723263]
New Q values:  [-180.6        -77.8701714 -180.6          5.472197 ]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.74682099e-01 -1.22445078e-01]
------
Step:2, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.45546178e-01]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  8.51947081e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         9.53750781 0.05486176 0.        ]
------
Step:3, Action:South
State  109
Old Q Values:  [-1.80600000e+02 -9.12181572e-01 -2.40084000e+03 -1.80600000e+02]
New Q values:  [ -180.6            5.03512737 -2400.84        -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02 -3.37602450e-01 -5.98845944e+03  0.00000000e+00]
------
Step:4, Action:West
State  189
Old Q Values:  [-2.98870365e-02 -3.37602450e-01 -5.98845944e+03  0.00000000e+00]
New Q values:  [-2.98870365e-02 -3.37602450e-01 -5.98845944e+03 -1.80600000e+02]
Reward: -301  Episode Reward:  -274
xxxxx
x g x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02 -3.37602450e-01 -5.98845944e+03 -1.80600000e+02]
------
Step:5, Action:North
State  188
Old Q Values:  [-5999.269728     0.           0.           0.      ]
New Q values:  [-8398.9776192     0.            0.            0.       ]
Reward: -10001  Episode Reward:  -10275
xxxxx
xg  x
x . x
x...x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6           8.19569179    8.424     ]
------
Step:1, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6           8.19569179    8.424     ]
New Q values:  [-951.35054867 -180.6          10.6581674     8.424     ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[    6.5996356  -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:2, Action:North
State  288
Old Q Values:  [    6.5996356  -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [   10.58779512 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    8.49313627  -910.01049469     0.         -5969.91524924]
------
Step:3, Action:North
State  208
Old Q Values:  [    8.49313627  -910.01049469     0.         -5969.91524924]
New Q values:  [   11.7905814   -910.01049469     0.         -5969.91524924]
Reward: 9  Episode Reward:  27
xxxxx
x.gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6            9.9777563      0.         -6952.25309182]
------
Step:4, Action:South
State  136
Old Q Values:  [-6180.6            9.9777563      0.         -6952.25309182]
New Q values:  [-6.18060000e+03  6.92827694e+00  0.00000000e+00 -6.95225309e+03]
Reward: -1  Episode Reward:  26
xxxxx
xg. x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   11.7905814   -910.01049469     0.         -5969.91524924]
------
Step:5, Action:North
State  208
Old Q Values:  [   11.7905814   -910.01049469     0.         -5969.91524924]
New Q values:  [    6.15746399  -910.01049469     0.         -5969.91524924]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           6.80410476    0.           -0.6       ]
------
Step:6, Action:South
State  130
Old Q Values:  [-180.6           6.80410476    0.           -0.6       ]
New Q values:  [-180.6           3.15739541    0.           -0.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    3.45251168 -1446.5611984      0.             0.        ]
------
Step:7, Action:North
State  210
Old Q Values:  [    3.45251168 -1446.5611984      0.             0.        ]
New Q values:  [    1.72822329 -1446.5611984      0.             0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           3.15739541    0.           -0.6       ]
------
Step:8, Action:South
State  130
Old Q Values:  [-180.6           3.15739541    0.           -0.6       ]
New Q values:  [-180.6           2.51019736    0.           -0.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    6.15746399  -910.01049469     0.         -5969.91524924]
------
Step:9, Action:North
State  208
Old Q Values:  [    6.15746399  -910.01049469     0.         -5969.91524924]
New Q values:  [ 2.61604480e+00 -9.10010495e+02  0.00000000e+00 -5.96991525e+03]
Reward: -1  Episode Reward:  21
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           2.51019736    0.           -0.6       ]
------
Step:10, Action:South
State  130
Old Q Values:  [-180.6           2.51019736    0.           -0.6       ]
New Q values:  [-180.6           1.18889238    0.           -0.6       ]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2.61604480e+00 -9.10010495e+02  0.00000000e+00 -5.96991525e+03]
------
Step:11, Action:North
State  210
Old Q Values:  [    1.72822329 -1446.5611984      0.             0.        ]
New Q values:  [ 4.47957033e-01 -1.44656120e+03  0.00000000e+00  0.00000000e+00]
Reward: -1  Episode Reward:  19
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           1.18889238    0.           -0.6       ]
------
Step:12, Action:South
State  130
Old Q Values:  [-180.6           1.18889238    0.           -0.6       ]
New Q values:  [-1.80600000e+02  9.94406343e-03  0.00000000e+00 -6.00000000e-01]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.47957033e-01 -1.44656120e+03  0.00000000e+00  0.00000000e+00]
------
Step:13, Action:North
State  210
Old Q Values:  [ 4.47957033e-01 -1.44656120e+03  0.00000000e+00  0.00000000e+00]
New Q values:  [-4.17833968e-01 -1.44656120e+03  0.00000000e+00  0.00000000e+00]
Reward: -1  Episode Reward:  17
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-1.80600000e+02  9.94406343e-03  0.00000000e+00 -6.00000000e-01]
------
Step:14, Action:South
State  130
Old Q Values:  [-1.80600000e+02  9.94406343e-03  0.00000000e+00 -6.00000000e-01]
New Q values:  [-180.6          -0.59602237    0.           -0.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[-4.17833968e-01 -1.44656120e+03  0.00000000e+00  0.00000000e+00]
------
Step:15, Action:East
State  208
Old Q Values:  [ 2.61604480e+00 -9.10010495e+02  0.00000000e+00 -5.96991525e+03]
New Q values:  [ 2.61604480e+00 -9.10010495e+02 -6.17981519e+03 -5.96991525e+03]
Reward: -10301  Episode Reward:  -10285
xxxxx
x.. x
x..gx
x   x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   10.58779512 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:1, Action:North
State  288
Old Q Values:  [   10.58779512 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [   10.41993149 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2.61604480e+00 -9.10010495e+02 -6.17981519e+03 -5.96991525e+03]
------
Step:2, Action:North
State  208
Old Q Values:  [ 2.61604480e+00 -9.10010495e+02 -6.17981519e+03 -5.96991525e+03]
New Q values:  [    8.524901    -910.01049469 -6179.81518656 -5969.91524924]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  6.92827694e+00  0.00000000e+00 -6.95225309e+03]
------
Step:3, Action:South
State  136
Old Q Values:  [-6.18060000e+03  6.92827694e+00  0.00000000e+00 -6.95225309e+03]
New Q values:  [-6.18060000e+03  4.72878108e+00  0.00000000e+00 -6.95225309e+03]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    8.524901    -910.01049469 -6179.81518656 -5969.91524924]
------
Step:4, Action:North
State  208
Old Q Values:  [    8.524901    -910.01049469 -6179.81518656 -5969.91524924]
New Q values:  [ 4.22859472e+00 -9.10010495e+02 -6.17981519e+03 -5.96991525e+03]
Reward: -1  Episode Reward:  16
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  4.72878108e+00  0.00000000e+00 -6.95225309e+03]
------
Step:5, Action:South
State  136
Old Q Values:  [-6.18060000e+03  4.72878108e+00  0.00000000e+00 -6.95225309e+03]
New Q values:  [-6.18060000e+03  2.56009085e+00  0.00000000e+00 -6.95225309e+03]
Reward: -1  Episode Reward:  15
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4.22859472e+00 -9.10010495e+02 -6.17981519e+03 -5.96991525e+03]
------
Step:6, Action:South
State  208
Old Q Values:  [ 4.22859472e+00 -9.10010495e+02 -6.17981519e+03 -5.96991525e+03]
New Q values:  [ 4.22859472e+00 -3.61478218e+02 -6.17981519e+03 -5.96991525e+03]
Reward: -1  Episode Reward:  14
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   10.41993149 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:7, Action:North
State  288
Old Q Values:  [   10.41993149 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [ 4.83655101e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4.22859472e+00 -3.61478218e+02 -6.17981519e+03 -5.96991525e+03]
------
Step:8, Action:North
State  208
Old Q Values:  [ 4.22859472e+00 -3.61478218e+02 -6.17981519e+03 -5.96991525e+03]
New Q values:  [ 2.73309699e+00 -3.61478218e+02 -6.17981519e+03 -5.96991525e+03]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -77.8701714 -180.6          5.472197 ]
------
Step:9, Action:West
State  138
Old Q Values:  [-180.6        -77.8701714 -180.6          5.472197 ]
New Q values:  [-180.6         -77.8701714  -180.6           1.55214528]
Reward: -1  Episode Reward:  11
xxxxx
x.a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.74682099e-01 -1.22445078e-01]
------
Step:10, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01 -1.22445078e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  5.53753375e+00]
Reward: 9  Episode Reward:  20
xxxxx
xa  x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -1.34484230e+03  6.21705953e-01  0.00000000e+00]
------
Step:11, Action:East
State  107
Old Q Values:  [-2.52351696e+02  8.28088630e-02 -2.73382472e-01  0.00000000e+00]
New Q values:  [-2.52351696e+02  8.28088630e-02  9.51907138e-01  0.00000000e+00]
Reward: -1  Episode Reward:  19
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  5.53753375e+00]
------
Step:12, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  8.51947081e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.09336047e+00]
Reward: -1  Episode Reward:  18
xxxxx
xa  x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  9.51907138e-01  0.00000000e+00]
------
Step:13, Action:East
State  107
Old Q Values:  [-2.52351696e+02  8.28088630e-02  9.51907138e-01  0.00000000e+00]
New Q values:  [-2.52351696e+02  8.28088630e-02  7.08770995e-01  0.00000000e+00]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.09336047e+00]
------
Step:14, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  5.53753375e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  1.82764480e+00]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  7.08770995e-01  0.00000000e+00]
------
Step:15, Action:East
State  105
Old Q Values:  [-1.80600000e+02 -2.90222782e-01 -5.98867931e+03  0.00000000e+00]
New Q values:  [-1.80600000e+02 -2.90222782e-01 -8.39058545e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -9985
xxxxx
x g x
x.. x
x.. x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[-8.12959405e+03  6.28750906e+01  5.06904510e+00  0.00000000e+00]
------
Step:1, Action:South
State  192
Old Q Values:  [-8.12959405e+03  6.28750906e+01  5.06904510e+00  0.00000000e+00]
New Q values:  [-8.12959405e+03  3.37474865e+01  5.06904510e+00  0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6          10.6581674     8.424     ]
------
Step:2, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6          10.6581674     8.424     ]
New Q values:  [-951.35054867 -180.6          11.11423226    8.424     ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4.83655101e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
------
Step:3, Action:North
State  288
Old Q Values:  [ 4.83655101e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
New Q values:  [    7.33462041 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[-4.17833968e-01 -1.44656120e+03  0.00000000e+00  0.00000000e+00]
------
Step:4, Action:East
State  208
Old Q Values:  [ 2.73309699e+00 -3.61478218e+02 -6.17981519e+03 -5.96991525e+03]
New Q values:  [ 2.73309699e+00 -3.61478218e+02 -2.65170615e+03 -5.96991525e+03]
Reward: -301  Episode Reward:  -274
xxxxx
x...x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2.73309699e+00 -3.61478218e+02 -2.65170615e+03 -5.96991525e+03]
------
Step:5, Action:North
State  208
Old Q Values:  [ 2.73309699e+00 -3.61478218e+02 -2.65170615e+03 -5.96991525e+03]
New Q values:  [    6.4932388   -361.47821843 -2651.70614553 -5969.91524924]
Reward: 9  Episode Reward:  -265
xxxxx
xg.ax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[    0.       0.   -8652.84     0.  ]
------
Step:6, Action:North
State  128
Old Q Values:  [    0.       0.   -8652.84     0.  ]
New Q values:  [ -180.6      0.   -8652.84     0.  ]
Reward: -301  Episode Reward:  -566
xxxxx
x.gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ -180.6      0.   -8652.84     0.  ]
------
Step:7, Action:South
State  130
Old Q Values:  [-180.6          -0.59602237    0.           -0.6       ]
New Q values:  [-180.6           1.10956269    0.           -0.6       ]
Reward: -1  Episode Reward:  -567
xxxxx
x.. x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    6.4932388   -361.47821843 -2651.70614553 -5969.91524924]
------
Step:8, Action:North
State  210
Old Q Values:  [-4.17833968e-01 -1.44656120e+03  0.00000000e+00  0.00000000e+00]
New Q values:  [-4.3426478e-01 -1.4465612e+03  0.0000000e+00  0.0000000e+00]
Reward: -1  Episode Reward:  -568
xxxxx
x..ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           1.10956269    0.           -0.6       ]
------
Step:9, Action:South
State  130
Old Q Values:  [-180.6           1.10956269    0.           -0.6       ]
New Q values:  [-1.80600000e+02 -1.56174924e-01  0.00000000e+00 -6.00000000e-01]
Reward: -1  Episode Reward:  -569
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[-4.3426478e-01 -1.4465612e+03  0.0000000e+00  0.0000000e+00]
------
Step:10, Action:East
State  210
Old Q Values:  [-4.3426478e-01 -1.4465612e+03  0.0000000e+00  0.0000000e+00]
New Q values:  [-4.3426478e-01 -1.4465612e+03 -1.8060000e+02  0.0000000e+00]
Reward: -301  Episode Reward:  -870
xxxxx
x.. x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[-4.3426478e-01 -1.4465612e+03 -1.8060000e+02  0.0000000e+00]
------
Step:11, Action:West
State  210
Old Q Values:  [-4.3426478e-01 -1.4465612e+03 -1.8060000e+02  0.0000000e+00]
New Q values:  [-4.34264780e-01 -1.44656120e+03 -1.80600000e+02  8.53300729e+00]
Reward: -1  Episode Reward:  -871
xxxxx
x.. x
x a x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[30.44335764  0.         10.9944      0.        ]
------
Step:12, Action:North
State  193
Old Q Values:  [   12.90739622     0.         -7476.85600442     0.        ]
New Q values:  [   10.56295849     0.         -7476.85600442     0.        ]
Reward: 9  Episode Reward:  -862
xxxxx
x.a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6    0.     0.     0. ]
------
Step:13, Action:South
State  113
Old Q Values:  [  0.        655.2566939   0.          0.       ]
New Q values:  [  0.        264.6715651   0.          0.       ]
Reward: -1  Episode Reward:  -863
xxxxx
x. gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   10.56295849     0.         -7476.85600442     0.        ]
------
Step:14, Action:North
State  192
Old Q Values:  [-8.12959405e+03  3.37474865e+01  5.06904510e+00  0.00000000e+00]
New Q values:  [-9.25243762e+03  3.37474865e+01  5.06904510e+00  0.00000000e+00]
Reward: -10001  Episode Reward:  -10864
xxxxx
x.g x
x   x
x.  x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.09336047e+00]
------
Step:1, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  1.82764480e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  6.34368922e+00]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  7.08770995e-01  0.00000000e+00]
------
Step:2, Action:East
State  110
Old Q Values:  [-1.80600000e+02 -1.34484230e+03  6.21705953e-01  0.00000000e+00]
New Q values:  [ -180.6        -1344.842304       1.55178915     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  6.34368922e+00]
------
Step:3, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  6.34368922e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  2.40301243e+00]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1344.842304       1.55178915     0.        ]
------
Step:4, Action:East
State  107
Old Q Values:  [-2.52351696e+02  8.28088630e-02  7.08770995e-01  0.00000000e+00]
New Q values:  [-2.52351696e+02  8.28088630e-02  4.04412128e-01  0.00000000e+00]
Reward: -1  Episode Reward:  6
xxxxx
x a.x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  2.40301243e+00]
------
Step:5, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  2.40301243e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  4.82528611e-01]
Reward: -1  Episode Reward:  5
xxxxx
xa .x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  4.04412128e-01  0.00000000e+00]
------
Step:6, Action:East
State  109
Old Q Values:  [ -180.6            5.03512737 -2400.84        -180.6       ]
New Q values:  [-1.80600000e+02  5.03512737e+00 -6.95544973e+03 -1.80600000e+02]
Reward: -10001  Episode Reward:  -9996
xxxxx
x g.x
x.. x
x...x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[    7.33462041 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:1, Action:North
State  288
Old Q Values:  [    7.33462041 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [   10.89375035 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[-4.34264780e-01 -1.44656120e+03 -1.80600000e+02  8.53300729e+00]
------
Step:2, Action:West
State  210
Old Q Values:  [-4.34264780e-01 -1.44656120e+03 -1.80600000e+02  8.53300729e+00]
New Q values:  [-4.34264780e-01 -1.44656120e+03 -1.80600000e+02  4.63650404e+00]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987  6.0776704   0.        ]
------
Step:3, Action:East
State  192
Old Q Values:  [-9.25243762e+03  3.37474865e+01  5.06904510e+00  0.00000000e+00]
New Q values:  [-9.25243762e+03  3.37474865e+01  3.37558968e+00  0.00000000e+00]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    6.4932388   -361.47821843 -2651.70614553 -5969.91524924]
------
Step:4, Action:North
State  208
Old Q Values:  [    6.4932388   -361.47821843 -2651.70614553 -5969.91524924]
New Q values:  [    8.4629391   -361.47821843 -2651.70614553 -5969.91524924]
Reward: 9  Episode Reward:  16
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -77.8701714  -180.6           1.55214528]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6         -77.8701714  -180.6           1.55214528]
New Q values:  [-180.6         -77.8701714  -180.6           6.16561669]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  4.82528611e-01]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  4.82528611e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  5.59301144e+00]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.8060000e+02 -6.0006000e+03 -3.2377956e-01  0.0000000e+00]
------
Step:7, Action:West
State  106
Old Q Values:  [-1.8060000e+02 -6.0006000e+03 -3.2377956e-01  0.0000000e+00]
New Q values:  [-1.8060000e+02 -6.0006000e+03 -3.2377956e-01 -1.8060000e+02]
Reward: -301  Episode Reward:  -267
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.8060000e+02 -6.0006000e+03 -3.2377956e-01 -1.8060000e+02]
------
Step:8, Action:East
State  106
Old Q Values:  [-1.8060000e+02 -6.0006000e+03 -3.2377956e-01 -1.8060000e+02]
New Q values:  [-1.80600000e+02 -6.00060000e+03  9.48391609e-01 -1.80600000e+02]
Reward: -1  Episode Reward:  -268
xxxxx
x a x
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  5.59301144e+00]
------
Step:9, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  5.59301144e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  1.92172206e+00]
Reward: -1  Episode Reward:  -269
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  9.48391609e-01 -1.80600000e+02]
------
Step:10, Action:East
State  107
Old Q Values:  [-2.52351696e+02  8.28088630e-02  4.04412128e-01  0.00000000e+00]
New Q values:  [-2.52351696e+02  8.28088630e-02  1.38281469e-01  0.00000000e+00]
Reward: -1  Episode Reward:  -270
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  1.92172206e+00]
------
Step:11, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  1.92172206e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  4.53206307e-01]
Reward: -1  Episode Reward:  -271
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  9.48391609e-01 -1.80600000e+02]
------
Step:12, Action:East
State  104
Old Q Values:  [-8652.84     0.   -6000.6  -6180.6 ]
New Q values:  [-8652.84           0.         -2395.35372705 -6180.6       ]
Reward: -1  Episode Reward:  -272
xxxxx
xga x
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686          18.28757648  -2399.22704449]
------
Step:13, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686          18.28757648  -2399.22704449]
New Q values:  [-1.01561177e+04 -5.99568600e+03  7.48305785e+00 -2.39922704e+03]
Reward: -1  Episode Reward:  -273
xxxxx
x gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  2.56009085e+00  0.00000000e+00 -6.95225309e+03]
------
Step:14, Action:South
State  136
Old Q Values:  [-6.18060000e+03  2.56009085e+00  0.00000000e+00 -6.95225309e+03]
New Q values:  [-6.18060000e+03  4.05150802e+00  0.00000000e+00 -6.95225309e+03]
Reward: -1  Episode Reward:  -274
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -938.27182281   -48.49854524 -6170.35693855    12.09157227]
------
Step:15, Action:West
State  216
Old Q Values:  [ -938.27182281   -48.49854524 -6170.35693855    12.09157227]
New Q values:  [-9.38271823e+02 -4.84985452e+01 -6.17035694e+03  5.37384907e+00]
Reward: -1  Episode Reward:  -275
xxxxx
x   x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[2.47730124 0.         3.79073389 0.        ]
------
Step:16, Action:East
State  200
Old Q Values:  [2.47730124 0.         3.79073389 0.        ]
New Q values:  [2.47730124 0.         2.52844828 0.        ]
Reward: -1  Episode Reward:  -276
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02 -4.84985452e+01 -6.17035694e+03  5.37384907e+00]
------
Step:17, Action:West
State  216
Old Q Values:  [-9.38271823e+02 -4.84985452e+01 -6.17035694e+03  5.37384907e+00]
New Q values:  [-9.38271823e+02 -4.84985452e+01 -6.17035694e+03  2.30807411e+00]
Reward: -1  Episode Reward:  -277
xxxxx
x g x
x.a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[2.47730124 0.         2.52844828 0.        ]
------
Step:18, Action:East
State  200
Old Q Values:  [2.47730124 0.         2.52844828 0.        ]
New Q values:  [2.47730124 0.         1.10380155 0.        ]
Reward: -1  Episode Reward:  -278
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02 -4.84985452e+01 -6.17035694e+03  2.30807411e+00]
------
Step:19, Action:South
State  216
Old Q Values:  [-9.38271823e+02 -4.84985452e+01 -6.17035694e+03  2.30807411e+00]
New Q values:  [-9.38271823e+02 -1.67312930e+01 -6.17035694e+03  2.30807411e+00]
Reward: -1  Episode Reward:  -279
xxxxx
x g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   10.89375035 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:20, Action:North
State  288
Old Q Values:  [   10.89375035 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [ 4.44992237e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
Reward: -1  Episode Reward:  -280
xxxxx
x  gx
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02 -1.67312930e+01 -6.17035694e+03  2.30807411e+00]
------
Step:21, Action:West
State  216
Old Q Values:  [-9.38271823e+02 -1.67312930e+01 -6.17035694e+03  2.30807411e+00]
New Q values:  [-9.38271823e+02 -1.67312930e+01 -6.17035694e+03  1.06642002e+00]
Reward: -1  Episode Reward:  -281
xxxxx
x g x
x.a x
x.. x
xxxxx
Step:22, Action:West
State  201
Old Q Values:  [1.52371192 0.         0.         0.        ]
New Q values:  [1.52371192 0.         0.         5.4       ]
Reward: 9  Episode Reward:  -272
xxxxx
x  gx
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[    0.             0.         -5997.30277983  -178.98      ]
------
Step:23, Action:North
State  185
Old Q Values:  [    0.             0.         -5997.30277983  -178.98      ]
New Q values:  [-6.00000000e-01  0.00000000e+00 -5.99730278e+03 -1.78980000e+02]
Reward: -1  Episode Reward:  -273
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-1.80600000e+02 -2.90222782e-01 -8.39058545e+03  0.00000000e+00]
------
Step:24, Action:West
State  107
Old Q Values:  [-2.52351696e+02  8.28088630e-02  1.38281469e-01  0.00000000e+00]
New Q values:  [-2.52351696e+02  8.28088630e-02  1.38281469e-01 -1.80558516e+02]
Reward: -301  Episode Reward:  -574
xxxxx
xa  x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  1.38281469e-01 -1.80558516e+02]
------
Step:25, Action:East
State  105
Old Q Values:  [-1.80600000e+02 -2.90222782e-01 -8.39058545e+03  0.00000000e+00]
New Q values:  [-1.80600000e+02 -2.90222782e-01 -9.35458926e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -10575
xxxxx
x g x
x   x
x.. x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -77.8701714  -180.6           6.16561669]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6         -77.8701714  -180.6           6.16561669]
New Q values:  [-180.6         -77.8701714  -180.6           8.79425482]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.09336047e+00]
------
Step:2, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.09336047e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  9.49859653e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[0.         9.53750781 0.05486176 0.        ]
------
Step:3, Action:South
State  111
Old Q Values:  [0.         9.53750781 0.05486176 0.        ]
New Q values:  [ 0.         12.22186204  0.05486176  0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   8.25303748 10.02286307  0.        ]
------
Step:4, Action:East
State  182
Old Q Values:  [0.  0.  5.4 0. ]
New Q values:  [0.   0.   7.56 0.  ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -5.9946000e+03  0.0000000e+00  0.0000000e+00]
------
Step:5, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458     0.             0.        ]
New Q values:  [    0.         -5884.35407458    27.92097842     0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 0.         75.06992807  0.          0.        ]
------
Step:6, Action:South
State  216
Old Q Values:  [-9.38271823e+02 -1.67312930e+01 -6.17035694e+03  1.06642002e+00]
New Q values:  [-9.38271823e+02 -5.95754048e+00 -6.17035694e+03  1.06642002e+00]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4.44992237e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
------
Step:7, Action:North
State  288
Old Q Values:  [ 4.44992237e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
New Q values:  [   23.70094737 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 0.         75.06992807  0.          0.        ]
------
Step:8, Action:South
State  216
Old Q Values:  [-9.38271823e+02 -5.95754048e+00 -6.17035694e+03  1.06642002e+00]
New Q values:  [-9.38271823e+02  4.12726802e+00 -6.17035694e+03  1.06642002e+00]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   23.70094737 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:9, Action:North
State  288
Old Q Values:  [   23.70094737 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [   10.11855935 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  4.12726802e+00 -6.17035694e+03  1.06642002e+00]
------
Step:10, Action:South
State  218
Old Q Values:  [ 0.         75.06992807  0.          0.        ]
New Q values:  [ 0.         32.46353903  0.          0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   10.11855935 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:11, Action:North
State  288
Old Q Values:  [   10.11855935 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [   13.18648545 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 0.         32.46353903  0.          0.        ]
------
Step:12, Action:South
State  216
Old Q Values:  [-9.38271823e+02  4.12726802e+00 -6.17035694e+03  1.06642002e+00]
New Q values:  [-9.38271823e+02  5.00685284e+00 -6.17035694e+03  1.06642002e+00]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   13.18648545 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:13, Action:North
State  288
Old Q Values:  [   13.18648545 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [   14.41365589 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 0.         32.46353903  0.          0.        ]
------
Step:14, Action:South
State  218
Old Q Values:  [ 0.         32.46353903  0.          0.        ]
New Q values:  [ 0.         16.70951238  0.          0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   14.41365589 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:15, Action:North
State  288
Old Q Values:  [   14.41365589 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [    6.66751821 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  5.00685284e+00 -6.17035694e+03  1.06642002e+00]
------
Step:16, Action:South
State  216
Old Q Values:  [-9.38271823e+02  5.00685284e+00 -6.17035694e+03  1.06642002e+00]
New Q values:  [-9.38271823e+02  3.40299660e+00 -6.17035694e+03  1.06642002e+00]
Reward: -1  Episode Reward:  34
xxxxx
xg  x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[    6.66751821 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:17, Action:North
State  288
Old Q Values:  [    6.66751821 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [ 3.08790626e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  3.40299660e+00 -6.17035694e+03  1.06642002e+00]
------
Step:18, Action:South
State  218
Old Q Values:  [ 0.         16.70951238  0.          0.        ]
New Q values:  [0.         7.01017683 0.         0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3.08790626e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
------
Step:19, Action:North
State  288
Old Q Values:  [ 3.08790626e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
New Q values:  [ 1.65606149e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  3.40299660e+00 -6.17035694e+03  1.06642002e+00]
------
Step:20, Action:South
State  216
Old Q Values:  [-9.38271823e+02  3.40299660e+00 -6.17035694e+03  1.06642002e+00]
New Q values:  [-9.38271823e+02  1.25801709e+00 -6.17035694e+03  1.06642002e+00]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1.65606149e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
------
Step:21, Action:North
State  288
Old Q Values:  [ 1.65606149e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
New Q values:  [ 4.39829720e-01 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  1.25801709e+00 -6.17035694e+03  1.06642002e+00]
------
Step:22, Action:South
State  216
Old Q Values:  [-9.38271823e+02  1.25801709e+00 -6.17035694e+03  1.06642002e+00]
New Q values:  [-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.06642002e+00]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4.39829720e-01 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
------
Step:23, Action:North
State  288
Old Q Values:  [ 4.39829720e-01 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
New Q values:  [-1.04142107e-01 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.06642002e+00]
------
Step:24, Action:West
State  216
Old Q Values:  [-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.06642002e+00]
New Q values:  [-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.44656801e+00]
Reward: -1  Episode Reward:  26
xxxxx
x  gx
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[1.52371192 0.         0.         5.4       ]
------
Step:25, Action:West
State  200
Old Q Values:  [2.47730124 0.         1.10380155 0.        ]
New Q values:  [ 2.47730124  0.          1.10380155 -0.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x g x
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -5.99730278e+03 -1.78980000e+02]
------
Step:26, Action:South
State  189
Old Q Values:  [-2.98870365e-02 -3.37602450e-01 -5.98845944e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  7.27987392e+00 -5.98845944e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  34
xxxxx
x  gx
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    6.716383    -289.59534477 -5467.19309188  -180.6       ]
------
Step:27, Action:North
State  261
Old Q Values:  [    6.716383    -289.59534477 -5467.19309188  -180.6       ]
New Q values:  [ 4.27051537e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  33
xxxxx
x g x
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02  7.27987392e+00 -5.98845944e+03 -1.80600000e+02]
------
Step:28, Action:South
State  189
Old Q Values:  [-2.98870365e-02  7.27987392e+00 -5.98845944e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  3.59310418e+00 -5.98845944e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x g x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4.27051537e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:29, Action:North
State  257
Old Q Values:  [   0.  -180.6    0.     0. ]
New Q values:  [  -0.6 -180.6    0.     0. ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[0. 0. 0. 0.]
------
Step:30, Action:North
State  191
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [3.06655861 0.         0.         0.        ]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.         12.22186204  0.05486176  0.        ]
------
Step:31, Action:South
State  111
Old Q Values:  [ 0.         12.22186204  0.05486176  0.        ]
New Q values:  [0.         5.36667607 0.05486176 0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xa gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02  3.59310418e+00 -5.98845944e+03 -1.80600000e+02]
------
Step:32, Action:South
State  189
Old Q Values:  [-2.98870365e-02  3.59310418e+00 -5.98845944e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  2.11839628e+00 -5.98845944e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4.27051537e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:33, Action:North
State  261
Old Q Values:  [ 4.27051537e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 1.74372503e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  27
xxxxx
x g x
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02  2.11839628e+00 -5.98845944e+03 -1.80600000e+02]
------
Step:34, Action:South
State  188
Old Q Values:  [-8398.9776192     0.            0.            0.       ]
New Q values:  [-8398.9776192   -92.706         0.            0.       ]
Reward: -1  Episode Reward:  26
xxxxx
xg  x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576  -307.02  -6000.6   -6307.02 ]
------
Step:35, Action:South
State  260
Old Q Values:  [-2397.576  -307.02  -6000.6   -6307.02 ]
New Q values:  [-2397.576  -395.514 -6000.6   -6307.02 ]
Reward: -301  Episode Reward:  -275
xxxxx
x   x
xg  x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2397.576  -395.514 -6000.6   -6307.02 ]
------
Step:36, Action:South
State  260
Old Q Values:  [-2397.576  -395.514 -6000.6   -6307.02 ]
New Q values:  [-2397.576  -6457.4598 -6000.6    -6307.02  ]
Reward: -10301  Episode Reward:  -10576
xxxxx
x   x
x   x
xg. x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[-9.25243762e+03  3.37474865e+01  3.37558968e+00  0.00000000e+00]
------
Step:1, Action:South
State  192
Old Q Values:  [-9.25243762e+03  3.37474865e+01  3.37558968e+00  0.00000000e+00]
New Q values:  [-9.25243762e+03  2.22332643e+01  3.37558968e+00  0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6          11.11423226    8.424     ]
------
Step:2, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6          11.11423226    8.424     ]
New Q values:  [-951.35054867 -180.6           9.81445027    8.424     ]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1.04142107e-01 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
------
Step:3, Action:North
State  288
Old Q Values:  [-1.04142107e-01 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
New Q values:  [    7.89722489 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    8.4629391   -361.47821843 -2651.70614553 -5969.91524924]
------
Step:4, Action:North
State  208
Old Q Values:  [    8.4629391   -361.47821843 -2651.70614553 -5969.91524924]
New Q values:  [    8.78517564  -361.47821843 -2651.70614553 -5969.91524924]
Reward: 9  Episode Reward:  36
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -1.56174924e-01  0.00000000e+00 -6.00000000e-01]
------
Step:5, Action:East
State  136
Old Q Values:  [-6.18060000e+03  4.05150802e+00  0.00000000e+00 -6.95225309e+03]
New Q values:  [-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -6.95225309e+03]
Reward: -301  Episode Reward:  -265
xxxxx
xg ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -6.95225309e+03]
------
Step:6, Action:South
State  130
Old Q Values:  [-1.80600000e+02 -1.56174924e-01  0.00000000e+00 -6.00000000e-01]
New Q values:  [-180.6           1.97308272    0.           -0.6       ]
Reward: -1  Episode Reward:  -266
xxxxx
x.  x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    8.78517564  -361.47821843 -2651.70614553 -5969.91524924]
------
Step:7, Action:North
State  210
Old Q Values:  [-4.34264780e-01 -1.44656120e+03 -1.80600000e+02  4.63650404e+00]
New Q values:  [-1.81781095e-01 -1.44656120e+03 -1.80600000e+02  4.63650404e+00]
Reward: -1  Episode Reward:  -267
xxxxx
x. ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           1.97308272    0.           -0.6       ]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6         -77.8701714  -180.6           8.79425482]
New Q values:  [-180.6         -30.35711735 -180.6           8.79425482]
Reward: -1  Episode Reward:  -268
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[-1.81781095e-01 -1.44656120e+03 -1.80600000e+02  4.63650404e+00]
------
Step:9, Action:West
State  210
Old Q Values:  [-1.81781095e-01 -1.44656120e+03 -1.80600000e+02  4.63650404e+00]
New Q values:  [-1.81781095e-01 -1.44656120e+03 -1.80600000e+02  3.07790274e+00]
Reward: -1  Episode Reward:  -269
xxxxx
x.  x
x.a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987  6.0776704   0.        ]
------
Step:10, Action:East
State  194
Old Q Values:  [-0.6         2.56078987  6.0776704   0.        ]
New Q values:  [-0.6         2.56078987  2.75443898  0.        ]
Reward: -1  Episode Reward:  -270
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[-1.81781095e-01 -1.44656120e+03 -1.80600000e+02  3.07790274e+00]
------
Step:11, Action:West
State  208
Old Q Values:  [    8.78517564  -361.47821843 -2651.70614553 -5969.91524924]
New Q values:  [    8.78517564  -361.47821843 -2651.70614553 -8381.89612042]
Reward: -10001  Episode Reward:  -10271
xxxxx
x.  x
x.g x
x.  x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  5.03512737e+00 -6.95544973e+03 -1.80600000e+02]
------
Step:1, Action:South
State  108
Old Q Values:  [-6.18060000e+03  4.43424000e+00  1.14369011e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  7.17369600e+00  1.14369011e+00  0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038   -46.62           0.             0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [-9.48320810e-01 -3.70021941e-01 -7.09505549e+03 -1.80600000e+02]
New Q values:  [-9.48320810e-01 -3.70021941e-01 -2.83009499e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-5994.76239261     8.424          0.             0.        ]
------
Step:3, Action:South
State  197
Old Q Values:  [1.02 0.   0.   0.  ]
New Q values:  [1.02 5.4  0.   0.  ]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[0. 0. 0. 0.]
------
Step:4, Action:North
State  277
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [1.02 0.   0.   0.  ]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[1.02 5.4  0.   0.  ]
------
Step:5, Action:South
State  197
Old Q Values:  [1.02 5.4  0.   0.  ]
New Q values:  [1.02  1.866 0.    0.   ]
Reward: -1  Episode Reward:  25
xxxxx
x  gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.02 0.   0.   0.  ]
------
Step:6, Action:North
State  277
Old Q Values:  [1.02 0.   0.   0.  ]
New Q values:  [0.3678 0.     0.     0.    ]
Reward: -1  Episode Reward:  24
xxxxx
x  .x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[1.02  1.866 0.    0.   ]
------
Step:7, Action:South
State  197
Old Q Values:  [1.02  1.866 0.    0.   ]
New Q values:  [1.02    0.25674 0.      0.     ]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[0.3678 0.     0.     0.    ]
------
Step:8, Action:North
State  277
Old Q Values:  [0.3678 0.     0.     0.    ]
New Q values:  [-0.14688  0.       0.       0.     ]
Reward: -1  Episode Reward:  22
xxxxx
x  .x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[1.02    0.25674 0.      0.     ]
------
Step:9, Action:North
State  197
Old Q Values:  [1.02    0.25674 0.      0.     ]
New Q values:  [0.13244338 0.25674    0.         0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x agx
x  .x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         1.08147795]
------
Step:10, Action:West
State  124
Old Q Values:  [0.  5.4 0.  0. ]
New Q values:  [0.         5.4        0.         0.91053821]
Reward: -1  Episode Reward:  20
xxxxx
xag.x
x  .x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  5.03512737e+00 -6.95544973e+03 -1.80600000e+02]
------
Step:11, Action:South
State  109
Old Q Values:  [-1.80600000e+02  5.03512737e+00 -6.95544973e+03 -1.80600000e+02]
New Q values:  [-1.80600000e+02  1.30304437e+00 -6.95544973e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  19
xxxxx
x  gx
xa .x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[-9.48320810e-01 -3.70021941e-01 -2.83009499e+03 -1.80600000e+02]
------
Step:12, Action:South
State  181
Old Q Values:  [-9.48320810e-01 -3.70021941e-01 -2.83009499e+03 -1.80600000e+02]
New Q values:  [-9.48320810e-01  5.77510873e+00 -2.83009499e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  28
xxxxx
x g.x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1.74372503e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:13, Action:North
State  260
Old Q Values:  [-2397.576  -6457.4598 -6000.6    -6307.02  ]
New Q values:  [ -959.6304 -6457.4598 -6000.6    -6307.02  ]
Reward: -1  Episode Reward:  27
xxxxx
xg .x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038   -46.62           0.             0.        ]
------
Step:14, Action:East
State  181
Old Q Values:  [-9.48320810e-01  5.77510873e+00 -2.83009499e+03 -1.80600000e+02]
New Q values:  [-9.48320810e-01  5.77510873e+00 -1.13011080e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-5994.76239261     8.424          0.             0.        ]
------
Step:15, Action:South
State  196
Old Q Values:  [-5994.76239261     8.424          0.             0.        ]
New Q values:  [-5.99476239e+03  2.76960000e+00  0.00000000e+00  0.00000000e+00]
Reward: -1  Episode Reward:  25
xxxxx
x  .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  -0.192 -180.6      0.     -48.78 ]
------
Step:16, Action:East
State  276
Old Q Values:  [  -0.192 -180.6      0.     -48.78 ]
New Q values:  [  -0.192      -180.6           7.76916747  -48.78      ]
Reward: 9  Episode Reward:  34
xxxxx
x g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[    7.89722489 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:17, Action:North
State  288
Old Q Values:  [    7.89722489 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [   11.19444265 -6251.71315483 -6173.56321028 -5654.78955431]
Reward: 9  Episode Reward:  43
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    8.78517564  -361.47821843 -2651.70614553 -8381.89612042]
------
Step:18, Action:North
State  208
Old Q Values:  [    8.78517564  -361.47821843 -2651.70614553 -8381.89612042]
New Q values:  [60008.91407026  -361.47821843 -2651.70614553 -8381.89612042]
Reward: 100009  Episode Reward:  100052
xxxxx
x gax
x   x
x   x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[-1.81781095e-01 -1.44656120e+03 -1.80600000e+02  3.07790274e+00]
------
Step:1, Action:West
State  208
Old Q Values:  [60008.91407026  -361.47821843 -2651.70614553 -8381.89612042]
New Q values:  [60008.91407026  -361.47821843 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   10.56295849     0.         -7476.85600442     0.        ]
------
Step:2, Action:North
State  193
Old Q Values:  [   10.56295849     0.         -7476.85600442     0.        ]
New Q values:  [   11.09169436     0.         -7476.85600442     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x.agx
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
------
Step:3, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  7.48305785e+00 -2.39922704e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  7.48305785e+00 -9.54290818e+02]
Reward: 9  Episode Reward:  27
xxxxx
xag.x
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-1.80600000e+02 -2.90222782e-01 -9.35458926e+03  0.00000000e+00]
------
Step:4, Action:West
State  107
Old Q Values:  [-2.52351696e+02  8.28088630e-02  1.38281469e-01 -1.80558516e+02]
New Q values:  [-2.52351696e+02  8.28088630e-02  1.38281469e-01 -2.52781922e+02]
Reward: -301  Episode Reward:  -274
xxxxx
xa .x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  1.38281469e-01 -2.52781922e+02]
------
Step:5, Action:East
State  105
Old Q Values:  [-1.80600000e+02 -2.90222782e-01 -9.35458926e+03  0.00000000e+00]
New Q values:  [-1.80600000e+02 -2.90222782e-01 -9.74019079e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -10275
xxxxx
x g.x
x.  x
x.. x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  9.49859653e+00]
------
Step:1, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  9.49859653e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  9.24092305e+00]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  1.38281469e-01 -2.52781922e+02]
------
Step:2, Action:East
State  109
Old Q Values:  [-1.80600000e+02  1.30304437e+00 -6.95544973e+03 -1.80600000e+02]
New Q values:  [-1.80600000e+02  1.30304437e+00 -2.78131338e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  8
xxxxx
x agx
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
------
Step:3, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  9.24092305e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.13785366e+00]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  1.38281469e-01 -2.52781922e+02]
------
Step:4, Action:East
State  111
Old Q Values:  [0.         5.36667607 0.05486176 0.        ]
New Q values:  [0.         5.36667607 0.3633008  0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.13785366e+00]
------
Step:5, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  3.13785366e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.96625906e-01]
Reward: -1  Episode Reward:  5
xxxxx
xa .x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  1.38281469e-01 -2.52781922e+02]
------
Step:6, Action:East
State  111
Old Q Values:  [0.         5.36667607 0.3633008  0.        ]
New Q values:  [0.         5.36667607 0.10609835 0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.96625906e-01]
------
Step:7, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.74682099e-01  4.53206307e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.76840361e+00  4.53206307e-01]
Reward: 9  Episode Reward:  13
xxxxx
x  ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6           8.79425482]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6           8.79425482]
New Q values:  [-180.6         -30.35711735 -180.6           3.47847996]
Reward: -1  Episode Reward:  12
xxxxx
x a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.96625906e-01]
------
Step:9, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.76840361e+00  4.53206307e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.55090543e+00  4.53206307e-01]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6           3.47847996]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6           3.47847996]
New Q values:  [-180.6         -30.35711735 -180.6           1.85666361]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.55090543e+00  4.53206307e-01]
------
Step:11, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.55090543e+00  4.53206307e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.37736126e+00  4.53206307e-01]
Reward: -1  Episode Reward:  9
xxxxx
x  ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6           1.85666361]
------
Step:12, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6           1.85666361]
New Q values:  [-180.6         -30.35711735 -180.6           0.55587382]
Reward: -1  Episode Reward:  8
xxxxx
x a x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.37736126e+00  4.53206307e-01]
------
Step:13, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.37736126e+00  4.53206307e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.17706649e-01  4.53206307e-01]
Reward: -1  Episode Reward:  7
xxxxx
x  ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6           0.55587382]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6           0.55587382]
New Q values:  [-180.6         -30.35711735 -180.6          -0.24168858]
Reward: -1  Episode Reward:  6
xxxxx
x a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.17706649e-01  4.53206307e-01]
------
Step:15, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  7.48305785e+00 -9.54290818e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  7.48305785e+00 -3.81925414e+02]
Reward: -1  Episode Reward:  5
xxxxx
xag x
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  1.30304437e+00 -2.78131338e+03 -1.80600000e+02]
------
Step:16, Action:South
State  108
Old Q Values:  [-6.18060000e+03  7.17369600e+00  1.14369011e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  8.26947840e+00  1.14369011e+00  0.00000000e+00]
Reward: 9  Episode Reward:  14
xxxxx
xg  x
xa..x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038   -46.62           0.             0.        ]
------
Step:17, Action:East
State  181
Old Q Values:  [-9.48320810e-01  5.77510873e+00 -1.13011080e+03 -1.80600000e+02]
New Q values:  [  -0.94832081    5.77510873 -445.90112872 -180.6       ]
Reward: 9  Episode Reward:  23
xxxxx
x g x
x a.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 2.47730124  0.          1.10380155 -0.6       ]
------
Step:18, Action:East
State  200
Old Q Values:  [ 2.47730124  0.          1.10380155 -0.6       ]
New Q values:  [ 2.47730124  0.          6.27549102 -0.6       ]
Reward: 9  Episode Reward:  32
xxxxx
xg  x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.44656801e+00]
------
Step:19, Action:West
State  216
Old Q Values:  [-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.44656801e+00]
New Q values:  [-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.86127451e+00]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 2.47730124  0.          6.27549102 -0.6       ]
------
Step:20, Action:East
State  200
Old Q Values:  [ 2.47730124  0.          6.27549102 -0.6       ]
New Q values:  [ 2.47730124  0.          2.46857876 -0.6       ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.86127451e+00]
------
Step:21, Action:South
State  218
Old Q Values:  [0.         7.01017683 0.         0.        ]
New Q values:  [0.         5.56240353 0.         0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   11.19444265 -6251.71315483 -6173.56321028 -5654.78955431]
------
Step:22, Action:North
State  288
Old Q Values:  [   11.19444265 -6251.71315483 -6173.56321028 -5654.78955431]
New Q values:  [ 5.54649812e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[0.         5.56240353 0.         0.        ]
------
Step:23, Action:West
State  216
Old Q Values:  [-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.86127451e+00]
New Q values:  [-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.76450980e+00]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[1.52371192 0.         0.         5.4       ]
------
Step:24, Action:West
State  200
Old Q Values:  [ 2.47730124  0.          2.46857876 -0.6       ]
New Q values:  [ 2.47730124  0.          2.46857876 -0.84      ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -5.99730278e+03 -1.78980000e+02]
------
Step:25, Action:South
State  189
Old Q Values:  [-2.98870365e-02  2.11839628e+00 -5.98845944e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  6.77047602e+00 -5.98845944e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1.74372503e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:26, Action:North
State  261
Old Q Values:  [ 1.74372503e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 2.12863282e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02  6.77047602e+00 -5.98845944e+03 -1.80600000e+02]
------
Step:27, Action:South
State  189
Old Q Values:  [-2.98870365e-02  6.77047602e+00 -5.98845944e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  2.74678026e+00 -5.98845944e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2.12863282e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
------
Step:28, Action:North
State  261
Old Q Values:  [ 2.12863282e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 1.07548721e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02  2.74678026e+00 -5.98845944e+03 -1.80600000e+02]
------
Step:29, Action:South
State  179
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [ 0.  -0.6  0.   0. ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[  -0.6 -180.6    0.     0. ]
------
Step:30, Action:East
State  261
Old Q Values:  [ 1.07548721e+00 -2.89595345e+02 -5.46719309e+03 -1.80600000e+02]
New Q values:  [ 1.07548721e+00 -2.89595345e+02 -8.18715459e+03 -1.80600000e+02]
Reward: -10001  Episode Reward:  -9970
xxxxx
x   x
x   x
xa.gx
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.96625906e-01]
------
Step:1, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.17706649e-01  4.53206307e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  5.37457609e+00  4.53206307e-01]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6          -0.24168858]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6          -0.24168858]
New Q values:  [-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.35897399e-01]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.96625906e-01]
------
Step:3, Action:East
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.86926011e+00  6.96625906e-01]
New Q values:  [-2.68351473e+02 -6.00060000e+03  1.06934824e-01  6.96625906e-01]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.35897399e-01]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.35897399e-01]
New Q values:  [-180.6         -30.35711735 -180.6           0.95801387]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  5.37457609e+00  4.53206307e-01]
------
Step:5, Action:East
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03  1.06934824e-01  6.96625906e-01]
New Q values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  6.96625906e-01]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6           0.95801387]
------
Step:6, Action:West
State  136
Old Q Values:  [-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -6.95225309e+03]
New Q values:  [-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -2.78003473e+03]
Reward: -1  Episode Reward:  4
xxxxx
x.agx
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
------
Step:7, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  7.48305785e+00 -3.81925414e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  7.48305785e+00 -1.46979252e+02]
Reward: 9  Episode Reward:  13
xxxxx
xag x
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  1.30304437e+00 -2.78131338e+03 -1.80600000e+02]
------
Step:8, Action:South
State  111
Old Q Values:  [0.         5.36667607 0.10609835 0.        ]
New Q values:  [0.         9.27920305 0.10609835 0.        ]
Reward: 9  Episode Reward:  22
xxxxx
x   x
xag.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    5.77510873 -445.90112872 -180.6       ]
------
Step:9, Action:South
State  180
Old Q Values:  [-3431.06190038   -46.62           0.             0.        ]
New Q values:  [-3431.06190038  -301.13712        0.             0.        ]
Reward: 9  Episode Reward:  31
xxxxx
x   x
xg..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -959.6304 -6457.4598 -6000.6    -6307.02  ]
------
Step:10, Action:East
State  261
Old Q Values:  [ 1.07548721e+00 -2.89595345e+02 -8.18715459e+03 -1.80600000e+02]
New Q values:  [ 1.07548721e+00 -2.89595345e+02 -3.26713109e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  -0.192      -180.6           7.76916747  -48.78      ]
------
Step:11, Action:East
State  273
Old Q Values:  [3863.70089583    0.            0.            0.        ]
New Q values:  [3.86370090e+03 0.00000000e+00 1.06394944e+00 0.00000000e+00]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5.54649812e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
------
Step:12, Action:West
State  288
Old Q Values:  [ 5.54649812e+00 -6.25171315e+03 -6.17356321e+03 -5.65478955e+03]
New Q values:  [ 5.54649812e+00 -6.25171315e+03 -6.17356321e+03 -1.10340555e+03]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3.86370090e+03 0.00000000e+00 1.06394944e+00 0.00000000e+00]
------
Step:13, Action:North
State  273
Old Q Values:  [3.86370090e+03 0.00000000e+00 1.06394944e+00 0.00000000e+00]
New Q values:  [1.55420787e+03 0.00000000e+00 1.06394944e+00 0.00000000e+00]
Reward: 9  Episode Reward:  47
xxxxx
x   x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   11.09169436     0.         -7476.85600442     0.        ]
------
Step:14, Action:North
State  199
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [-0.6  0.   0.   0. ]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x  .x
x  gx
xxxxx
Step:15, Action:South
State  115
Old Q Values:  [-180.6    0.     0.     0. ]
New Q values:  [-180.6           2.72750831    0.            0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   11.09169436     0.         -7476.85600442     0.        ]
------
Step:16, Action:North
State  196
Old Q Values:  [-5.99476239e+03  2.76960000e+00  0.00000000e+00  0.00000000e+00]
New Q values:  [-2398.16618312     2.7696         0.             0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[0.        0.        1.1292464 0.       ]
------
Step:17, Action:East
State  124
Old Q Values:  [0.         5.4        0.         0.91053821]
New Q values:  [0.         5.4        0.61545241 0.91053821]
Reward: -1  Episode Reward:  43
xxxxx
x gax
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -2.78003473e+03]
------
Step:18, Action:South
State  128
Old Q Values:  [ -180.6      0.   -8652.84     0.  ]
New Q values:  [ -180.6        78008.07422108 -8652.84           0.        ]
Reward: 100009  Episode Reward:  100052
xxxxx
x  gx
x  ax
x   x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   8.25303748 10.02286307  0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [-0.1803238   8.25303748 10.02286307  0.        ]
New Q values:  [-0.1803238   8.25303748 10.23547692  0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987  2.75443898  0.        ]
------
Step:2, Action:East
State  192
Old Q Values:  [-9.25243762e+03  2.22332643e+01  3.37558968e+00  0.00000000e+00]
New Q values:  [-9252.43762121    22.23326427 18009.42445695     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[60008.91407026  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:3, Action:North
State  208
Old Q Values:  [60008.91407026  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [24009.55755292  -361.47821843 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           1.97308272    0.           -0.6       ]
------
Step:4, Action:East
State  130
Old Q Values:  [-180.6           1.97308272    0.           -0.6       ]
New Q values:  [-180.6           1.97308272 -180.00807518   -0.6       ]
Reward: -301  Episode Reward:  -274
xxxxx
x..ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6           1.97308272 -180.00807518   -0.6       ]
------
Step:5, Action:South
State  130
Old Q Values:  [-180.6           1.97308272 -180.00807518   -0.6       ]
New Q values:  [-1.80600000e+02  1.20305650e+03 -1.80008075e+02 -6.00000000e-01]
Reward: -10001  Episode Reward:  -10275
xxxxx
x.. x
x  gx
x.. x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-951.35054867 -180.6           9.81445027    8.424     ]
------
Step:1, Action:East
State  272
Old Q Values:  [-951.35054867 -180.6           9.81445027    8.424     ]
New Q values:  [-951.35054867 -180.6          10.98972954    8.424     ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5.54649812e+00 -6.25171315e+03 -6.17356321e+03 -1.10340555e+03]
------
Step:2, Action:North
State  288
Old Q Values:  [ 5.54649812e+00 -6.25171315e+03 -6.17356321e+03 -1.10340555e+03]
New Q values:  [ 7210.48586512 -6251.71315483 -6173.56321028 -1103.40555297]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24009.55755292  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:3, Action:North
State  210
Old Q Values:  [-1.81781095e-01 -1.44656120e+03 -1.80600000e+02  3.07790274e+00]
New Q values:  [  366.24423725 -1446.5611984   -180.6            3.07790274]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-1.80600000e+02  1.20305650e+03 -1.80008075e+02 -6.00000000e-01]
------
Step:4, Action:South
State  130
Old Q Values:  [-1.80600000e+02  1.20305650e+03 -1.80008075e+02 -6.00000000e-01]
New Q values:  [-1.80600000e+02  7.68348987e+03 -1.80008075e+02 -6.00000000e-01]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24009.55755292  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:5, Action:North
State  208
Old Q Values:  [24009.55755292  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [11908.26998081  -361.47821843 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-1.80600000e+02  7.68348987e+03 -1.80008075e+02 -6.00000000e-01]
------
Step:6, Action:South
State  130
Old Q Values:  [-1.80600000e+02  7.68348987e+03 -1.80008075e+02 -6.00000000e-01]
New Q values:  [-1.80600000e+02  6.64527694e+03 -1.80008075e+02 -6.00000000e-01]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11908.26998081  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:7, Action:North
State  208
Old Q Values:  [11908.26998081  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [ 6756.29107445  -361.47821843 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  23
xxxxx
x..ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-1.80600000e+02  6.64527694e+03 -1.80008075e+02 -6.00000000e-01]
------
Step:8, Action:West
State  130
Old Q Values:  [-1.80600000e+02  6.64527694e+03 -1.80008075e+02 -6.00000000e-01]
New Q values:  [-1.80600000e+02  6.64527694e+03 -1.80008075e+02  5.40399242e+00]
Reward: 9  Episode Reward:  32
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            0.81330808    0.        ]
------
Step:9, Action:East
State  114
Old Q Values:  [-180.6           0.            0.81330808    0.        ]
New Q values:  [-180.6           0.         1993.30840536    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-1.80600000e+02  6.64527694e+03 -1.80008075e+02  5.40399242e+00]
------
Step:10, Action:South
State  130
Old Q Values:  [-1.80600000e+02  6.64527694e+03 -1.80008075e+02  5.40399242e+00]
New Q values:  [-180.6        4684.39809851 -180.00807518    5.40399242]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6756.29107445  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:11, Action:North
State  208
Old Q Values:  [ 6756.29107445  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [ 2702.20383394  -361.47821843 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  29
xxxxx
x. ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6           0.95801387]
------
Step:12, Action:West
State  130
Old Q Values:  [-180.6        4684.39809851 -180.00807518    5.40399242]
New Q values:  [-180.6        4684.39809851 -180.00807518  599.55411858]
Reward: -1  Episode Reward:  28
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.         1993.30840536    0.        ]
------
Step:13, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  7.48305785e+00 -1.46979252e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.60867555e+00 -1.46979252e+02]
Reward: -1  Episode Reward:  27
xxxxx
x.gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -2.78003473e+03]
------
Step:14, Action:South
State  130
Old Q Values:  [-180.6        4684.39809851 -180.00807518  599.55411858]
New Q values:  [-180.6        2683.82038958 -180.00807518  599.55411858]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2702.20383394  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:15, Action:North
State  208
Old Q Values:  [ 2702.20383394  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [ 1080.56893774  -361.47821843 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6           0.95801387]
------
Step:16, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6           0.95801387]
New Q values:  [-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -7.80668186e-03]
Reward: -1  Episode Reward:  24
xxxxx
x.a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  6.96625906e-01]
------
Step:17, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  6.96625906e-01]
New Q values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  5.72013480e+00]
Reward: 9  Episode Reward:  33
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  1.38281469e-01 -2.52781922e+02]
------
Step:18, Action:East
State  105
Old Q Values:  [-1.80600000e+02 -2.90222782e-01 -9.74019079e+03  0.00000000e+00]
New Q values:  [-1.80600000e+02 -2.90222782e-01 -3.89520980e+03  0.00000000e+00]
Reward: -1  Episode Reward:  32
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
------
Step:19, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  5.72013480e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.72953836e+00]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02  1.38281469e-01 -2.52781922e+02]
------
Step:20, Action:East
State  107
Old Q Values:  [-2.52351696e+02  8.28088630e-02  1.38281469e-01 -2.52781922e+02]
New Q values:  [-2.52351696e+02  8.28088630e-02 -2.58259038e-02 -2.52781922e+02]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.72953836e+00]
------
Step:21, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  5.37457609e+00  4.53206307e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  5.37457609e+00 -3.93874818e-01]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02 -2.58259038e-02 -2.52781922e+02]
------
Step:22, Action:South
State  111
Old Q Values:  [0.         9.27920305 0.10609835 0.        ]
New Q values:  [ 0.         12.1823243   0.10609835  0.        ]
Reward: 9  Episode Reward:  38
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   8.25303748 10.23547692  0.        ]
------
Step:23, Action:East
State  179
Old Q Values:  [ 0.  -0.6  0.   0. ]
New Q values:  [ 0.00000000e+00 -6.00000000e-01  6.00062263e+04  0.00000000e+00]
Reward: 100009  Episode Reward:  100047
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7210.48586512 -6251.71315483 -6173.56321028 -1103.40555297]
------
Step:1, Action:West
State  288
Old Q Values:  [ 7210.48586512 -6251.71315483 -6173.56321028 -1103.40555297]
New Q values:  [ 7210.48586512 -6251.71315483 -6173.56321028    30.3001388 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1.55420787e+03 0.00000000e+00 1.06394944e+00 0.00000000e+00]
------
Step:2, Action:North
State  272
Old Q Values:  [-951.35054867 -180.6          10.98972954    8.424     ]
New Q values:  [-374.31388777 -180.6          10.98972954    8.424     ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987  2.75443898  0.        ]
------
Step:3, Action:East
State  194
Old Q Values:  [-0.6         2.56078987  2.75443898  0.        ]
New Q values:  [ -0.6          2.56078987 110.37504677   0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  366.24423725 -1446.5611984   -180.6            3.07790274]
------
Step:4, Action:North
State  210
Old Q Values:  [  366.24423725 -1446.5611984   -180.6            3.07790274]
New Q values:  [  957.04381178 -1446.5611984   -180.6            3.07790274]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        2683.82038958 -180.00807518  599.55411858]
------
Step:5, Action:South
State  130
Old Q Values:  [-180.6        2683.82038958 -180.00807518  599.55411858]
New Q values:  [-180.6        1360.04129937 -180.00807518  599.55411858]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  957.04381178 -1446.5611984   -180.6            3.07790274]
------
Step:6, Action:North
State  210
Old Q Values:  [  957.04381178 -1446.5611984   -180.6            3.07790274]
New Q values:  [  790.22991452 -1446.5611984   -180.6            3.07790274]
Reward: -1  Episode Reward:  24
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        1360.04129937 -180.00807518  599.55411858]
------
Step:7, Action:South
State  130
Old Q Values:  [-180.6        1360.04129937 -180.00807518  599.55411858]
New Q values:  [-180.6         780.4854941  -180.00807518  599.55411858]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  790.22991452 -1446.5611984   -180.6            3.07790274]
------
Step:8, Action:North
State  208
Old Q Values:  [ 1080.56893774  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [  665.77322333  -361.47821843 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  22
xxxxx
x..ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         780.4854941  -180.00807518  599.55411858]
------
Step:9, Action:South
State  130
Old Q Values:  [-180.6         780.4854941  -180.00807518  599.55411858]
New Q values:  [-180.6         548.663172   -180.00807518  599.55411858]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  790.22991452 -1446.5611984   -180.6            3.07790274]
------
Step:10, Action:North
State  208
Old Q Values:  [  665.77322333  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [  445.5755249   -361.47821843 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  20
xxxxx
x..ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         548.663172   -180.00807518  599.55411858]
------
Step:11, Action:West
State  130
Old Q Values:  [-180.6         548.663172   -180.00807518  599.55411858]
New Q values:  [-180.6         548.663172   -180.00807518  843.21416904]
Reward: 9  Episode Reward:  29
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.         1993.30840536    0.        ]
------
Step:12, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  5.37457609e+00 -3.93874818e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.54748843e+00 -3.93874818e-01]
Reward: -1  Episode Reward:  28
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -7.80668186e-03]
------
Step:13, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -7.80668186e-03]
New Q values:  [-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.38876144e-01]
Reward: -1  Episode Reward:  27
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.54748843e+00 -3.93874818e-01]
------
Step:14, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.54748843e+00 -3.93874818e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -2.26674712e-02 -3.93874818e-01]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.38876144e-01]
------
Step:15, Action:West
State  130
Old Q Values:  [-180.6         548.663172   -180.00807518  843.21416904]
New Q values:  [-180.6         548.663172   -180.00807518  934.67818922]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.         1993.30840536    0.        ]
------
Step:16, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -2.26674712e-02 -3.93874818e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01 -3.93874818e-01]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.38876144e-01]
------
Step:17, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.38876144e-01]
New Q values:  [-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.36688949e-01]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.72953836e+00]
------
Step:18, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.72953836e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  6.11665800e+00]
Reward: 9  Episode Reward:  32
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02 -2.58259038e-02 -2.52781922e+02]
------
Step:19, Action:South
State  105
Old Q Values:  [-1.80600000e+02 -2.90222782e-01 -3.89520980e+03  0.00000000e+00]
New Q values:  [ -180.6            5.28391089 -3895.20980426     0.        ]
Reward: 9  Episode Reward:  41
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -5.99730278e+03 -1.78980000e+02]
------
Step:20, Action:South
State  177
Old Q Values:  [    0.            0.        60661.2566939     0.       ]
New Q values:  [    0.        60005.4       60661.2566939     0.       ]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   8.25303748 10.23547692  0.        ]
------
Step:1, Action:East
State  182
Old Q Values:  [0.   0.   7.56 0.  ]
New Q values:  [ 0.          0.         41.53651403  0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 110.37504677   0.        ]
------
Step:2, Action:East
State  192
Old Q Values:  [-9252.43762121    22.23326427 18009.42445695     0.        ]
New Q values:  [-9252.43762121    22.23326427  7342.84244025     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  445.5755249   -361.47821843 -2651.70614553 -3344.18956062]
------
Step:3, Action:North
State  208
Old Q Values:  [  445.5755249   -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [  464.03366673  -361.47821843 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         548.663172   -180.00807518  934.67818922]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -3.03571173e+01 -1.80600000e+02 -1.36688949e-01]
New Q values:  [-180.6         -30.35711735 -180.6         603.33784603]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.         1993.30840536    0.        ]
------
Step:5, Action:East
State  114
Old Q Values:  [-180.6           0.         1993.30840536    0.        ]
New Q values:  [-180.6           0.          977.72471595    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6         603.33784603]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6         603.33784603]
New Q values:  [-180.6         -30.35711735 -180.6         534.0525532 ]
Reward: -1  Episode Reward:  34
xxxxx
x.a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.          977.72471595    0.        ]
------
Step:7, Action:East
State  115
Old Q Values:  [-180.6           2.72750831    0.            0.        ]
New Q values:  [-180.6           2.72750831  279.80345677    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         548.663172   -180.00807518  934.67818922]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6         534.0525532 ]
New Q values:  [-180.6         -30.35711735 -180.6         296.96205831]
Reward: -1  Episode Reward:  32
xxxxx
x.a x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6           2.72750831  279.80345677    0.        ]
------
Step:9, Action:East
State  113
Old Q Values:  [  0.        264.6715651   0.          0.       ]
New Q values:  [    0.           264.6715651  -5999.38454759     0.        ]
Reward: -10001  Episode Reward:  -9969
xxxxx
x. gx
x   x
x. .x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.50729832e-01 -3.93874818e-01]
------
Step:1, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01 -3.93874818e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  5.26729273e+00]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02 -2.58259038e-02 -2.52781922e+02]
------
Step:2, Action:South
State  111
Old Q Values:  [ 0.         12.1823243   0.10609835  0.        ]
New Q values:  [ 0.         12.00546234  0.10609835  0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    5.77510873 -445.90112872 -180.6       ]
------
Step:3, Action:South
State  180
Old Q Values:  [-3431.06190038  -301.13712        0.             0.        ]
New Q values:  [-3431.06190038  -402.943968       0.             0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
xg .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -959.6304 -6457.4598 -6000.6    -6307.02  ]
------
Step:4, Action:East
State  261
Old Q Values:  [ 1.07548721e+00 -2.89595345e+02 -3.26713109e+03 -1.80600000e+02]
New Q values:  [ 1.07548721e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  -0.192      -180.6           7.76916747  -48.78      ]
------
Step:5, Action:East
State  277
Old Q Values:  [-0.14688  0.       0.       0.     ]
New Q values:  [-1.46880000e-01  0.00000000e+00  2.16854576e+03  0.00000000e+00]
Reward: 9  Episode Reward:  45
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7210.48586512 -6251.71315483 -6173.56321028    30.3001388 ]
------
Step:6, Action:West
State  288
Old Q Values:  [ 7210.48586512 -6251.71315483 -6173.56321028    30.3001388 ]
New Q values:  [ 7210.48586512 -6251.71315483 -6173.56321028    13.85080576]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  -0.192      -180.6           7.76916747  -48.78      ]
------
Step:7, Action:East
State  277
Old Q Values:  [-1.46880000e-01  0.00000000e+00  2.16854576e+03  0.00000000e+00]
New Q values:  [-1.46880000e-01  0.00000000e+00  3.02996406e+03  0.00000000e+00]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7210.48586512 -6251.71315483 -6173.56321028    13.85080576]
------
Step:8, Action:West
State  288
Old Q Values:  [ 7210.48586512 -6251.71315483 -6173.56321028    13.85080576]
New Q values:  [ 7210.48586512 -6251.71315483 -6173.56321028   913.92954131]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[-1.46880000e-01  0.00000000e+00  3.02996406e+03  0.00000000e+00]
------
Step:9, Action:West
State  276
Old Q Values:  [  -0.192      -180.6           7.76916747  -48.78      ]
New Q values:  [  -0.192      -180.6           7.76916747  -19.78935384]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1.07548721e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
------
Step:10, Action:North
State  261
Old Q Values:  [ 1.07548721e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
New Q values:  [    1.5627275   -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    5.77510873 -445.90112872 -180.6       ]
------
Step:11, Action:South
State  183
Old Q Values:  [-0.1803238   8.25303748 10.23547692  0.        ]
New Q values:  [-0.1803238   3.17003324 10.23547692  0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    1.5627275   -289.59534477 -1299.12168416  -180.6       ]
------
Step:12, Action:North
State  260
Old Q Values:  [ -959.6304 -6457.4598 -6000.6    -6307.02  ]
New Q values:  [ -371.99120579 -6457.4598     -6000.6        -6307.02      ]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[ 0.          0.         41.53651403  0.        ]
------
Step:13, Action:East
State  180
Old Q Values:  [-3431.06190038  -402.943968       0.             0.        ]
New Q values:  [-3.43106190e+03 -4.02943968e+02  2.30880000e-01  0.00000000e+00]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2398.16618312     2.7696         0.             0.        ]
------
Step:14, Action:South
State  196
Old Q Values:  [-2398.16618312     2.7696         0.             0.        ]
New Q values:  [-2398.16618312     2.83859024     0.             0.        ]
Reward: -1  Episode Reward:  36
xxxxx
xg .x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  -0.192      -180.6           7.76916747  -19.78935384]
------
Step:15, Action:East
State  276
Old Q Values:  [  -0.192      -180.6           7.76916747  -19.78935384]
New Q values:  [-1.92000000e-01 -1.80600000e+02  2.16565343e+03 -1.97893538e+01]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7210.48586512 -6251.71315483 -6173.56321028   913.92954131]
------
Step:16, Action:North
State  288
Old Q Values:  [ 7210.48586512 -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [ 3028.80444607 -6251.71315483 -6173.56321028   913.92954131]
Reward: 9  Episode Reward:  44
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  464.03366673  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:17, Action:North
State  208
Old Q Values:  [  464.03366673  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [83593.43573301  -361.47821843 -2651.70614553 -3344.18956062]
Reward: 100009  Episode Reward:  100053
xxxxx
x gax
x   x
x   x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-374.31388777 -180.6          10.98972954    8.424     ]
------
Step:1, Action:East
State  273
Old Q Values:  [1.55420787e+03 0.00000000e+00 1.06394944e+00 0.00000000e+00]
New Q values:  [1554.20786664    0.          914.46691359    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3028.80444607 -6251.71315483 -6173.56321028   913.92954131]
------
Step:2, Action:North
State  288
Old Q Values:  [ 3028.80444607 -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [26294.95249833 -6251.71315483 -6173.56321028   913.92954131]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[83593.43573301  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:3, Action:North
State  208
Old Q Values:  [83593.43573301  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [27443.98974561  -361.47821843 -2651.70614553 -3344.18956062]
Reward: -9991  Episode Reward:  -9973
xxxxx
x. gx
x.. x
x.  x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[26294.95249833 -6251.71315483 -6173.56321028   913.92954131]
------
Step:1, Action:North
State  288
Old Q Values:  [26294.95249833 -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [18756.57792302 -6251.71315483 -6173.56321028   913.92954131]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27443.98974561  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:2, Action:North
State  208
Old Q Values:  [27443.98974561  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [10984.21135065  -361.47821843 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -2.78003473e+03]
------
Step:3, Action:South
State  130
Old Q Values:  [-180.6         548.663172   -180.00807518  934.67818922]
New Q values:  [-180.6        3514.12867399 -180.00807518  934.67818922]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10984.21135065  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:4, Action:North
State  210
Old Q Values:  [  790.22991452 -1446.5611984   -180.6            3.07790274]
New Q values:  [  404.5805833  -1446.5611984   -180.6            3.07790274]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6         296.96205831]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6         296.96205831]
New Q values:  [-180.6         -30.35711735 -180.6         125.76501114]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  5.26729273e+00]
------
Step:6, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.60867555e+00 -1.46979252e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.60867555e+00 -6.05091086e+03]
Reward: -9991  Episode Reward:  -9966
xxxxx
xg  x
x.. x
x . x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-374.31388777 -180.6          10.98972954    8.424     ]
------
Step:1, Action:East
State  272
Old Q Values:  [-374.31388777 -180.6          10.98972954    8.424     ]
New Q values:  [-374.31388777 -180.6        5636.76926872    8.424     ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18756.57792302 -6251.71315483 -6173.56321028   913.92954131]
------
Step:2, Action:North
State  288
Old Q Values:  [18756.57792302 -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [ 7629.4053442  -6251.71315483 -6173.56321028   913.92954131]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  404.5805833  -1446.5611984   -180.6            3.07790274]
------
Step:3, Action:North
State  210
Old Q Values:  [  404.5805833  -1446.5611984   -180.6            3.07790274]
New Q values:  [  204.96173666 -1446.5611984   -180.6            3.07790274]
Reward: 9  Episode Reward:  27
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6         125.76501114]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6         125.76501114]
New Q values:  [-180.6         -30.35711735 -180.6          57.54100186]
Reward: 9  Episode Reward:  36
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  6.11665800e+00]
------
Step:5, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  6.11665800e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.87150586e+00]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  8.28088630e-02 -2.58259038e-02 -2.52781922e+02]
------
Step:6, Action:South
State  107
Old Q Values:  [-2.52351696e+02  8.28088630e-02 -2.58259038e-02 -2.52781922e+02]
New Q values:  [-2.52351696e+02  5.43312355e+00 -2.58259038e-02 -2.52781922e+02]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -5.99730278e+03 -1.78980000e+02]
------
Step:7, Action:South
State  181
Old Q Values:  [  -0.94832081    5.77510873 -445.90112872 -180.6       ]
New Q values:  [  -0.94832081    8.17886174 -445.90112872 -180.6       ]
Reward: 9  Episode Reward:  53
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    1.5627275   -289.59534477 -1299.12168416  -180.6       ]
------
Step:8, Action:North
State  260
Old Q Values:  [ -371.99120579 -6457.4598     -6000.6        -6307.02      ]
New Q values:  [ -149.32721832 -6457.4598     -6000.6        -6307.02      ]
Reward: -1  Episode Reward:  52
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3.43106190e+03 -4.02943968e+02  2.30880000e-01  0.00000000e+00]
------
Step:9, Action:East
State  176
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [    0.             0.         62208.25273208     0.        ]
Reward: 100009  Episode Reward:  100061
xxxxx
xg  x
x a x
x   x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -149.32721832 -6457.4598     -6000.6        -6307.02      ]
------
Step:1, Action:North
State  261
Old Q Values:  [    1.5627275   -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    8.47874952  -289.59534477 -1299.12168416  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    8.17886174 -445.90112872 -180.6       ]
------
Step:2, Action:South
State  180
Old Q Values:  [-3.43106190e+03 -4.02943968e+02  2.30880000e-01  0.00000000e+00]
New Q values:  [-3.43106190e+03 -2.06575753e+02  2.30880000e-01  0.00000000e+00]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -149.32721832 -6457.4598     -6000.6        -6307.02      ]
------
Step:3, Action:North
State  260
Old Q Values:  [ -149.32721832 -6457.4598     -6000.6        -6307.02      ]
New Q values:  [-6060.26162333 -6457.4598     -6000.6        -6307.02      ]
Reward: -10001  Episode Reward:  -9993
xxxxx
x ..x
xg..x
x ..x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1554.20786664    0.          914.46691359    0.        ]
------
Step:1, Action:North
State  272
Old Q Values:  [-374.31388777 -180.6        5636.76926872    8.424     ]
New Q values:  [2058.52717697 -180.6        5636.76926872    8.424     ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.a.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[-9252.43762121    22.23326427  7342.84244025     0.        ]
------
Step:2, Action:East
State  193
Old Q Values:  [   11.09169436     0.         -7476.85600442     0.        ]
New Q values:  [ 11.09169436   0.         309.92100343   0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x. ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10984.21135065  -361.47821843 -2651.70614553 -3344.18956062]
------
Step:3, Action:South
State  208
Old Q Values:  [10984.21135065  -361.47821843 -2651.70614553 -3344.18956062]
New Q values:  [10984.21135065  2149.63031589 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7629.4053442  -6251.71315483 -6173.56321028   913.92954131]
------
Step:4, Action:North
State  288
Old Q Values:  [ 7629.4053442  -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [ 6346.42554287 -6251.71315483 -6173.56321028   913.92954131]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10984.21135065  2149.63031589 -2651.70614553 -3344.18956062]
------
Step:5, Action:North
State  210
Old Q Values:  [  204.96173666 -1446.5611984   -180.6            3.07790274]
New Q values:  [ 1135.62329686 -1446.5611984   -180.6            3.07790274]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        3514.12867399 -180.00807518  934.67818922]
------
Step:6, Action:South
State  130
Old Q Values:  [-180.6        3514.12867399 -180.00807518  934.67818922]
New Q values:  [-180.6        1745.73845866 -180.00807518  934.67818922]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1135.62329686 -1446.5611984   -180.6            3.07790274]
------
Step:7, Action:North
State  208
Old Q Values:  [10984.21135065  2149.63031589 -2651.70614553 -3344.18956062]
New Q values:  [ 4916.80607786  2149.63031589 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  23
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        1745.73845866 -180.00807518  934.67818922]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6          57.54100186]
New Q values:  [-180.6         -30.35711735 -180.6          28.9778525 ]
Reward: 9  Episode Reward:  32
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.87150586e+00]
------
Step:9, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  5.26729273e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  9.13685416e+00]
Reward: 9  Episode Reward:  41
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  5.43312355e+00 -2.58259038e-02 -2.52781922e+02]
------
Step:10, Action:South
State  107
Old Q Values:  [-2.52351696e+02  5.43312355e+00 -2.58259038e-02 -2.52781922e+02]
New Q values:  [-2.52351696e+02  7.66123994e+00 -2.58259038e-02 -2.52781922e+02]
Reward: 9  Episode Reward:  50
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.          0.29330173  0.        ]
------
Step:11, Action:East
State  185
Old Q Values:  [-6.00000000e-01  0.00000000e+00 -5.99730278e+03 -1.78980000e+02]
New Q values:  [-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[1.52371192 0.         0.         5.4       ]
------
Step:12, Action:West
State  201
Old Q Values:  [1.52371192 0.         0.         5.4       ]
New Q values:  [1.52371192 0.         0.         1.56      ]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
------
Step:13, Action:South
State  177
Old Q Values:  [    0.        60005.4       60661.2566939     0.       ]
New Q values:  [    0.        84007.56      60661.2566939     0.       ]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[ 11.09169436   0.         309.92100343   0.        ]
------
Step:1, Action:North
State  193
Old Q Values:  [ 11.09169436   0.         309.92100343   0.        ]
New Q values:  [ 11.30318871   0.         309.92100343   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x.agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
------
Step:2, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.87150586e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  8.44697433e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa .x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  7.66123994e+00 -2.58259038e-02 -2.52781922e+02]
------
Step:3, Action:South
State  111
Old Q Values:  [ 0.         12.00546234  0.10609835  0.        ]
New Q values:  [ 0.         11.02621901  0.10609835  0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02  2.74678026e+00 -5.98845944e+03 -1.80600000e+02]
------
Step:4, Action:South
State  189
Old Q Values:  [-2.98870365e-02  2.74678026e+00 -5.98845944e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  9.04233696e+00 -5.98845944e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    8.47874952  -289.59534477 -1299.12168416  -180.6       ]
------
Step:5, Action:North
State  261
Old Q Values:  [    8.47874952  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    5.86214289  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   3.17003324 10.23547692  0.        ]
------
Step:6, Action:East
State  189
Old Q Values:  [-2.98870365e-02  9.04233696e+00 -5.98845944e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  9.04233696e+00 -2.39590675e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[0.13244338 0.25674    0.         0.        ]
------
Step:7, Action:South
State  199
Old Q Values:  [-0.6  0.   0.   0. ]
New Q values:  [-6.00000000e-01  9.14389219e+02  0.00000000e+00  0.00000000e+00]
Reward: 9  Episode Reward:  43
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[-1.46880000e-01  0.00000000e+00  3.02996406e+03  0.00000000e+00]
------
Step:8, Action:West
State  276
Old Q Values:  [-1.92000000e-01 -1.80600000e+02  2.16565343e+03 -1.97893538e+01]
New Q values:  [-1.92000000e-01 -1.80600000e+02  2.16565343e+03 -6.75709867e+00]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    5.86214289  -289.59534477 -1299.12168416  -180.6       ]
------
Step:9, Action:North
State  260
Old Q Values:  [-6060.26162333 -6457.4598     -6000.6        -6307.02      ]
New Q values:  [-2412.24369512 -6457.4598     -6000.6        -6307.02      ]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[ 0.          0.         41.53651403  0.        ]
------
Step:10, Action:East
State  180
Old Q Values:  [-3.43106190e+03 -2.06575753e+02  2.30880000e-01  0.00000000e+00]
New Q values:  [-3.43106190e+03 -2.06575753e+02  3.43929072e-01  0.00000000e+00]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2398.16618312     2.83859024     0.             0.        ]
------
Step:11, Action:South
State  196
Old Q Values:  [-2398.16618312     2.83859024     0.             0.        ]
New Q values:  [-2398.16618312   650.23146405     0.             0.        ]
Reward: -1  Episode Reward:  39
xxxxx
xg .x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-1.92000000e-01 -1.80600000e+02  2.16565343e+03 -6.75709867e+00]
------
Step:12, Action:East
State  276
Old Q Values:  [-1.92000000e-01 -1.80600000e+02  2.16565343e+03 -6.75709867e+00]
New Q values:  [-1.92000000e-01 -1.80600000e+02  2.77558903e+03 -6.75709867e+00]
Reward: 9  Episode Reward:  48
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6346.42554287 -6251.71315483 -6173.56321028   913.92954131]
------
Step:13, Action:North
State  288
Old Q Values:  [ 6346.42554287 -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [ 4013.01204051 -6251.71315483 -6173.56321028   913.92954131]
Reward: -1  Episode Reward:  47
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4916.80607786  2149.63031589 -2651.70614553 -3344.18956062]
------
Step:14, Action:North
State  208
Old Q Values:  [ 4916.80607786  2149.63031589 -2651.70614553 -3344.18956062]
New Q values:  [62495.84396874  2149.63031589 -2651.70614553 -3344.18956062]
Reward: 100009  Episode Reward:  100056
xxxxx
x  ax
xg  x
x   x
xxxxx
Episode # 100
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -30.35711735 -180.6          28.9778525 ]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6          28.9778525 ]
New Q values:  [-180.6         -30.35711735 -180.6          19.5252333 ]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  8.44697433e+00]
------
Step:2, Action:West
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  8.44697433e+00]
New Q values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.20866554e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.         11.02621901  0.10609835  0.        ]
------
Step:3, Action:South
State  111
Old Q Values:  [ 0.         11.02621901  0.10609835  0.        ]
New Q values:  [ 0.         12.88113068  0.10609835  0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   3.17003324 10.23547692  0.        ]
------
Step:4, Action:East
State  187
Old Q Values:  [-0.11058345  0.          0.29330173  0.        ]
New Q values:  [-0.11058345  0.         13.89361422  0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    27.92097842     0.        ]
------
Step:5, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458    27.92097842     0.        ]
New Q values:  [    0.         -5884.35407458    12.23711243     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[0.         5.56240353 0.         0.        ]
------
Step:6, Action:South
State  216
Old Q Values:  [-9.38271823e+02  3.51557501e-02 -6.17035694e+03  1.76450980e+00]
New Q values:  [-9.38271823e+02  1.20931767e+03 -6.17035694e+03  1.76450980e+00]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4013.01204051 -6251.71315483 -6173.56321028   913.92954131]
------
Step:7, Action:North
State  288
Old Q Values:  [ 4013.01204051 -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [ 1967.40011854 -6251.71315483 -6173.56321028   913.92954131]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  1.20931767e+03 -6.17035694e+03  1.76450980e+00]
------
Step:8, Action:South
State  218
Old Q Values:  [0.         5.56240353 0.         0.        ]
New Q values:  [  0.         591.84499697   0.           0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1967.40011854 -6251.71315483 -6173.56321028   913.92954131]
------
Step:9, Action:North
State  288
Old Q Values:  [ 1967.40011854 -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [  963.91354651 -6251.71315483 -6173.56321028   913.92954131]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  0.         591.84499697   0.           0.        ]
------
Step:10, Action:West
State  216
Old Q Values:  [-9.38271823e+02  1.20931767e+03 -6.17035694e+03  1.76450980e+00]
New Q values:  [-9.38271823e+02  1.20931767e+03 -6.17035694e+03  5.73803921e-01]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[1.52371192 0.         0.         1.56      ]
------
Step:11, Action:West
State  201
Old Q Values:  [1.52371192 0.         0.         1.56      ]
New Q values:  [1.52371192 0.         0.         0.024     ]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
------
Step:12, Action:South
State  189
Old Q Values:  [-2.98870365e-02  9.04233696e+00 -2.39590675e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  1.07755776e+01 -2.39590675e+03 -1.80600000e+02]
Reward: 9  Episode Reward:  48
xxxxx
x g x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    5.86214289  -289.59534477 -1299.12168416  -180.6       ]
------
Step:13, Action:North
State  261
Old Q Values:  [    5.86214289  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    4.97753045  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[-2.98870365e-02  1.07755776e+01 -2.39590675e+03 -1.80600000e+02]
------
Step:14, Action:South
State  189
Old Q Values:  [-2.98870365e-02  1.07755776e+01 -2.39590675e+03 -1.80600000e+02]
New Q values:  [-2.98870365e-02  5.20349019e+00 -2.39590675e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    4.97753045  -289.59534477 -1299.12168416  -180.6       ]
------
Step:15, Action:North
State  261
Old Q Values:  [    4.97753045  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    2.31097976  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[3.06655861 0.         0.         0.        ]
------
Step:16, Action:North
State  189
Old Q Values:  [-2.98870365e-02  5.20349019e+00 -2.39590675e+03 -1.80600000e+02]
New Q values:  [    3.25238439     5.20349019 -2395.90675455  -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.         12.88113068  0.10609835  0.        ]
------
Step:17, Action:South
State  109
Old Q Values:  [-1.80600000e+02  1.30304437e+00 -2.78131338e+03 -1.80600000e+02]
New Q values:  [-1.80600000e+02  1.48226480e+00 -2.78131338e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    3.25238439     5.20349019 -2395.90675455  -180.6       ]
------
Step:18, Action:South
State  189
Old Q Values:  [    3.25238439     5.20349019 -2395.90675455  -180.6       ]
New Q values:  [ 3.25238439e+00  2.17469001e+00 -2.39590675e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    2.31097976  -289.59534477 -1299.12168416  -180.6       ]
------
Step:19, Action:North
State  261
Old Q Values:  [    2.31097976  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    1.30010722  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 3.25238439e+00  2.17469001e+00 -2.39590675e+03 -1.80600000e+02]
------
Step:20, Action:North
State  189
Old Q Values:  [ 3.25238439e+00  2.17469001e+00 -2.39590675e+03 -1.80600000e+02]
New Q values:  [ 4.56529296e+00  2.17469001e+00 -2.39590675e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.         12.88113068  0.10609835  0.        ]
------
Step:21, Action:South
State  111
Old Q Values:  [ 0.         12.88113068  0.10609835  0.        ]
New Q values:  [0.         5.92204016 0.10609835 0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 4.56529296e+00  2.17469001e+00 -2.39590675e+03 -1.80600000e+02]
------
Step:22, Action:North
State  188
Old Q Values:  [-8398.9776192   -92.706         0.            0.       ]
New Q values:  [-3359.72551094   -92.706          0.             0.        ]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1344.842304       1.55178915     0.        ]
------
Step:23, Action:East
State  110
Old Q Values:  [ -180.6        -1344.842304       1.55178915     0.        ]
New Q values:  [ -180.6        -1344.842304       2.76177191     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  9.13685416e+00]
------
Step:24, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  9.13685416e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  3.88327323e+00]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1344.842304       2.76177191     0.        ]
------
Step:25, Action:East
State  108
Old Q Values:  [-6.18060000e+03  8.26947840e+00  1.14369011e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  8.26947840e+00  9.40078706e-01  0.00000000e+00]
Reward: -1  Episode Reward:  35
xxxxx
xga x
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.01561177e+04 -5.99568600e+03  3.60867555e+00 -6.05091086e+03]
------
Step:26, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.60867555e+00 -6.05091086e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.05892262e+00 -6.05091086e+03]
Reward: -1  Episode Reward:  34
xxxxx
x gax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -2.78003473e+03]
------
Step:27, Action:South
State  138
Old Q Values:  [-180.6         -30.35711735 -180.6          19.5252333 ]
New Q values:  [-180.6        350.0524554 -180.6         19.5252333]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  1.20931767e+03 -6.17035694e+03  5.73803921e-01]
------
Step:28, Action:South
State  216
Old Q Values:  [-9.38271823e+02  1.20931767e+03 -6.17035694e+03  5.73803921e-01]
New Q values:  [-9.38271823e+02  7.72301134e+02 -6.17035694e+03  5.73803921e-01]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x  gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  963.91354651 -6251.71315483 -6173.56321028   913.92954131]
------
Step:29, Action:West
State  288
Old Q Values:  [  963.91354651 -6251.71315483 -6173.56321028   913.92954131]
New Q values:  [  963.91354651 -6251.71315483 -6173.56321028 62062.00259714]
Reward: 100009  Episode Reward:  100041
xxxxx
x   x
x g x
x a x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 110.37504677   0.        ]
------
Step:1, Action:East
State  192
Old Q Values:  [-9252.43762121    22.23326427  7342.84244025     0.        ]
New Q values:  [-9252.43762121    22.23326427 21691.29016672     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[62495.84396874  2149.63031589 -2651.70614553 -3344.18956062]
------
Step:2, Action:North
State  208
Old Q Values:  [62495.84396874  2149.63031589 -2651.70614553 -3344.18956062]
New Q values:  [25108.75332412  2149.63031589 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        350.0524554 -180.6         19.5252333]
------
Step:3, Action:South
State  130
Old Q Values:  [-180.6        1745.73845866 -180.00807518  934.67818922]
New Q values:  [-180.6        8230.3213807  -180.00807518  934.67818922]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25108.75332412  2149.63031589 -2651.70614553 -3344.18956062]
------
Step:4, Action:North
State  208
Old Q Values:  [25108.75332412  2149.63031589 -2651.70614553 -3344.18956062]
New Q values:  [10044.11678205  2149.63031589 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  16
xxxxx
xg.ax
x.  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -2.78003473e+03]
------
Step:5, Action:South
State  136
Old Q Values:  [-6.18060000e+03  4.05150802e+00 -1.79384548e+02 -2.78003473e+03]
New Q values:  [-6180.6         3014.25563782  -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x. ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10044.11678205  2149.63031589 -2651.70614553 -3344.18956062]
------
Step:6, Action:North
State  208
Old Q Values:  [10044.11678205  2149.63031589 -2651.70614553 -3344.18956062]
New Q values:  [ 4921.32340417  2149.63031589 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  14
xxxxx
xg.ax
x.  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         3014.25563782  -179.38454759 -2780.03472576]
------
Step:7, Action:South
State  136
Old Q Values:  [-6180.6         3014.25563782  -179.38454759 -2780.03472576]
New Q values:  [-6180.6         2681.49927638  -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  13
xxxxx
x.g x
x. ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4921.32340417  2149.63031589 -2651.70614553 -3344.18956062]
------
Step:8, Action:North
State  208
Old Q Values:  [ 4921.32340417  2149.63031589 -2651.70614553 -3344.18956062]
New Q values:  [-3227.62085542  2149.63031589 -2651.70614553 -3344.18956062]
Reward: -10001  Episode Reward:  -9988
xxxxx
x..gx
x.  x
x ..x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        350.0524554 -180.6         19.5252333]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6        350.0524554 -180.6         19.5252333]
New Q values:  [-180.6         486.10797122 -180.6          19.5252333 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1135.62329686 -1446.5611984   -180.6            3.07790274]
------
Step:2, Action:North
State  208
Old Q Values:  [-3227.62085542  2149.63031589 -2651.70614553 -3344.18956062]
New Q values:  [-1145.8159508   2149.63031589 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         486.10797122 -180.6          19.5252333 ]
------
Step:3, Action:South
State  136
Old Q Values:  [-6180.6         2681.49927638  -179.38454759 -2780.03472576]
New Q values:  [-6180.6         1303.69005067  -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  7
xxxxx
xg. x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  7.72301134e+02 -6.17035694e+03  5.73803921e-01]
------
Step:4, Action:South
State  208
Old Q Values:  [-1145.8159508   2149.63031589 -2651.70614553 -3344.18956062]
New Q values:  [-1145.8159508  19483.8529055  -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  16
xxxxx
x.. x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  963.91354651 -6251.71315483 -6173.56321028 62062.00259714]
------
Step:5, Action:West
State  288
Old Q Values:  [  963.91354651 -6251.71315483 -6173.56321028 62062.00259714]
New Q values:  [  963.91354651 -6251.71315483 -6173.56321028 26521.23181947]
Reward: 9  Episode Reward:  25
xxxxx
xg. x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[2058.52717697 -180.6        5636.76926872    8.424     ]
------
Step:6, Action:East
State  272
Old Q Values:  [2058.52717697 -180.6        5636.76926872    8.424     ]
New Q values:  [ 2.05852718e+03 -1.80600000e+02  1.02104773e+04  8.42400000e+00]
Reward: -1  Episode Reward:  24
xxxxx
x.g x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  963.91354651 -6251.71315483 -6173.56321028 26521.23181947]
------
Step:7, Action:West
State  288
Old Q Values:  [  963.91354651 -6251.71315483 -6173.56321028 26521.23181947]
New Q values:  [  963.91354651 -6251.71315483 -6173.56321028 13671.03590379]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2.05852718e+03 -1.80600000e+02  1.02104773e+04  8.42400000e+00]
------
Step:8, Action:East
State  273
Old Q Values:  [1554.20786664    0.          914.46691359    0.        ]
New Q values:  [1554.20786664    0.         4466.49753657    0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x .gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  963.91354651 -6251.71315483 -6173.56321028 13671.03590379]
------
Step:9, Action:West
State  288
Old Q Values:  [  963.91354651 -6251.71315483 -6173.56321028 13671.03590379]
New Q values:  [  963.91354651 -6251.71315483 -6173.56321028  8530.95753751]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2.05852718e+03 -1.80600000e+02  1.02104773e+04  8.42400000e+00]
------
Step:10, Action:East
State  273
Old Q Values:  [1554.20786664    0.         4466.49753657    0.        ]
New Q values:  [1554.20786664    0.         4345.28627588    0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x .gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  963.91354651 -6251.71315483 -6173.56321028  8530.95753751]
------
Step:11, Action:West
State  288
Old Q Values:  [  963.91354651 -6251.71315483 -6173.56321028  8530.95753751]
New Q values:  [  963.91354651 -6251.71315483 -6173.56321028  6474.92619101]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2.05852718e+03 -1.80600000e+02  1.02104773e+04  8.42400000e+00]
------
Step:12, Action:East
State  272
Old Q Values:  [ 2.05852718e+03 -1.80600000e+02  1.02104773e+04  8.42400000e+00]
New Q values:  [2058.52717697 -180.6        6026.06875863    8.424     ]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  963.91354651 -6251.71315483 -6173.56321028  6474.92619101]
------
Step:13, Action:North
State  288
Old Q Values:  [  963.91354651 -6251.71315483 -6173.56321028  6474.92619101]
New Q values:  [ 6230.12129025 -6251.71315483 -6173.56321028  6474.92619101]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1145.8159508  19483.8529055  -2651.70614553 -3344.18956062]
------
Step:14, Action:South
State  208
Old Q Values:  [-1145.8159508  19483.8529055  -2651.70614553 -3344.18956062]
New Q values:  [-1145.8159508   9735.4190195  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6230.12129025 -6251.71315483 -6173.56321028  6474.92619101]
------
Step:15, Action:West
State  288
Old Q Values:  [ 6230.12129025 -6251.71315483 -6173.56321028  6474.92619101]
New Q values:  [ 6230.12129025 -6251.71315483 -6173.56321028  4397.19110399]
Reward: -1  Episode Reward:  15
xxxxx
xg. x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[2058.52717697 -180.6        6026.06875863    8.424     ]
------
Step:16, Action:East
State  272
Old Q Values:  [2058.52717697 -180.6        6026.06875863    8.424     ]
New Q values:  [2058.52717697 -180.6        4278.86389053    8.424     ]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6230.12129025 -6251.71315483 -6173.56321028  4397.19110399]
------
Step:17, Action:North
State  288
Old Q Values:  [ 6230.12129025 -6251.71315483 -6173.56321028  4397.19110399]
New Q values:  [ 5412.07422195 -6251.71315483 -6173.56321028  4397.19110399]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1145.8159508   9735.4190195  -2651.70614553 -3344.18956062]
------
Step:18, Action:South
State  210
Old Q Values:  [ 1135.62329686 -1446.5611984   -180.6            3.07790274]
New Q values:  [1135.62329686 1044.39778723 -180.6           3.07790274]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5412.07422195 -6251.71315483 -6173.56321028  4397.19110399]
------
Step:19, Action:North
State  288
Old Q Values:  [ 5412.07422195 -6251.71315483 -6173.56321028  4397.19110399]
New Q values:  [ 5084.85539463 -6251.71315483 -6173.56321028  4397.19110399]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1145.8159508   9735.4190195  -2651.70614553 -3344.18956062]
------
Step:20, Action:South
State  208
Old Q Values:  [-1145.8159508   9735.4190195  -2651.70614553 -3344.18956062]
New Q values:  [-1145.8159508   5419.02422619 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
x .gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5084.85539463 -6251.71315483 -6173.56321028  4397.19110399]
------
Step:21, Action:West
State  288
Old Q Values:  [ 5084.85539463 -6251.71315483 -6173.56321028  4397.19110399]
New Q values:  [ 5084.85539463 -6251.71315483 -6173.56321028  3061.86232436]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
x . x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1554.20786664    0.         4345.28627588    0.        ]
------
Step:22, Action:North
State  272
Old Q Values:  [2058.52717697 -180.6        4278.86389053    8.424     ]
New Q values:  [ 861.92338482 -180.6        4278.86389053    8.424     ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 110.37504677   0.        ]
------
Step:23, Action:East
State  195
Old Q Values:  [30.44335764  0.         10.9944      0.        ]
New Q values:  [ 30.44335764   0.         344.48474906   0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1135.62329686 1044.39778723 -180.6           3.07790274]
------
Step:24, Action:North
State  208
Old Q Values:  [-1145.8159508   5419.02422619 -2651.70614553 -3344.18956062]
New Q values:  [ 2010.17003389  5419.02422619 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        8230.3213807  -180.00807518  934.67818922]
------
Step:25, Action:West
State  130
Old Q Values:  [-180.6        8230.3213807  -180.00807518  934.67818922]
New Q values:  [-180.6        8230.3213807  -180.00807518  463.21231272]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6           2.72750831  279.80345677    0.        ]
------
Step:26, Action:East
State  114
Old Q Values:  [-180.6           0.          977.72471595    0.        ]
New Q values:  [-180.6           0.         2859.58630059    0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        8230.3213807  -180.00807518  463.21231272]
------
Step:27, Action:South
State  130
Old Q Values:  [-180.6        8230.3213807  -180.00807518  463.21231272]
New Q values:  [-180.6        4917.23582014 -180.00807518  463.21231272]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2010.17003389  5419.02422619 -2651.70614553 -3344.18956062]
------
Step:28, Action:South
State  208
Old Q Values:  [ 2010.17003389  5419.02422619 -2651.70614553 -3344.18956062]
New Q values:  [ 2010.17003389  3692.46630886 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5084.85539463 -6251.71315483 -6173.56321028  3061.86232436]
------
Step:29, Action:North
State  288
Old Q Values:  [ 5084.85539463 -6251.71315483 -6173.56321028  3061.86232436]
New Q values:  [ 2374.02914691 -6251.71315483 -6173.56321028  3061.86232436]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1135.62329686 1044.39778723 -180.6           3.07790274]
------
Step:30, Action:North
State  208
Old Q Values:  [ 2010.17003389  3692.46630886 -2651.70614553 -3344.18956062]
New Q values:  [ 2278.6387596   3692.46630886 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        4917.23582014 -180.00807518  463.21231272]
------
Step:31, Action:South
State  130
Old Q Values:  [-180.6        4917.23582014 -180.00807518  463.21231272]
New Q values:  [-180.6        3074.03422071 -180.00807518  463.21231272]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2278.6387596   3692.46630886 -2651.70614553 -3344.18956062]
------
Step:32, Action:South
State  208
Old Q Values:  [ 2278.6387596   3692.46630886 -2651.70614553 -3344.18956062]
New Q values:  [ 2278.6387596   2394.94522085 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  18
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2374.02914691 -6251.71315483 -6173.56321028  3061.86232436]
------
Step:33, Action:West
State  288
Old Q Values:  [ 2374.02914691 -6251.71315483 -6173.56321028  3061.86232436]
New Q values:  [ 2374.02914691 -6251.71315483 -6173.56321028  2507.8040969 ]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 861.92338482 -180.6        4278.86389053    8.424     ]
------
Step:34, Action:East
State  272
Old Q Values:  [ 861.92338482 -180.6        4278.86389053    8.424     ]
New Q values:  [ 861.92338482 -180.6        2463.28678528    8.424     ]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2374.02914691 -6251.71315483 -6173.56321028  2507.8040969 ]
------
Step:35, Action:West
State  288
Old Q Values:  [ 2374.02914691 -6251.71315483 -6173.56321028  2507.8040969 ]
New Q values:  [ 2374.02914691 -6251.71315483 -6173.56321028  2306.10752153]
Reward: -1  Episode Reward:  15
xxxxx
x. gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1554.20786664    0.         4345.28627588    0.        ]
------
Step:36, Action:East
State  273
Old Q Values:  [1554.20786664    0.         4345.28627588    0.        ]
New Q values:  [1554.20786664    0.         2449.72325443    0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2374.02914691 -6251.71315483 -6173.56321028  2306.10752153]
------
Step:37, Action:West
State  288
Old Q Values:  [ 2374.02914691 -6251.71315483 -6173.56321028  2306.10752153]
New Q values:  [ 2374.02914691 -6251.71315483 -6173.56321028  1660.8290442 ]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 861.92338482 -180.6        2463.28678528    8.424     ]
------
Step:38, Action:East
State  272
Old Q Values:  [ 861.92338482 -180.6        2463.28678528    8.424     ]
New Q values:  [ 861.92338482 -180.6        1696.92345819    8.424     ]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2374.02914691 -6251.71315483 -6173.56321028  1660.8290442 ]
------
Step:39, Action:North
State  288
Old Q Values:  [ 2374.02914691 -6251.71315483 -6173.56321028  1660.8290442 ]
New Q values:  [ 1667.49522502 -6251.71315483 -6173.56321028  1660.8290442 ]
Reward: -1  Episode Reward:  11
xxxxx
x. gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2278.6387596   2394.94522085 -2651.70614553 -3344.18956062]
------
Step:40, Action:South
State  208
Old Q Values:  [ 2278.6387596   2394.94522085 -2651.70614553 -3344.18956062]
New Q values:  [ 2278.6387596   1457.62665585 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  10
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1667.49522502 -6251.71315483 -6173.56321028  1660.8290442 ]
------
Step:41, Action:North
State  288
Old Q Values:  [ 1667.49522502 -6251.71315483 -6173.56321028  1660.8290442 ]
New Q values:  [ 1349.98971789 -6251.71315483 -6173.56321028  1660.8290442 ]
Reward: -1  Episode Reward:  9
xxxxx
x. gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2278.6387596   1457.62665585 -2651.70614553 -3344.18956062]
------
Step:42, Action:South
State  208
Old Q Values:  [ 2278.6387596   1457.62665585 -2651.70614553 -3344.18956062]
New Q values:  [ 2278.6387596   1080.6993756  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  8
xxxxx
x. gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1349.98971789 -6251.71315483 -6173.56321028  1660.8290442 ]
------
Step:43, Action:West
State  288
Old Q Values:  [ 1349.98971789 -6251.71315483 -6173.56321028  1660.8290442 ]
New Q values:  [ 1349.98971789 -6251.71315483 -6173.56321028  1398.64859401]
Reward: -1  Episode Reward:  7
xxxxx
x.  x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1554.20786664    0.         2449.72325443    0.        ]
------
Step:44, Action:East
State  273
Old Q Values:  [1554.20786664    0.         2449.72325443    0.        ]
New Q values:  [ 1554.20786664     0.         -4601.11612003     0.        ]
Reward: -10001  Episode Reward:  -9994
xxxxx
x.  x
x   x
x. gx
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         486.10797122 -180.6          19.5252333 ]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6         486.10797122 -180.6          19.5252333 ]
New Q values:  [-180.6         486.10797122 -180.6          16.83608995]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.20866554e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  3.88327323e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  9.25168127e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  7.66123994e+00 -2.58259038e-02 -2.52781922e+02]
------
Step:3, Action:South
State  110
Old Q Values:  [ -180.6        -1344.842304       2.76177191     0.        ]
New Q values:  [-180.6        -532.22453432    2.76177191    0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa. x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094  0.         -0.84        0.        ]
------
Step:4, Action:North
State  188
Old Q Values:  [-3359.72551094   -92.706          0.             0.        ]
New Q values:  [-1343.6616728   -92.706         0.            0.       ]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6        -532.22453432    2.76177191    0.        ]
------
Step:5, Action:East
State  108
Old Q Values:  [-6.18060000e+03  8.26947840e+00  9.40078706e-01  0.00000000e+00]
New Q values:  [-6.1806000e+03  8.2694784e+00  3.9370827e-01  0.0000000e+00]
Reward: -1  Episode Reward:  25
xxxxx
xga x
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.01561177e+04 -5.99568600e+03  2.05892262e+00 -6.05091086e+03]
------
Step:6, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -6.50729832e-01  9.25168127e+00]
New Q values:  [ -281.736      -8914.18985193   144.97209943     9.25168127]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         486.10797122 -180.6          16.83608995]
------
Step:7, Action:South
State  138
Old Q Values:  [-180.6         486.10797122 -180.6          16.83608995]
New Q values:  [-180.6         371.39668758 -180.6          16.83608995]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  0.         591.84499697   0.           0.        ]
------
Step:8, Action:South
State  210
Old Q Values:  [1135.62329686 1044.39778723 -180.6           3.07790274]
New Q values:  [1135.62329686  842.75369309 -180.6           3.07790274]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1349.98971789 -6251.71315483 -6173.56321028  1398.64859401]
------
Step:9, Action:North
State  288
Old Q Values:  [ 1349.98971789 -6251.71315483 -6173.56321028  1398.64859401]
New Q values:  [  716.94938625 -6251.71315483 -6173.56321028  1398.64859401]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x .ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  0.         591.84499697   0.           0.        ]
------
Step:10, Action:West
State  216
Old Q Values:  [-9.38271823e+02  7.72301134e+02 -6.17035694e+03  5.73803921e-01]
New Q values:  [-9.38271823e+02  7.72301134e+02 -6.17035694e+03  6.08663514e+00]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[1.52371192 0.         0.         0.024     ]
------
Step:11, Action:North
State  201
Old Q Values:  [1.52371192 0.         0.         0.024     ]
New Q values:  [1.47599573 0.         0.         0.024     ]
Reward: -1  Episode Reward:  39
xxxxx
x agx
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
------
Step:12, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.05892262e+00 -6.05091086e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.05892262e+00 -2.41937917e+03]
Reward: -1  Episode Reward:  38
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            5.28391089 -3895.20980426     0.        ]
------
Step:13, Action:South
State  107
Old Q Values:  [-2.52351696e+02  7.66123994e+00 -2.58259038e-02 -2.52781922e+02]
New Q values:  [-2.52351696e+02  2.46449597e+00 -2.58259038e-02 -2.52781922e+02]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
------
Step:14, Action:South
State  188
Old Q Values:  [-1343.6616728   -92.706         0.            0.       ]
New Q values:  [-1343.6616728   -755.35550854     0.             0.        ]
Reward: 9  Episode Reward:  46
xxxxx
x   x
xg  x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2412.24369512 -6457.4598     -6000.6        -6307.02      ]
------
Step:15, Action:East
State  257
Old Q Values:  [  -0.6 -180.6    0.     0. ]
New Q values:  [-6.0000000e-01 -1.8060000e+02  6.0514477e+04  0.0000000e+00]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
x g x
x a x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[-9252.43762121    22.23326427 21691.29016672     0.        ]
------
Step:1, Action:East
State  193
Old Q Values:  [ 11.30318871   0.         309.92100343   0.        ]
New Q values:  [ 11.30318871   0.         361.05874149   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-9.38271823e+02  7.72301134e+02 -6.17035694e+03  6.08663514e+00]
------
Step:2, Action:South
State  208
Old Q Values:  [ 2278.6387596   1080.6993756  -2651.70614553 -3344.18956062]
New Q values:  [ 2278.6387596    857.27432844 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  716.94938625 -6251.71315483 -6173.56321028  1398.64859401]
------
Step:3, Action:West
State  288
Old Q Values:  [  716.94938625 -6251.71315483 -6173.56321028  1398.64859401]
New Q values:  [  716.94938625 -6251.71315483 -6173.56321028  1073.93647506]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 861.92338482 -180.6        1696.92345819    8.424     ]
------
Step:4, Action:East
State  273
Old Q Values:  [ 1554.20786664     0.         -4601.11612003     0.        ]
New Q values:  [ 1554.20786664     0.         -1518.86550549     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x. .x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  716.94938625 -6251.71315483 -6173.56321028  1073.93647506]
------
Step:5, Action:West
State  288
Old Q Values:  [  716.94938625 -6251.71315483 -6173.56321028  1073.93647506]
New Q values:  [  716.94938625 -6251.71315483 -6173.56321028   895.23695002]
Reward: -1  Episode Reward:  25
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1554.20786664     0.         -1518.86550549     0.        ]
------
Step:6, Action:North
State  273
Old Q Values:  [ 1554.20786664     0.         -1518.86550549     0.        ]
New Q values:  [  729.4007691      0.         -1518.86550549     0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x. .x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[ 11.30318871   0.         361.05874149   0.        ]
------
Step:7, Action:North
State  193
Old Q Values:  [ 11.30318871   0.         361.05874149   0.        ]
New Q values:  [  5.38778645   0.         361.05874149   0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.agx
x.  x
x.  x
xxxxx
Step:8, Action:North
State  123
Old Q Values:  [-2.68351473e+02 -6.00060000e+03 -2.69821910e-01  1.20866554e+01]
New Q values:  [-2.84314593e+02 -6.00060000e+03 -2.69821910e-01  1.20866554e+01]
Reward: -301  Episode Reward:  -278
xxxxx
x.a.x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.84314593e+02 -6.00060000e+03 -2.69821910e-01  1.20866554e+01]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -8914.18985193   144.97209943     9.25168127]
New Q values:  [ -281.736      -8914.18985193   144.97209943     9.8400213 ]
Reward: 9  Episode Reward:  -269
xxxxx
xa .x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  2.46449597e+00 -2.58259038e-02 -2.52781922e+02]
------
Step:10, Action:South
State  102
Old Q Values:  [-1.8060e+02 -6.0006e+03 -6.0000e-01  0.0000e+00]
New Q values:  [-1.80600000e+02 -8.39473682e+03 -6.00000000e-01  0.00000000e+00]
Reward: -9991  Episode Reward:  -10260
xxxxx
x  .x
xg  x
x.  x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  5.38778645   0.         361.05874149   0.        ]
------
Step:1, Action:East
State  193
Old Q Values:  [  5.38778645   0.         361.05874149   0.        ]
New Q values:  [    5.38778645     0.         -5166.58487553     0.        ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x. gx
x...x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2278.6387596    857.27432844 -2651.70614553 -3344.18956062]
------
Step:1, Action:North
State  216
Old Q Values:  [-9.38271823e+02  7.72301134e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 2.11982861e+01  7.72301134e+02 -6.17035694e+03  6.08663514e+00]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         1303.69005067  -179.38454759 -2780.03472576]
------
Step:2, Action:South
State  136
Old Q Values:  [-6180.6         1303.69005067  -179.38454759 -2780.03472576]
New Q values:  [-6180.6         1204.46764815  -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2278.6387596    857.27432844 -2651.70614553 -3344.18956062]
------
Step:3, Action:North
State  216
Old Q Values:  [ 2.11982861e+01  7.72301134e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 3.69219609e+02  7.72301134e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  7
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         1204.46764815  -179.38454759 -2780.03472576]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6         371.39668758 -180.6          16.83608995]
New Q values:  [-180.6         379.64901515 -180.6          16.83608995]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.69219609e+02  7.72301134e+02 -6.17035694e+03  6.08663514e+00]
------
Step:5, Action:South
State  210
Old Q Values:  [1135.62329686  842.75369309 -180.6           3.07790274]
New Q values:  [1135.62329686  611.07256224 -180.6           3.07790274]
Reward: 9  Episode Reward:  15
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  716.94938625 -6251.71315483 -6173.56321028   895.23695002]
------
Step:6, Action:West
State  288
Old Q Values:  [  716.94938625 -6251.71315483 -6173.56321028   895.23695002]
New Q values:  [  716.94938625 -6251.71315483 -6173.56321028 -5127.42818254]
Reward: -9991  Episode Reward:  -9976
xxxxx
x.  x
x.. x
x.g x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6        -532.22453432    2.76177191    0.        ]
------
Step:1, Action:East
State  111
Old Q Values:  [0.         5.92204016 0.10609835 0.        ]
New Q values:  [ 0.          5.92204016 48.93406917  0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193   144.97209943     9.8400213 ]
------
Step:2, Action:East
State  123
Old Q Values:  [-2.84314593e+02 -6.00060000e+03 -2.69821910e-01  1.20866554e+01]
New Q values:  [ -284.31459256 -6000.6          119.18677578    12.08665543]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         379.64901515 -180.6          16.83608995]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6         379.64901515 -180.6          16.83608995]
New Q values:  [-180.6         379.64901515 -180.6          41.89046871]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          119.18677578    12.08665543]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193   144.97209943     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193   171.28354432     9.8400213 ]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         379.64901515 -180.6          41.89046871]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6         379.64901515 -180.6          41.89046871]
New Q values:  [-180.6         334.81310515 -180.6          41.89046871]
Reward: 9  Episode Reward:  25
xxxxx
x   x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  0.         591.84499697   0.           0.        ]
------
Step:6, Action:South
State  216
Old Q Values:  [ 3.69219609e+02  7.72301134e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 3.69219609e+02  5.29405269e+02 -6.17035694e+03  6.08663514e+00]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  716.94938625 -6251.71315483 -6173.56321028 -5127.42818254]
------
Step:7, Action:North
State  288
Old Q Values:  [  716.94938625 -6251.71315483 -6173.56321028 -5127.42818254]
New Q values:  [  445.00133531 -6251.71315483 -6173.56321028 -5127.42818254]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.69219609e+02  5.29405269e+02 -6.17035694e+03  6.08663514e+00]
------
Step:8, Action:South
State  216
Old Q Values:  [ 3.69219609e+02  5.29405269e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 3.69219609e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  445.00133531 -6251.71315483 -6173.56321028 -5127.42818254]
------
Step:9, Action:North
State  288
Old Q Values:  [  445.00133531 -6251.71315483 -6173.56321028 -5127.42818254]
New Q values:  [  288.16641679 -6251.71315483 -6173.56321028 -5127.42818254]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.69219609e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:10, Action:North
State  210
Old Q Values:  [1135.62329686  611.07256224 -180.6           3.07790274]
New Q values:  [ 554.09325029  611.07256224 -180.6           3.07790274]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x . x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         334.81310515 -180.6          41.89046871]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6         334.81310515 -180.6          41.89046871]
New Q values:  [-180.6         244.09112472 -180.6          41.89046871]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.69219609e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:12, Action:North
State  210
Old Q Values:  [ 554.09325029  611.07256224 -180.6           3.07790274]
New Q values:  [ 294.26463753  611.07256224 -180.6           3.07790274]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x . x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         244.09112472 -180.6          41.89046871]
------
Step:13, Action:South
State  138
Old Q Values:  [-180.6         244.09112472 -180.6          41.89046871]
New Q values:  [-180.6         207.80233255 -180.6          41.89046871]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.69219609e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:14, Action:North
State  216
Old Q Values:  [ 3.69219609e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 5.08428138e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  26
xxxxx
xg ax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         1204.46764815  -179.38454759 -2780.03472576]
------
Step:15, Action:South
State  136
Old Q Values:  [-6180.6         1204.46764815  -179.38454759 -2780.03472576]
New Q values:  [-6180.6          633.71550066  -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5.08428138e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:16, Action:North
State  216
Old Q Values:  [ 5.08428138e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [-5.60711409e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
Reward: -10001  Episode Reward:  -9976
xxxxx
x  gx
x . x
x.. x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 294.26463753  611.07256224 -180.6           3.07790274]
------
Step:1, Action:South
State  210
Old Q Values:  [ 294.26463753  611.07256224 -180.6           3.07790274]
New Q values:  [ 294.26463753  336.27894993 -180.6           3.07790274]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  288.16641679 -6251.71315483 -6173.56321028 -5127.42818254]
------
Step:2, Action:North
State  288
Old Q Values:  [  288.16641679 -6251.71315483 -6173.56321028 -5127.42818254]
New Q values:  [  798.25819459 -6251.71315483 -6173.56321028 -5127.42818254]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2278.6387596    857.27432844 -2651.70614553 -3344.18956062]
------
Step:3, Action:North
State  208
Old Q Values:  [ 2278.6387596    857.27432844 -2651.70614553 -3344.18956062]
New Q values:  [ 1106.97015404   857.27432844 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  17
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          633.71550066  -179.38454759 -2780.03472576]
------
Step:4, Action:South
State  136
Old Q Values:  [-6180.6          633.71550066  -179.38454759 -2780.03472576]
New Q values:  [-6180.6          584.97724647  -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  16
xxxxx
xg. x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1106.97015404   857.27432844 -2651.70614553 -3344.18956062]
------
Step:5, Action:North
State  208
Old Q Values:  [ 1106.97015404   857.27432844 -2651.70614553 -3344.18956062]
New Q values:  [ 1364.39832783   857.27432844 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  15
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        3074.03422071 -180.00807518  463.21231272]
------
Step:6, Action:South
State  130
Old Q Values:  [-180.6        3074.03422071 -180.00807518  463.21231272]
New Q values:  [-180.6        1329.89737327 -180.00807518  463.21231272]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 294.26463753  336.27894993 -180.6           3.07790274]
------
Step:7, Action:South
State  210
Old Q Values:  [ 294.26463753  336.27894993 -180.6           3.07790274]
New Q values:  [ 294.26463753  373.38903835 -180.6           3.07790274]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  798.25819459 -6251.71315483 -6173.56321028 -5127.42818254]
------
Step:8, Action:North
State  288
Old Q Values:  [  798.25819459 -6251.71315483 -6173.56321028 -5127.42818254]
New Q values:  [  430.71998934 -6251.71315483 -6173.56321028 -5127.42818254]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 294.26463753  373.38903835 -180.6           3.07790274]
------
Step:9, Action:South
State  208
Old Q Values:  [ 1364.39832783   857.27432844 -2651.70614553 -3344.18956062]
New Q values:  [ 1364.39832783   471.52572818 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  430.71998934 -6251.71315483 -6173.56321028 -5127.42818254]
------
Step:10, Action:North
State  288
Old Q Values:  [  430.71998934 -6251.71315483 -6173.56321028 -5127.42818254]
New Q values:  [  283.70470724 -6251.71315483 -6173.56321028 -5127.42818254]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 294.26463753  373.38903835 -180.6           3.07790274]
------
Step:11, Action:South
State  210
Old Q Values:  [ 294.26463753  373.38903835 -180.6           3.07790274]
New Q values:  [ 294.26463753  233.86702751 -180.6           3.07790274]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  283.70470724 -6251.71315483 -6173.56321028 -5127.42818254]
------
Step:12, Action:North
State  288
Old Q Values:  [  283.70470724 -6251.71315483 -6173.56321028 -5127.42818254]
New Q values:  [  201.16127416 -6251.71315483 -6173.56321028 -5127.42818254]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 294.26463753  233.86702751 -180.6           3.07790274]
------
Step:13, Action:North
State  208
Old Q Values:  [ 1364.39832783   471.52572818 -2651.70614553 -3344.18956062]
New Q values:  [  607.5000309    471.52572818 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         207.80233255 -180.6          41.89046871]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6         207.80233255 -180.6          41.89046871]
New Q values:  [-180.6         207.80233255 -180.6          73.54125078]
Reward: 9  Episode Reward:  16
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193   171.28354432     9.8400213 ]
------
Step:15, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.05892262e+00 -2.41937917e+03]
New Q values:  [-10156.11771313  -5995.686         175.71674299  -2419.37916968]
Reward: -1  Episode Reward:  15
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          584.97724647  -179.38454759 -2780.03472576]
------
Step:16, Action:South
State  136
Old Q Values:  [-6180.6          584.97724647  -179.38454759 -2780.03472576]
New Q values:  [-6180.6          415.64090786  -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  14
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  607.5000309    471.52572818 -2651.70614553 -3344.18956062]
------
Step:17, Action:North
State  208
Old Q Values:  [  607.5000309    471.52572818 -2651.70614553 -3344.18956062]
New Q values:  [  367.09228472   471.52572818 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  13
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          415.64090786  -179.38454759 -2780.03472576]
------
Step:18, Action:South
State  136
Old Q Values:  [-6180.6          415.64090786  -179.38454759 -2780.03472576]
New Q values:  [-6180.6          307.1140816   -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  12
xxxxx
x. gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  367.09228472   471.52572818 -2651.70614553 -3344.18956062]
------
Step:19, Action:South
State  208
Old Q Values:  [  367.09228472   471.52572818 -2651.70614553 -3344.18956062]
New Q values:  [  367.09228472   248.35867352 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  201.16127416 -6251.71315483 -6173.56321028 -5127.42818254]
------
Step:20, Action:West
State  288
Old Q Values:  [  201.16127416 -6251.71315483 -6173.56321028 -5127.42818254]
New Q values:  [  201.16127416 -6251.71315483 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 861.92338482 -180.6        1696.92345819    8.424     ]
------
Step:21, Action:East
State  272
Old Q Values:  [ 861.92338482 -180.6        1696.92345819    8.424     ]
New Q values:  [ 861.92338482 -180.6         738.51776552    8.424     ]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  201.16127416 -6251.71315483 -6173.56321028 -1542.49423556]
------
Step:22, Action:North
State  288
Old Q Values:  [  201.16127416 -6251.71315483 -6173.56321028 -1542.49423556]
New Q values:  [  168.14390092 -6251.71315483 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 294.26463753  233.86702751 -180.6           3.07790274]
------
Step:23, Action:North
State  208
Old Q Values:  [  367.09228472   248.35867352 -2651.70614553 -3344.18956062]
New Q values:  [  208.57761365   248.35867352 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         207.80233255 -180.6          73.54125078]
------
Step:24, Action:South
State  138
Old Q Values:  [-180.6         207.80233255 -180.6          73.54125078]
New Q values:  [-180.6         170.80032428 -180.6          73.54125078]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 294.26463753  233.86702751 -180.6           3.07790274]
------
Step:25, Action:North
State  208
Old Q Values:  [  208.57761365   248.35867352 -2651.70614553 -3344.18956062]
New Q values:  [  134.07114275   248.35867352 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         170.80032428 -180.6          73.54125078]
------
Step:26, Action:South
State  138
Old Q Values:  [-180.6         170.80032428 -180.6          73.54125078]
New Q values:  [-180.6         142.22773177 -180.6          73.54125078]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  134.07114275   248.35867352 -2651.70614553 -3344.18956062]
------
Step:27, Action:South
State  208
Old Q Values:  [  134.07114275   248.35867352 -2651.70614553 -3344.18956062]
New Q values:  [  134.07114275   149.18663968 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  3
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  168.14390092 -6251.71315483 -6173.56321028 -1542.49423556]
------
Step:28, Action:North
State  288
Old Q Values:  [  168.14390092 -6251.71315483 -6173.56321028 -1542.49423556]
New Q values:  [  111.41355227 -6251.71315483 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  2
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  134.07114275   149.18663968 -2651.70614553 -3344.18956062]
------
Step:29, Action:South
State  208
Old Q Values:  [  134.07114275   149.18663968 -2651.70614553 -3344.18956062]
New Q values:  [  134.07114275    92.49872156 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  1
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  111.41355227 -6251.71315483 -6173.56321028 -1542.49423556]
------
Step:30, Action:North
State  288
Old Q Values:  [  111.41355227 -6251.71315483 -6173.56321028 -1542.49423556]
New Q values:  [   84.18676373 -6251.71315483 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  0
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  134.07114275    92.49872156 -2651.70614553 -3344.18956062]
------
Step:31, Action:North
State  210
Old Q Values:  [ 294.26463753  233.86702751 -180.6           3.07790274]
New Q values:  [ 159.77417454  233.86702751 -180.6           3.07790274]
Reward: -1  Episode Reward:  -1
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         142.22773177 -180.6          73.54125078]
------
Step:32, Action:South
State  138
Old Q Values:  [-180.6         142.22773177 -180.6          73.54125078]
New Q values:  [-180.6          96.51243553 -180.6          73.54125078]
Reward: -1  Episode Reward:  -2
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  134.07114275    92.49872156 -2651.70614553 -3344.18956062]
------
Step:33, Action:North
State  208
Old Q Values:  [  134.07114275    92.49872156 -2651.70614553 -3344.18956062]
New Q values:  [  145.16268158    92.49872156 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -3
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          307.1140816   -179.38454759 -2780.03472576]
------
Step:34, Action:South
State  136
Old Q Values:  [-6180.6          307.1140816   -179.38454759 -2780.03472576]
New Q values:  [-6180.6          165.79443711  -179.38454759 -2780.03472576]
Reward: -1  Episode Reward:  -4
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  145.16268158    92.49872156 -2651.70614553 -3344.18956062]
------
Step:35, Action:North
State  208
Old Q Values:  [  145.16268158    92.49872156 -2651.70614553 -3344.18956062]
New Q values:  [  107.20340376    92.49872156 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -5
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          165.79443711  -179.38454759 -2780.03472576]
------
Step:36, Action:South
State  138
Old Q Values:  [-180.6          96.51243553 -180.6          73.54125078]
New Q values:  [-180.6          70.16599534 -180.6          73.54125078]
Reward: -1  Episode Reward:  -6
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  107.20340376    92.49872156 -2651.70614553 -3344.18956062]
------
Step:37, Action:North
State  208
Old Q Values:  [  107.20340376    92.49872156 -2651.70614553 -3344.18956062]
New Q values:  [   64.34373674    92.49872156 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -7
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          70.16599534 -180.6          73.54125078]
------
Step:38, Action:West
State  138
Old Q Values:  [-180.6          70.16599534 -180.6          73.54125078]
New Q values:  [-180.6          70.16599534 -180.6          80.20156361]
Reward: -1  Episode Reward:  -8
xxxxx
x.a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193   171.28354432     9.8400213 ]
------
Step:39, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193   171.28354432     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193    91.97388681     9.8400213 ]
Reward: -1  Episode Reward:  -9
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          70.16599534 -180.6          80.20156361]
------
Step:40, Action:West
State  136
Old Q Values:  [-6180.6          165.79443711  -179.38454759 -2780.03472576]
New Q values:  [-6180.6          165.79443711  -179.38454759 -1059.89886741]
Reward: -1  Episode Reward:  -10
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         175.71674299  -2419.37916968]
------
Step:41, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         175.71674299  -2419.37916968]
New Q values:  [-10156.11771313  -5995.686         119.42502833  -2419.37916968]
Reward: -1  Episode Reward:  -11
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          165.79443711  -179.38454759 -1059.89886741]
------
Step:42, Action:South
State  136
Old Q Values:  [-6180.6          165.79443711  -179.38454759 -1059.89886741]
New Q values:  [-6180.6           93.46739131  -179.38454759 -1059.89886741]
Reward: -1  Episode Reward:  -12
xxxxx
x. gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   64.34373674    92.49872156 -2651.70614553 -3344.18956062]
------
Step:43, Action:South
State  208
Old Q Values:  [   64.34373674    92.49872156 -2651.70614553 -3344.18956062]
New Q values:  [   64.34373674    61.65551774 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -13
xxxxx
x.  x
x..gx
x. ax
xxxxx
Step:44, Action:South
State  288
Old Q Values:  [   84.18676373 -6251.71315483 -6173.56321028 -1542.49423556]
New Q values:  [   84.18676373 -8656.02923281 -6173.56321028 -1542.49423556]
Reward: -10301  Episode Reward:  -10314
xxxxx
x.  x
x.. x
x. gx
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 159.77417454  233.86702751 -180.6           3.07790274]
------
Step:1, Action:North
State  210
Old Q Values:  [ 159.77417454  233.86702751 -180.6           3.07790274]
New Q values:  [  93.3701389   233.86702751 -180.6           3.07790274]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          70.16599534 -180.6          80.20156361]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6          70.16599534 -180.6          80.20156361]
New Q values:  [-180.6          70.16599534 -180.6          65.07279149]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193    91.97388681     9.8400213 ]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193    91.97388681     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193    57.23935333     9.8400213 ]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          70.16599534 -180.6          65.07279149]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6          70.16599534 -180.6          65.07279149]
New Q values:  [-180.6          97.62650639 -180.6          65.07279149]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  93.3701389   233.86702751 -180.6           3.07790274]
------
Step:5, Action:South
State  208
Old Q Values:  [   64.34373674    61.65551774 -2651.70614553 -3344.18956062]
New Q values:  [   64.34373674    49.31823622 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   84.18676373 -8656.02923281 -6173.56321028 -1542.49423556]
------
Step:6, Action:North
State  288
Old Q Values:  [   84.18676373 -8656.02923281 -6173.56321028 -1542.49423556]
New Q values:  [  103.23481375 -8656.02923281 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  93.3701389   233.86702751 -180.6           3.07790274]
------
Step:7, Action:South
State  208
Old Q Values:  [   64.34373674    49.31823622 -2651.70614553 -3344.18956062]
New Q values:  [   64.34373674    50.09773861 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  103.23481375 -8656.02923281 -6173.56321028 -1542.49423556]
------
Step:8, Action:North
State  288
Old Q Values:  [  103.23481375 -8656.02923281 -6173.56321028 -1542.49423556]
New Q values:  [   59.99704652 -8656.02923281 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  12
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   64.34373674    50.09773861 -2651.70614553 -3344.18956062]
------
Step:9, Action:North
State  208
Old Q Values:  [   64.34373674    50.09773861 -2651.70614553 -3344.18956062]
New Q values:  [   54.42544661    50.09773861 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          97.62650639 -180.6          65.07279149]
------
Step:10, Action:South
State  138
Old Q Values:  [-180.6          97.62650639 -180.6          65.07279149]
New Q values:  [-180.6         108.61071081 -180.6          65.07279149]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  93.3701389   233.86702751 -180.6           3.07790274]
------
Step:11, Action:South
State  208
Old Q Values:  [   54.42544661    50.09773861 -2651.70614553 -3344.18956062]
New Q values:  [   54.42544661    37.4382094  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   59.99704652 -8656.02923281 -6173.56321028 -1542.49423556]
------
Step:12, Action:North
State  288
Old Q Values:  [   59.99704652 -8656.02923281 -6173.56321028 -1542.49423556]
New Q values:  [   39.72645259 -8656.02923281 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  8
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   54.42544661    37.4382094  -2651.70614553 -3344.18956062]
------
Step:13, Action:North
State  208
Old Q Values:  [   54.42544661    37.4382094  -2651.70614553 -3344.18956062]
New Q values:  [   49.21039604    37.4382094  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  7
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           93.46739131  -179.38454759 -1059.89886741]
------
Step:14, Action:South
State  136
Old Q Values:  [-6180.6           93.46739131  -179.38454759 -1059.89886741]
New Q values:  [-6180.6           51.55007534  -179.38454759 -1059.89886741]
Reward: -1  Episode Reward:  6
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   49.21039604    37.4382094  -2651.70614553 -3344.18956062]
------
Step:15, Action:North
State  208
Old Q Values:  [   49.21039604    37.4382094  -2651.70614553 -3344.18956062]
New Q values:  [   34.54918102    37.4382094  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  5
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           51.55007534  -179.38454759 -1059.89886741]
------
Step:16, Action:South
State  136
Old Q Values:  [-6180.6           51.55007534  -179.38454759 -1059.89886741]
New Q values:  [-6180.6           31.25149295  -179.38454759 -1059.89886741]
Reward: -1  Episode Reward:  4
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   34.54918102    37.4382094  -2651.70614553 -3344.18956062]
------
Step:17, Action:South
State  208
Old Q Values:  [   34.54918102    37.4382094  -2651.70614553 -3344.18956062]
New Q values:  [   34.54918102    26.29321954 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  3
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   39.72645259 -8656.02923281 -6173.56321028 -1542.49423556]
------
Step:18, Action:North
State  288
Old Q Values:  [   39.72645259 -8656.02923281 -6173.56321028 -1542.49423556]
New Q values:  [   25.65533534 -8656.02923281 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  2
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   34.54918102    26.29321954 -2651.70614553 -3344.18956062]
------
Step:19, Action:North
State  208
Old Q Values:  [   34.54918102    26.29321954 -2651.70614553 -3344.18956062]
New Q values:  [   22.59512029    26.29321954 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  1
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           31.25149295  -179.38454759 -1059.89886741]
------
Step:20, Action:South
State  136
Old Q Values:  [-6180.6           31.25149295  -179.38454759 -1059.89886741]
New Q values:  [-6180.6           19.78856304  -179.38454759 -1059.89886741]
Reward: -1  Episode Reward:  0
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   22.59512029    26.29321954 -2651.70614553 -3344.18956062]
------
Step:21, Action:South
State  208
Old Q Values:  [   22.59512029    26.29321954 -2651.70614553 -3344.18956062]
New Q values:  [   22.59512029    17.61388842 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -1
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   25.65533534 -8656.02923281 -6173.56321028 -1542.49423556]
------
Step:22, Action:North
State  288
Old Q Values:  [   25.65533534 -8656.02923281 -6173.56321028 -1542.49423556]
New Q values:  [   16.44067022 -8656.02923281 -6173.56321028 -1542.49423556]
Reward: -1  Episode Reward:  -2
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   22.59512029    17.61388842 -2651.70614553 -3344.18956062]
------
Step:23, Action:South
State  208
Old Q Values:  [   22.59512029    17.61388842 -2651.70614553 -3344.18956062]
New Q values:  [   22.59512029    11.37775643 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -3
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   16.44067022 -8656.02923281 -6173.56321028 -1542.49423556]
------
Step:24, Action:West
State  288
Old Q Values:  [   16.44067022 -8656.02923281 -6173.56321028 -1542.49423556]
New Q values:  [   16.44067022 -8656.02923281 -6173.56321028  -353.02067878]
Reward: 9  Episode Reward:  6
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 861.92338482 -180.6         738.51776552    8.424     ]
------
Step:25, Action:East
State  272
Old Q Values:  [ 861.92338482 -180.6         738.51776552    8.424     ]
New Q values:  [ 861.92338482 -180.6         299.73930728    8.424     ]
Reward: -1  Episode Reward:  5
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   16.44067022 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:26, Action:North
State  288
Old Q Values:  [   16.44067022 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [   76.13637634 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  93.3701389   233.86702751 -180.6           3.07790274]
------
Step:27, Action:South
State  210
Old Q Values:  [  93.3701389   233.86702751 -180.6           3.07790274]
New Q values:  [  93.3701389   115.78772391 -180.6           3.07790274]
Reward: -1  Episode Reward:  3
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   76.13637634 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:28, Action:North
State  288
Old Q Values:  [   76.13637634 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [   64.59086771 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  2
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  93.3701389   115.78772391 -180.6           3.07790274]
------
Step:29, Action:North
State  208
Old Q Values:  [   22.59512029    11.37775643 -2651.70614553 -3344.18956062]
New Q values:  [   41.02126136    11.37775643 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  1
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         108.61071081 -180.6          65.07279149]
------
Step:30, Action:West
State  138
Old Q Values:  [-180.6         108.61071081 -180.6          65.07279149]
New Q values:  [-180.6         108.61071081 -180.6          61.18514933]
Reward: -1  Episode Reward:  0
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          119.18677578    12.08665543]
------
Step:31, Action:East
State  123
Old Q Values:  [ -284.31459256 -6000.6          119.18677578    12.08665543]
New Q values:  [ -284.31459256 -6000.6           79.65792356    12.08665543]
Reward: -1  Episode Reward:  -1
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         108.61071081 -180.6          61.18514933]
------
Step:32, Action:West
State  138
Old Q Values:  [-180.6         108.61071081 -180.6          61.18514933]
New Q values:  [-180.6         108.61071081 -180.6          47.7714368 ]
Reward: -1  Episode Reward:  -2
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6           79.65792356    12.08665543]
------
Step:33, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193    57.23935333     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193    54.87895457     9.8400213 ]
Reward: -1  Episode Reward:  -3
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         108.61071081 -180.6          47.7714368 ]
------
Step:34, Action:South
State  138
Old Q Values:  [-180.6         108.61071081 -180.6          47.7714368 ]
New Q values:  [-180.6         77.5806015 -180.6         47.7714368]
Reward: -1  Episode Reward:  -4
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  93.3701389   115.78772391 -180.6           3.07790274]
------
Step:35, Action:South
State  210
Old Q Values:  [  93.3701389   115.78772391 -180.6           3.07790274]
New Q values:  [  93.3701389    65.09234988 -180.6           3.07790274]
Reward: -1  Episode Reward:  -5
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   64.59086771 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:36, Action:North
State  288
Old Q Values:  [   64.59086771 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [   53.24738875 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  -6
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  93.3701389    65.09234988 -180.6           3.07790274]
------
Step:37, Action:North
State  208
Old Q Values:  [   41.02126136    11.37775643 -2651.70614553 -3344.18956062]
New Q values:  [   39.08268499    11.37775643 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -7
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         77.5806015 -180.6         47.7714368]
------
Step:38, Action:West
State  138
Old Q Values:  [-180.6         77.5806015 -180.6         47.7714368]
New Q values:  [-180.6          77.5806015  -180.6          34.97226109]
Reward: -1  Episode Reward:  -8
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193    54.87895457     9.8400213 ]
------
Step:39, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193    54.87895457     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193    44.62576228     9.8400213 ]
Reward: -1  Episode Reward:  -9
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          77.5806015  -180.6          34.97226109]
------
Step:40, Action:South
State  136
Old Q Values:  [-6180.6           19.78856304  -179.38454759 -1059.89886741]
New Q values:  [-6180.6           19.04023072  -179.38454759 -1059.89886741]
Reward: -1  Episode Reward:  -10
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   39.08268499    11.37775643 -2651.70614553 -3344.18956062]
------
Step:41, Action:North
State  208
Old Q Values:  [   39.08268499    11.37775643 -2651.70614553 -3344.18956062]
New Q values:  [   38.30725445    11.37775643 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -11
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          77.5806015  -180.6          34.97226109]
------
Step:42, Action:South
State  136
Old Q Values:  [-6180.6           19.04023072  -179.38454759 -1059.89886741]
New Q values:  [-6180.6           18.50826862  -179.38454759 -1059.89886741]
Reward: -1  Episode Reward:  -12
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   38.30725445    11.37775643 -2651.70614553 -3344.18956062]
------
Step:43, Action:North
State  208
Old Q Values:  [   38.30725445    11.37775643 -2651.70614553 -3344.18956062]
New Q values:  [   37.99708223    11.37775643 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -13
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          77.5806015  -180.6          34.97226109]
------
Step:44, Action:South
State  138
Old Q Values:  [-180.6          77.5806015  -180.6          34.97226109]
New Q values:  [-180.6          58.44328227 -180.6          34.97226109]
Reward: -1  Episode Reward:  -14
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  93.3701389    65.09234988 -180.6           3.07790274]
------
Step:45, Action:North
State  210
Old Q Values:  [  93.3701389    65.09234988 -180.6           3.07790274]
New Q values:  [  54.28104024   65.09234988 -180.6           3.07790274]
Reward: -1  Episode Reward:  -15
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          58.44328227 -180.6          34.97226109]
------
Step:46, Action:South
State  138
Old Q Values:  [-180.6          58.44328227 -180.6          34.97226109]
New Q values:  [-180.6          42.30501787 -180.6          34.97226109]
Reward: -1  Episode Reward:  -16
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  54.28104024   65.09234988 -180.6           3.07790274]
------
Step:47, Action:North
State  210
Old Q Values:  [  54.28104024   65.09234988 -180.6           3.07790274]
New Q values:  [  33.80392146   65.09234988 -180.6           3.07790274]
Reward: -1  Episode Reward:  -17
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          42.30501787 -180.6          34.97226109]
------
Step:48, Action:South
State  138
Old Q Values:  [-180.6          42.30501787 -180.6          34.97226109]
New Q values:  [-180.6          27.72113182 -180.6          34.97226109]
Reward: -1  Episode Reward:  -18
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   37.99708223    11.37775643 -2651.70614553 -3344.18956062]
------
Step:49, Action:North
State  208
Old Q Values:  [   37.99708223    11.37775643 -2651.70614553 -3344.18956062]
New Q values:  [   25.09051122    11.37775643 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -19
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          27.72113182 -180.6          34.97226109]
------
Step:50, Action:West
State  138
Old Q Values:  [-180.6          27.72113182 -180.6          34.97226109]
New Q values:  [-180.6          27.72113182 -180.6          26.77663312]
Reward: -1  Episode Reward:  -20
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193    44.62576228     9.8400213 ]
------
Step:51, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193    44.62576228     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193    25.56664446     9.8400213 ]
Reward: -1  Episode Reward:  -21
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          27.72113182 -180.6          26.77663312]
------
Step:52, Action:South
State  138
Old Q Values:  [-180.6          27.72113182 -180.6          26.77663312]
New Q values:  [-180.6          30.01615769 -180.6          26.77663312]
Reward: -1  Episode Reward:  -22
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  33.80392146   65.09234988 -180.6           3.07790274]
------
Step:53, Action:South
State  208
Old Q Values:  [   25.09051122    11.37775643 -2651.70614553 -3344.18956062]
New Q values:  [   25.09051122    19.9253192  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -23
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   53.24738875 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:54, Action:North
State  288
Old Q Values:  [   53.24738875 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [   28.22610887 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  -24
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   25.09051122    19.9253192  -2651.70614553 -3344.18956062]
------
Step:55, Action:North
State  208
Old Q Values:  [   25.09051122    19.9253192  -2651.70614553 -3344.18956062]
New Q values:  [   14.98868507    19.9253192  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -25
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           18.50826862  -179.38454759 -1059.89886741]
------
Step:56, Action:South
State  136
Old Q Values:  [-6180.6           18.50826862  -179.38454759 -1059.89886741]
New Q values:  [-6180.6           12.78090321  -179.38454759 -1059.89886741]
Reward: -1  Episode Reward:  -26
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   14.98868507    19.9253192  -2651.70614553 -3344.18956062]
------
Step:57, Action:South
State  208
Old Q Values:  [   14.98868507    19.9253192  -2651.70614553 -3344.18956062]
New Q values:  [   14.98868507    15.83796034 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -27
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   28.22610887 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:58, Action:North
State  288
Old Q Values:  [   28.22610887 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [   15.44183165 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  -28
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   14.98868507    15.83796034 -2651.70614553 -3344.18956062]
------
Step:59, Action:South
State  208
Old Q Values:  [   14.98868507    15.83796034 -2651.70614553 -3344.18956062]
New Q values:  [   14.98868507    10.36773363 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -29
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   15.44183165 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:60, Action:North
State  288
Old Q Values:  [   15.44183165 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [   10.07333818 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  -30
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   14.98868507    10.36773363 -2651.70614553 -3344.18956062]
------
Step:61, Action:North
State  208
Old Q Values:  [   14.98868507    10.36773363 -2651.70614553 -3344.18956062]
New Q values:  [   14.40032134    10.36773363 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -31
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          30.01615769 -180.6          26.77663312]
------
Step:62, Action:West
State  138
Old Q Values:  [-180.6          30.01615769 -180.6          26.77663312]
New Q values:  [-180.6          30.01615769 -180.6          17.78064658]
Reward: -1  Episode Reward:  -32
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193    25.56664446     9.8400213 ]
------
Step:63, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193    25.56664446     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193    18.63150509     9.8400213 ]
Reward: -1  Episode Reward:  -33
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          30.01615769 -180.6          17.78064658]
------
Step:64, Action:South
State  138
Old Q Values:  [-180.6          30.01615769 -180.6          17.78064658]
New Q values:  [-180.6          15.72655948 -180.6          17.78064658]
Reward: -1  Episode Reward:  -34
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   14.40032134    10.36773363 -2651.70614553 -3344.18956062]
------
Step:65, Action:North
State  208
Old Q Values:  [   14.40032134    10.36773363 -2651.70614553 -3344.18956062]
New Q values:  [   10.49432251    10.36773363 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -35
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          15.72655948 -180.6          17.78064658]
------
Step:66, Action:West
State  138
Old Q Values:  [-180.6          15.72655948 -180.6          17.78064658]
New Q values:  [-180.6          15.72655948 -180.6          30.4096357 ]
Reward: -1  Episode Reward:  -36
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6           79.65792356    12.08665543]
------
Step:67, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193    18.63150509     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193    15.97549275     9.8400213 ]
Reward: -1  Episode Reward:  -37
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          15.72655948 -180.6          30.4096357 ]
------
Step:68, Action:West
State  138
Old Q Values:  [-180.6          15.72655948 -180.6          30.4096357 ]
New Q values:  [-180.6          15.72655948 -180.6          16.3565021 ]
Reward: -1  Episode Reward:  -38
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193    15.97549275     9.8400213 ]
------
Step:69, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193    15.97549275     9.8400213 ]
New Q values:  [ -281.736      -8914.18985193    10.69714773     9.8400213 ]
Reward: -1  Episode Reward:  -39
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          15.72655948 -180.6          16.3565021 ]
------
Step:70, Action:West
State  138
Old Q Values:  [-180.6          15.72655948 -180.6          16.3565021 ]
New Q values:  [-180.6          15.72655948 -180.6           9.15174516]
Reward: -1  Episode Reward:  -40
xxxxx
x.a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8914.18985193    10.69714773     9.8400213 ]
------
Step:71, Action:East
State  122
Old Q Values:  [ -281.736      -8914.18985193    10.69714773     9.8400213 ]
New Q values:  [-2.81736000e+02 -8.91418985e+03  8.39682693e+00  9.84002130e+00]
Reward: -1  Episode Reward:  -41
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          15.72655948 -180.6           9.15174516]
------
Step:72, Action:South
State  138
Old Q Values:  [-180.6          15.72655948 -180.6           9.15174516]
New Q values:  [-180.6          25.21832875 -180.6           9.15174516]
Reward: -1  Episode Reward:  -42
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  33.80392146   65.09234988 -180.6           3.07790274]
------
Step:73, Action:North
State  210
Old Q Values:  [  33.80392146   65.09234988 -180.6           3.07790274]
New Q values:  [  20.48706721   65.09234988 -180.6           3.07790274]
Reward: -1  Episode Reward:  -43
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          25.21832875 -180.6           9.15174516]
------
Step:74, Action:South
State  138
Old Q Values:  [-180.6          25.21832875 -180.6           9.15174516]
New Q values:  [-180.6          29.01503646 -180.6           9.15174516]
Reward: -1  Episode Reward:  -44
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  20.48706721   65.09234988 -180.6           3.07790274]
------
Step:75, Action:South
State  208
Old Q Values:  [   10.49432251    10.36773363 -2651.70614553 -3344.18956062]
New Q values:  [   10.49432251     6.56909491 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -45
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   10.07333818 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:76, Action:North
State  288
Old Q Values:  [   10.07333818 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [ 6.57763203e+00 -8.65602923e+03 -6.17356321e+03 -3.53020679e+02]
Reward: -1  Episode Reward:  -46
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   10.49432251     6.56909491 -2651.70614553 -3344.18956062]
------
Step:77, Action:North
State  208
Old Q Values:  [   10.49432251     6.56909491 -2651.70614553 -3344.18956062]
New Q values:  [   12.30223994     6.56909491 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -47
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          29.01503646 -180.6           9.15174516]
------
Step:78, Action:South
State  138
Old Q Values:  [-180.6          29.01503646 -180.6           9.15174516]
New Q values:  [-180.6          30.53371955 -180.6           9.15174516]
Reward: -1  Episode Reward:  -48
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  20.48706721   65.09234988 -180.6           3.07790274]
------
Step:79, Action:South
State  210
Old Q Values:  [  20.48706721   65.09234988 -180.6           3.07790274]
New Q values:  [  20.48706721   27.41022956 -180.6           3.07790274]
Reward: -1  Episode Reward:  -49
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6.57763203e+00 -8.65602923e+03 -6.17356321e+03 -3.53020679e+02]
------
Step:80, Action:North
State  288
Old Q Values:  [ 6.57763203e+00 -8.65602923e+03 -6.17356321e+03 -3.53020679e+02]
New Q values:  [   10.25412168 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  -50
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  20.48706721   27.41022956 -180.6           3.07790274]
------
Step:81, Action:North
State  208
Old Q Values:  [   12.30223994     6.56909491 -2651.70614553 -3344.18956062]
New Q values:  [   13.48101184     6.56909491 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -51
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          30.53371955 -180.6           9.15174516]
------
Step:82, Action:West
State  138
Old Q Values:  [-180.6          30.53371955 -180.6           9.15174516]
New Q values:  [-180.6          30.53371955 -180.6           6.01270445]
Reward: -1  Episode Reward:  -52
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.39682693e+00  9.84002130e+00]
------
Step:83, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  8.39682693e+00  9.84002130e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  8.39682693e+00  9.62052600e+00]
Reward: 9  Episode Reward:  -43
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  9.48391609e-01 -1.80600000e+02]
------
Step:84, Action:East
State  106
Old Q Values:  [-1.80600000e+02 -6.00060000e+03  9.48391609e-01 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -6.00060000e+03  2.66551444e+00 -1.80600000e+02]
Reward: -1  Episode Reward:  -44
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.39682693e+00  9.62052600e+00]
------
Step:85, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  8.39682693e+00  9.62052600e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  8.39682693e+00  4.04786473e+00]
Reward: -1  Episode Reward:  -45
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  2.66551444e+00 -1.80600000e+02]
------
Step:86, Action:East
State  106
Old Q Values:  [-1.80600000e+02 -6.00060000e+03  2.66551444e+00 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -6.00060000e+03  2.98525386e+00 -1.80600000e+02]
Reward: -1  Episode Reward:  -46
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.39682693e+00  4.04786473e+00]
------
Step:87, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  8.39682693e+00  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.19188466e+01  4.04786473e+00]
Reward: -1  Episode Reward:  -47
xxxxx
x  ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          30.53371955 -180.6           6.01270445]
------
Step:88, Action:South
State  138
Old Q Values:  [-180.6          30.53371955 -180.6           6.01270445]
New Q values:  [-180.6          19.83655669 -180.6           6.01270445]
Reward: -1  Episode Reward:  -48
xxxxx
x   x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  20.48706721   27.41022956 -180.6           3.07790274]
------
Step:89, Action:South
State  210
Old Q Values:  [  20.48706721   27.41022956 -180.6           3.07790274]
New Q values:  [  20.48706721   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  -49
xxxxx
x   x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   10.25412168 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:90, Action:North
State  288
Old Q Values:  [   10.25412168 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [ 7.54595222e+00 -8.65602923e+03 -6.17356321e+03 -3.53020679e+02]
Reward: -1  Episode Reward:  -50
xxxxx
x   x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   13.48101184     6.56909491 -2651.70614553 -3344.18956062]
------
Step:91, Action:North
State  210
Old Q Values:  [  20.48706721   13.44032833 -180.6           3.07790274]
New Q values:  [  13.54579389   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  -51
xxxxx
x  ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          19.83655669 -180.6           6.01270445]
------
Step:92, Action:South
State  138
Old Q Values:  [-180.6          19.83655669 -180.6           6.01270445]
New Q values:  [-180.6          11.37892623 -180.6           6.01270445]
Reward: -1  Episode Reward:  -52
xxxxx
x   x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   13.48101184     6.56909491 -2651.70614553 -3344.18956062]
------
Step:93, Action:North
State  208
Old Q Values:  [   13.48101184     6.56909491 -2651.70614553 -3344.18956062]
New Q values:  [    8.6266757      6.56909491 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -53
xxxxx
xg ax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           12.78090321  -179.38454759 -1059.89886741]
------
Step:94, Action:South
State  138
Old Q Values:  [-180.6          11.37892623 -180.6           6.01270445]
New Q values:  [-180.6           6.5395732  -180.6           6.01270445]
Reward: -1  Episode Reward:  -54
xxxxx
x   x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    8.6266757      6.56909491 -2651.70614553 -3344.18956062]
------
Step:95, Action:North
State  208
Old Q Values:  [    8.6266757      6.56909491 -2651.70614553 -3344.18956062]
New Q values:  [    4.81254224     6.56909491 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -55
xxxxx
x  ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           6.5395732  -180.6           6.01270445]
------
Step:96, Action:South
State  138
Old Q Values:  [-180.6           6.5395732  -180.6           6.01270445]
New Q values:  [ -180.6        -5996.01344225  -180.6            6.01270445]
Reward: -10001  Episode Reward:  -10056
xxxxx
x   x
x..gx
x.  x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 861.92338482 -180.6         299.73930728    8.424     ]
------
Step:1, Action:North
State  276
Old Q Values:  [-1.92000000e-01 -1.80600000e+02  2.77558903e+03 -6.75709867e+00]
New Q values:  [  38.43571403 -180.6        2775.58903347   -6.75709867]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 110.37504677   0.        ]
------
Step:2, Action:East
State  194
Old Q Values:  [ -0.6          2.56078987 110.37504677   0.        ]
New Q values:  [-0.6         2.56078987 53.61375687  0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  13.54579389   13.44032833 -180.6           3.07790274]
------
Step:3, Action:North
State  210
Old Q Values:  [  13.54579389   13.44032833 -180.6           3.07790274]
New Q values:  [ 409.78752954   13.44032833 -180.6           3.07790274]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        1329.89737327 -180.00807518  463.21231272]
------
Step:4, Action:South
State  130
Old Q Values:  [-180.6        1329.89737327 -180.00807518  463.21231272]
New Q values:  [-180.6         654.29520817 -180.00807518  463.21231272]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 409.78752954   13.44032833 -180.6           3.07790274]
------
Step:5, Action:North
State  210
Old Q Values:  [ 409.78752954   13.44032833 -180.6           3.07790274]
New Q values:  [ 359.60357426   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         654.29520817 -180.00807518  463.21231272]
------
Step:6, Action:South
State  130
Old Q Values:  [-180.6         654.29520817 -180.00807518  463.21231272]
New Q values:  [-180.6         368.99915555 -180.00807518  463.21231272]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 359.60357426   13.44032833 -180.6           3.07790274]
------
Step:7, Action:North
State  208
Old Q Values:  [    4.81254224     6.56909491 -2651.70614553 -3344.18956062]
New Q values:  [  140.28871071     6.56909491 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  23
xxxxx
x..ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         368.99915555 -180.00807518  463.21231272]
------
Step:8, Action:West
State  130
Old Q Values:  [-180.6         368.99915555 -180.00807518  463.21231272]
New Q values:  [-180.6         368.99915555 -180.00807518 1048.56081526]
Reward: 9  Episode Reward:  32
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.         2859.58630059    0.        ]
------
Step:9, Action:East
State  114
Old Q Values:  [-180.6           0.         2859.58630059    0.        ]
New Q values:  [-180.6           0.         1145.03833157    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5996.01344225  -180.6            6.01270445]
------
Step:10, Action:West
State  130
Old Q Values:  [-180.6         368.99915555 -180.00807518 1048.56081526]
New Q values:  [-180.6         368.99915555 -180.00807518  502.76536314]
Reward: -1  Episode Reward:  30
xxxxx
x.a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6           2.72750831  279.80345677    0.        ]
------
Step:11, Action:East
State  115
Old Q Values:  [-180.6           2.72750831  279.80345677    0.        ]
New Q values:  [-180.6           2.72750831  113.12519404    0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x. ax
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5996.01344225  -180.6            6.01270445]
------
Step:12, Action:West
State  136
Old Q Values:  [-6180.6           12.78090321  -179.38454759 -1059.89886741]
New Q values:  [-6180.6           12.78090321  -179.38454759  -345.15807743]
Reward: -1  Episode Reward:  28
xxxxx
x.agx
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:13, Action:South
State  115
Old Q Values:  [-180.6           2.72750831  113.12519404    0.        ]
New Q values:  [-180.6           2.10733926  113.12519404    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[    5.38778645     0.         -5166.58487553     0.        ]
------
Step:14, Action:North
State  192
Old Q Values:  [-9252.43762121    22.23326427 21691.29016672     0.        ]
New Q values:  [-3358.06354901    22.23326427 21691.29016672     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x.a x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.         1145.03833157    0.        ]
------
Step:15, Action:East
State  112
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.         0.         3.23427096 0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x.gax
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           12.78090321  -179.38454759  -345.15807743]
------
Step:16, Action:South
State  138
Old Q Values:  [ -180.6        -5996.01344225  -180.6            6.01270445]
New Q values:  [ -180.6        -2295.6066244   -180.6            6.01270445]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5.60711409e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:17, Action:South
State  208
Old Q Values:  [  140.28871071     6.56909491 -2651.70614553 -3344.18956062]
New Q values:  [  140.28871071    10.29142363 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  33
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7.54595222e+00 -8.65602923e+03 -6.17356321e+03 -3.53020679e+02]
------
Step:18, Action:North
State  288
Old Q Values:  [ 7.54595222e+00 -8.65602923e+03 -6.17356321e+03 -3.53020679e+02]
New Q values:  [  105.81713339 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5.60711409e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:19, Action:South
State  208
Old Q Values:  [  140.28871071    10.29142363 -2651.70614553 -3344.18956062]
New Q values:  [  140.28871071    35.26170947 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  31
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  105.81713339 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:20, Action:North
State  288
Old Q Values:  [  105.81713339 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [   83.81346657 -8656.02923281 -6173.56321028  -353.02067878]
Reward: -1  Episode Reward:  30
xxxxx
x. gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  140.28871071    35.26170947 -2651.70614553 -3344.18956062]
------
Step:21, Action:South
State  208
Old Q Values:  [  140.28871071    35.26170947 -2651.70614553 -3344.18956062]
New Q values:  [  140.28871071    38.64872376 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   83.81346657 -8656.02923281 -6173.56321028  -353.02067878]
------
Step:22, Action:West
State  288
Old Q Values:  [   83.81346657 -8656.02923281 -6173.56321028  -353.02067878]
New Q values:  [   83.81346657 -8656.02923281 -6173.56321028    77.01195922]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  729.4007691      0.         -1518.86550549     0.        ]
------
Step:23, Action:North
State  273
Old Q Values:  [  729.4007691      0.         -1518.86550549     0.        ]
New Q values:  [  292.77664358     0.         -1518.86550549     0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[    5.38778645     0.         -5166.58487553     0.        ]
------
Step:24, Action:North
State  195
Old Q Values:  [ 30.44335764   0.         344.48474906   0.        ]
New Q values:  [ 45.51490127   0.         344.48474906   0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x.a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6           2.10733926  113.12519404    0.        ]
------
Step:25, Action:East
State  114
Old Q Values:  [-180.6           0.         1145.03833157    0.        ]
New Q values:  [-180.6           0.          608.24494157    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         368.99915555 -180.00807518  502.76536314]
------
Step:26, Action:West
State  130
Old Q Values:  [-180.6         368.99915555 -180.00807518  502.76536314]
New Q values:  [-180.6         368.99915555 -180.00807518  382.97962773]
Reward: -1  Episode Reward:  24
xxxxx
x.a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.          608.24494157    0.        ]
------
Step:27, Action:East
State  112
Old Q Values:  [0.         0.         3.23427096 0.        ]
New Q values:  [    0.             0.         23403.11597471     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ -180.6        78008.07422108 -8652.84           0.        ]
------
Step:28, Action:South
State  136
Old Q Values:  [-6180.6           12.78090321  -179.38454759  -345.15807743]
New Q values:  [-6180.6          107.91111379  -179.38454759  -345.15807743]
Reward: -1  Episode Reward:  22
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5.60711409e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:29, Action:South
State  208
Old Q Values:  [  140.28871071    38.64872376 -2651.70614553 -3344.18956062]
New Q values:  [  140.28871071    40.00352947 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   83.81346657 -8656.02923281 -6173.56321028    77.01195922]
------
Step:30, Action:North
State  288
Old Q Values:  [   83.81346657 -8656.02923281 -6173.56321028    77.01195922]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028    77.01195922]
Reward: -1  Episode Reward:  20
xxxxx
x. gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  140.28871071    40.00352947 -2651.70614553 -3344.18956062]
------
Step:31, Action:South
State  208
Old Q Values:  [  140.28871071    40.00352947 -2651.70614553 -3344.18956062]
New Q values:  [  140.28871071    38.50499956 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028    77.01195922]
------
Step:32, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028    77.01195922]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028   118.03777676]
Reward: -1  Episode Reward:  18
xxxxx
x. gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  292.77664358     0.         -1518.86550549     0.        ]
------
Step:33, Action:North
State  273
Old Q Values:  [  292.77664358     0.         -1518.86550549     0.        ]
New Q values:  [  118.12699336     0.         -1518.86550549     0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[    5.38778645     0.         -5166.58487553     0.        ]
------
Step:34, Action:North
State  192
Old Q Values:  [-3358.06354901    22.23326427 21691.29016672     0.        ]
New Q values:  [-1161.35193713    22.23326427 21691.29016672     0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.          608.24494157    0.        ]
------
Step:35, Action:East
State  114
Old Q Values:  [-180.6           0.          608.24494157    0.        ]
New Q values:  [-180.6           0.          357.59186495    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         368.99915555 -180.00807518  382.97962773]
------
Step:36, Action:West
State  130
Old Q Values:  [-180.6         368.99915555 -180.00807518  382.97962773]
New Q values:  [-180.6         368.99915555 -180.00807518  259.86941057]
Reward: -1  Episode Reward:  14
xxxxx
x.a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.          357.59186495    0.        ]
------
Step:37, Action:East
State  112
Old Q Values:  [    0.             0.         23403.11597471     0.        ]
New Q values:  [    0.             0.         32763.06865621     0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x.gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ -180.6        78008.07422108 -8652.84           0.        ]
------
Step:38, Action:South
State  136
Old Q Values:  [-6180.6          107.91111379  -179.38454759  -345.15807743]
New Q values:  [-6180.6          145.96319802  -179.38454759  -345.15807743]
Reward: -1  Episode Reward:  12
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5.60711409e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:39, Action:South
State  208
Old Q Values:  [  140.28871071    38.50499956 -2651.70614553 -3344.18956062]
New Q values:  [  140.28871071    50.21333285 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028   118.03777676]
------
Step:40, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028   118.03777676]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028   305.19212615]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 861.92338482 -180.6         299.73930728    8.424     ]
------
Step:41, Action:East
State  273
Old Q Values:  [  118.12699336     0.         -1518.86550549     0.        ]
New Q values:  [ 118.12699336    0.         -516.58856435    0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028   305.19212615]
------
Step:42, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028   305.19212615]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028   380.0538659 ]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 861.92338482 -180.6         299.73930728    8.424     ]
------
Step:43, Action:East
State  272
Old Q Values:  [ 861.92338482 -180.6         299.73930728    8.424     ]
New Q values:  [ 861.92338482 -180.6         233.31188268    8.424     ]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028   380.0538659 ]
------
Step:44, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028   380.0538659 ]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028   186.85964437]
Reward: -1  Episode Reward:  6
xxxxx
x. gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 118.12699336    0.         -516.58856435    0.        ]
------
Step:45, Action:North
State  272
Old Q Values:  [ 861.92338482 -180.6         233.31188268    8.424     ]
New Q values:  [6851.55640394 -180.6         233.31188268    8.424     ]
Reward: -1  Episode Reward:  5
xxxxx
x.g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[-1161.35193713    22.23326427 21691.29016672     0.        ]
------
Step:46, Action:East
State  192
Old Q Values:  [-1161.35193713    22.23326427 21691.29016672     0.        ]
New Q values:  [-1161.35193713    22.23326427  8718.0026799      0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  140.28871071    50.21333285 -2651.70614553 -3344.18956062]
------
Step:47, Action:North
State  208
Old Q Values:  [  140.28871071    50.21333285 -2651.70614553 -3344.18956062]
New Q values:  [23457.93775061    50.21333285 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  3
xxxxx
x.gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ -180.6        78008.07422108 -8652.84           0.        ]
------
Step:48, Action:South
State  136
Old Q Values:  [-6180.6          145.96319802  -179.38454759  -345.15807743]
New Q values:  [-6180.6          161.18403171  -179.38454759  -345.15807743]
Reward: -1  Episode Reward:  2
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5.60711409e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:49, Action:South
State  208
Old Q Values:  [23457.93775061    50.21333285 -2651.70614553 -3344.18956062]
New Q values:  [23457.93775061    75.54322645 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  1
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028   186.85964437]
------
Step:50, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028   186.85964437]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  2129.61077893]
Reward: -1  Episode Reward:  0
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[6851.55640394 -180.6         233.31188268    8.424     ]
------
Step:51, Action:North
State  272
Old Q Values:  [6851.55640394 -180.6         233.31188268    8.424     ]
New Q values:  [2740.76575195 -180.6         233.31188268    8.424     ]
Reward: -1  Episode Reward:  -1
xxxxx
xg  x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 2.47730124  0.          2.46857876 -0.84      ]
------
Step:52, Action:North
State  192
Old Q Values:  [-1161.35193713    22.23326427  8718.0026799      0.        ]
New Q values:  [-357.86321537   22.23326427 8718.0026799     0.        ]
Reward: -1  Episode Reward:  -2
xxxxx
x.a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.          357.59186495    0.        ]
------
Step:53, Action:East
State  114
Old Q Values:  [-180.6           0.          357.59186495    0.        ]
New Q values:  [-180.6           0.          253.13649264    0.        ]
Reward: -1  Episode Reward:  -3
xxxxx
x. ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         368.99915555 -180.00807518  259.86941057]
------
Step:54, Action:South
State  130
Old Q Values:  [-180.6         368.99915555 -180.00807518  259.86941057]
New Q values:  [-180.6        7184.3809874  -180.00807518  259.86941057]
Reward: -1  Episode Reward:  -4
xxxxx
x.  x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23457.93775061    75.54322645 -2651.70614553 -3344.18956062]
------
Step:55, Action:North
State  210
Old Q Values:  [ 359.60357426   13.44032833 -180.6           3.07790274]
New Q values:  [2298.55572593   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  -5
xxxxx
x. ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        7184.3809874  -180.00807518  259.86941057]
------
Step:56, Action:South
State  130
Old Q Values:  [-180.6        7184.3809874  -180.00807518  259.86941057]
New Q values:  [-180.6        3562.71911274 -180.00807518  259.86941057]
Reward: -1  Episode Reward:  -6
xxxxx
x.  x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2298.55572593   13.44032833 -180.6           3.07790274]
------
Step:57, Action:North
State  210
Old Q Values:  [2298.55572593   13.44032833 -180.6           3.07790274]
New Q values:  [1987.63802419   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  -7
xxxxx
x. ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        3562.71911274 -180.00807518  259.86941057]
------
Step:58, Action:South
State  130
Old Q Values:  [-180.6        3562.71911274 -180.00807518  259.86941057]
New Q values:  [-180.6        2020.77905235 -180.00807518  259.86941057]
Reward: -1  Episode Reward:  -8
xxxxx
x.  x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1987.63802419   13.44032833 -180.6           3.07790274]
------
Step:59, Action:North
State  210
Old Q Values:  [1987.63802419   13.44032833 -180.6           3.07790274]
New Q values:  [1400.68892538   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  -9
xxxxx
x. ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        2020.77905235 -180.00807518  259.86941057]
------
Step:60, Action:South
State  130
Old Q Values:  [-180.6        2020.77905235 -180.00807518  259.86941057]
New Q values:  [-180.6        1227.91829856 -180.00807518  259.86941057]
Reward: -1  Episode Reward:  -10
xxxxx
x.  x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1400.68892538   13.44032833 -180.6           3.07790274]
------
Step:61, Action:North
State  210
Old Q Values:  [1400.68892538   13.44032833 -180.6           3.07790274]
New Q values:  [ 928.05105972   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  -11
xxxxx
x. ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        1227.91829856 -180.00807518  259.86941057]
------
Step:62, Action:South
State  130
Old Q Values:  [-180.6        1227.91829856 -180.00807518  259.86941057]
New Q values:  [-180.6         768.98263734 -180.00807518  259.86941057]
Reward: -1  Episode Reward:  -12
xxxxx
x.  x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 928.05105972   13.44032833 -180.6           3.07790274]
------
Step:63, Action:North
State  210
Old Q Values:  [ 928.05105972   13.44032833 -180.6           3.07790274]
New Q values:  [ 601.31521509   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  -13
xxxxx
x. ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         768.98263734 -180.00807518  259.86941057]
------
Step:64, Action:South
State  130
Old Q Values:  [-180.6         768.98263734 -180.00807518  259.86941057]
New Q values:  [-180.6        7344.37438012 -180.00807518  259.86941057]
Reward: -1  Episode Reward:  -14
xxxxx
x.  x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23457.93775061    75.54322645 -2651.70614553 -3344.18956062]
------
Step:65, Action:North
State  208
Old Q Values:  [23457.93775061    75.54322645 -2651.70614553 -3344.18956062]
New Q values:  [11585.88741428    75.54322645 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -15
xxxxx
x. ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        7344.37438012 -180.00807518  259.86941057]
------
Step:66, Action:South
State  130
Old Q Values:  [-180.6        7344.37438012 -180.00807518  259.86941057]
New Q values:  [-180.6         412.91597633 -180.00807518  259.86941057]
Reward: -10001  Episode Reward:  -10016
xxxxx
x.  x
x  gx
x.  x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11585.88741428    75.54322645 -2651.70614553 -3344.18956062]
------
Step:1, Action:North
State  216
Old Q Values:  [-5.60711409e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [-2.18909043e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
Reward: 9  Episode Reward:  9
xxxxx
xg.ax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          161.18403171  -179.38454759  -345.15807743]
------
Step:2, Action:South
State  138
Old Q Values:  [ -180.6        -2295.6066244   -180.6            6.01270445]
New Q values:  [-180.6        2556.92357452 -180.6           6.01270445]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11585.88741428    75.54322645 -2651.70614553 -3344.18956062]
------
Step:3, Action:North
State  216
Old Q Values:  [-2.18909043e+03  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [-1.09159099e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2556.92357452 -180.6           6.01270445]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6        2556.92357452 -180.6           6.01270445]
New Q values:  [-180.6        1202.56399434 -180.6           6.01270445]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 601.31521509   13.44032833 -180.6           3.07790274]
------
Step:5, Action:North
State  210
Old Q Values:  [ 601.31521509   13.44032833 -180.6           3.07790274]
New Q values:  [ 600.69528434   13.44032833 -180.6           3.07790274]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1202.56399434 -180.6           6.01270445]
------
Step:6, Action:South
State  138
Old Q Values:  [-180.6        1202.56399434 -180.6           6.01270445]
New Q values:  [-180.6        3956.19182202 -180.6           6.01270445]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11585.88741428    75.54322645 -2651.70614553 -3344.18956062]
------
Step:7, Action:North
State  216
Old Q Values:  [-1.09159099e+02  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 4.09156992e+00  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  3
xxxxx
xg.ax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          161.18403171  -179.38454759  -345.15807743]
------
Step:8, Action:South
State  136
Old Q Values:  [-6180.6          161.18403171  -179.38454759  -345.15807743]
New Q values:  [-6180.6          167.27236519  -179.38454759  -345.15807743]
Reward: -1  Episode Reward:  2
xxxxx
x.g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.09156992e+00  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:9, Action:South
State  208
Old Q Values:  [11585.88741428    75.54322645 -2651.70614553 -3344.18956062]
New Q values:  [11585.88741428   674.50052426 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  11
xxxxx
x.. x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  2129.61077893]
------
Step:10, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  2129.61077893]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  1679.47403716]
Reward: 9  Episode Reward:  20
xxxxx
x.g x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[2740.76575195 -180.6         233.31188268    8.424     ]
------
Step:11, Action:North
State  272
Old Q Values:  [2740.76575195 -180.6         233.31188268    8.424     ]
New Q values:  [-2282.89289525  -180.6          233.31188268     8.424     ]
Reward: -9991  Episode Reward:  -9971
xxxxx
x.. x
x g x
x.  x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-1.80600000e+02  1.48226480e+00 -2.78131338e+03 -1.80600000e+02]
------
Step:1, Action:South
State  109
Old Q Values:  [-1.80600000e+02  1.48226480e+00 -2.78131338e+03 -1.80600000e+02]
New Q values:  [ -180.6            8.44656445 -2781.31337986  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x .gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    8.17886174 -445.90112872 -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [  -0.94832081    8.17886174 -445.90112872 -180.6       ]
New Q values:  [  -0.94832081    9.06157686 -445.90112872 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    1.30010722  -289.59534477 -1299.12168416  -180.6       ]
------
Step:3, Action:North
State  261
Old Q Values:  [    1.30010722  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    2.63851595  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    9.06157686 -445.90112872 -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [  -0.94832081    9.06157686 -445.90112872 -180.6       ]
New Q values:  [  -0.94832081    3.81618553 -445.90112872 -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    2.63851595  -289.59534477 -1299.12168416  -180.6       ]
------
Step:5, Action:North
State  261
Old Q Values:  [    2.63851595  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    3.52604946  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x . x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   3.17003324 10.23547692  0.        ]
------
Step:6, Action:East
State  189
Old Q Values:  [ 4.56529296e+00  2.17469001e+00 -2.39590675e+03 -1.80600000e+02]
New Q values:  [   4.56529296    2.17469001 -951.34636588 -180.6       ]
Reward: 9  Episode Reward:  24
xxxxx
x . x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[    5.38778645     0.         -5166.58487553     0.        ]
------
Step:7, Action:North
State  196
Old Q Values:  [-2398.16618312   650.23146405     0.             0.        ]
New Q values:  [-953.52769933  650.23146405    0.            0.        ]
Reward: 9  Episode Reward:  33
xxxxx
x a x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[0.        0.        1.1292464 0.       ]
------
Step:8, Action:East
State  124
Old Q Values:  [0.         5.4        0.61545241 0.91053821]
New Q values:  [ 0.          5.4        49.82789052  0.91053821]
Reward: -1  Episode Reward:  32
xxxxx
x gax
x  .x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          167.27236519  -179.38454759  -345.15807743]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6        3956.19182202 -180.6           6.01270445]
New Q values:  [-180.6        1691.27548131 -180.6           6.01270445]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.09156992e+00  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
------
Step:10, Action:South
State  216
Old Q Values:  [ 4.09156992e+00  3.44662508e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 4.09156992e+00  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
Reward: 9  Episode Reward:  50
xxxxx
x   x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  1679.47403716]
------
Step:11, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  1679.47403716]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028 60747.18317967]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
x g x
x a x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11585.88741428   674.50052426 -2651.70614553 -3344.18956062]
------
Step:1, Action:North
State  216
Old Q Values:  [ 4.09156992e+00  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 5.14419272e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1691.27548131 -180.6           6.01270445]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        1691.27548131 -180.6           6.01270445]
New Q values:  [-180.6         856.11877782 -180.6           6.01270445]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 600.69528434   13.44032833 -180.6           3.07790274]
------
Step:3, Action:North
State  216
Old Q Values:  [ 5.14419272e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 4.62003342e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         856.11877782 -180.6           6.01270445]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6         856.11877782 -180.6           6.01270445]
New Q values:  [-180.6         535.97967547 -180.6           6.01270445]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.62003342e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
------
Step:5, Action:South
State  210
Old Q Values:  [ 600.69528434   13.44032833 -180.6           3.07790274]
New Q values:  [ 6.00695284e+02  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
Reward: 9  Episode Reward:  15
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028 60747.18317967]
------
Step:6, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028 60747.18317967]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028 24374.26683667]
Reward: 9  Episode Reward:  24
xxxxx
x.  x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2282.89289525  -180.6          233.31188268     8.424     ]
------
Step:7, Action:East
State  272
Old Q Values:  [-2282.89289525  -180.6          233.31188268     8.424     ]
New Q values:  [-2282.89289525  -180.6         7405.00480407     8.424     ]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028 24374.26683667]
------
Step:8, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028 24374.26683667]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028 11970.60817589]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2282.89289525  -180.6         7405.00480407     8.424     ]
------
Step:9, Action:East
State  272
Old Q Values:  [-2282.89289525  -180.6         7405.00480407     8.424     ]
New Q values:  [-2282.89289525  -180.6         6552.5843744      8.424     ]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028 11970.60817589]
------
Step:10, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028 11970.60817589]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  4823.08136837]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 118.12699336    0.         -516.58856435    0.        ]
------
Step:11, Action:North
State  272
Old Q Values:  [-2282.89289525  -180.6         6552.5843744      8.424     ]
New Q values:  [-4292.35635413  -180.6         6552.5843744      8.424     ]
Reward: -9991  Episode Reward:  -9971
xxxxx
x.  x
x.g x
x.  x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.44656445 -2781.31337986  -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [ 0.          5.92204016 48.93406917  0.        ]
New Q values:  [ 0.          8.91367172 48.93406917  0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    3.81618553 -445.90112872 -180.6       ]
------
Step:2, Action:South
State  180
Old Q Values:  [-3.43106190e+03 -2.06575753e+02  3.43929072e-01  0.00000000e+00]
New Q values:  [-3.43106190e+03 -8.00903410e+02  3.43929072e-01  0.00000000e+00]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2412.24369512 -6457.4598     -6000.6        -6307.02      ]
------
Step:3, Action:East
State  260
Old Q Values:  [-2412.24369512 -6457.4598     -6000.6        -6307.02      ]
New Q values:  [-2412.24369512 -6457.4598     -1562.16328996 -6307.02      ]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  38.43571403 -180.6        2775.58903347   -6.75709867]
------
Step:4, Action:East
State  276
Old Q Values:  [  38.43571403 -180.6        2775.58903347   -6.75709867]
New Q values:  [  38.43571403 -180.6        2562.5600239    -6.75709867]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  4823.08136837]
------
Step:5, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  4823.08136837]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  2697.40055452]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  38.43571403 -180.6        2562.5600239    -6.75709867]
------
Step:6, Action:East
State  276
Old Q Values:  [  38.43571403 -180.6        2562.5600239    -6.75709867]
New Q values:  [  38.43571403 -180.6        1833.64417591   -6.75709867]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  2697.40055452]
------
Step:7, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  2697.40055452]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  1628.45347458]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  38.43571403 -180.6        1833.64417591   -6.75709867]
------
Step:8, Action:East
State  276
Old Q Values:  [  38.43571403 -180.6        1833.64417591   -6.75709867]
New Q values:  [  38.43571403 -180.6        1221.39371274   -6.75709867]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  1628.45347458]
------
Step:9, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  1628.45347458]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  1017.19950365]
Reward: -1  Episode Reward:  31
xxxxx
xg .x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  38.43571403 -180.6        1221.39371274   -6.75709867]
------
Step:10, Action:East
State  276
Old Q Values:  [  38.43571403 -180.6        1221.39371274   -6.75709867]
New Q values:  [  38.43571403 -180.6         793.11733619   -6.75709867]
Reward: -1  Episode Reward:  30
xxxxx
x g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  1017.19950365]
------
Step:11, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  1017.19950365]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028   441.71789947]
Reward: -1  Episode Reward:  29
xxxxx
x  gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 118.12699336    0.         -516.58856435    0.        ]
------
Step:12, Action:North
State  277
Old Q Values:  [-1.46880000e-01  0.00000000e+00  3.02996406e+03  0.00000000e+00]
New Q values:  [   5.41827       0.         3029.96406335    0.        ]
Reward: 9  Episode Reward:  38
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[0.13244338 0.25674    0.         0.        ]
------
Step:13, Action:South
State  199
Old Q Values:  [-6.00000000e-01  9.14389219e+02  0.00000000e+00  0.00000000e+00]
New Q values:  [-6.00000000e-01  1.27414491e+03  0.00000000e+00  0.00000000e+00]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   5.41827       0.         3029.96406335    0.        ]
------
Step:14, Action:North
State  276
Old Q Values:  [  38.43571403 -180.6         793.11733619   -6.75709867]
New Q values:  [  14.77428561 -180.6         793.11733619   -6.75709867]
Reward: -1  Episode Reward:  36
xxxxx
x  .x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -5.9946000e+03  0.0000000e+00  0.0000000e+00]
------
Step:15, Action:East
State  199
Old Q Values:  [-6.00000000e-01  1.27414491e+03  0.00000000e+00  0.00000000e+00]
New Q values:  [-6.00000000e-01  1.27414491e+03  5.47587933e+03  0.00000000e+00]
Reward: 9  Episode Reward:  45
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6.00695284e+02  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
------
Step:16, Action:North
State  208
Old Q Values:  [11585.88741428   674.50052426 -2651.70614553 -3344.18956062]
New Q values:  [64763.62975861   674.50052426 -2651.70614553 -3344.18956062]
Reward: 100009  Episode Reward:  100054
xxxxx
x  ax
x  gx
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028   441.71789947]
------
Step:1, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028   441.71789947]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  2147.86247211]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-4292.35635413  -180.6         6552.5843744      8.424     ]
------
Step:2, Action:East
State  272
Old Q Values:  [-4292.35635413  -180.6         6552.5843744      8.424     ]
New Q values:  [-4292.35635413  -180.6         3264.79249139     8.424     ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  2147.86247211]
------
Step:3, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  2147.86247211]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028   893.98308685]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 118.12699336    0.         -516.58856435    0.        ]
------
Step:4, Action:North
State  273
Old Q Values:  [ 118.12699336    0.         -516.58856435    0.        ]
New Q values:  [  54.26713328    0.         -516.58856435    0.        ]
Reward: 9  Episode Reward:  16
xxxxx
x .gx
x.a.x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[    5.38778645     0.         -5166.58487553     0.        ]
------
Step:5, Action:North
State  196
Old Q Values:  [-953.52769933  650.23146405    0.            0.        ]
New Q values:  [-6340.18357123   650.23146405     0.             0.        ]
Reward: -9991  Episode Reward:  -9975
xxxxx
x g.x
x. .x
x.  x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[ 0.          0.         41.53651403  0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [-0.1803238   3.17003324 10.23547692  0.        ]
New Q values:  [-0.1803238   3.17003324 25.57831783  0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987 53.61375687  0.        ]
------
Step:2, Action:East
State  194
Old Q Values:  [-0.6         2.56078987 53.61375687  0.        ]
New Q values:  [-6.00000000e-01  2.56078987e+00  5.49732483e+03  0.00000000e+00]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6.00695284e+02  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
------
Step:3, Action:South
State  208
Old Q Values:  [64763.62975861   674.50052426 -2651.70614553 -3344.18956062]
New Q values:  [64763.62975861   543.39513576 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028   893.98308685]
------
Step:4, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028   893.98308685]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  1342.43098216]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-4292.35635413  -180.6         3264.79249139     8.424     ]
------
Step:5, Action:East
State  272
Old Q Values:  [-4292.35635413  -180.6         3264.79249139     8.424     ]
New Q values:  [-4292.35635413  -180.6         1708.0462912      8.424     ]
Reward: -1  Episode Reward:  35
xxxxx
x...x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  1342.43098216]
------
Step:6, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  1342.43098216]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028  1048.78628022]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-4292.35635413  -180.6         1708.0462912      8.424     ]
------
Step:7, Action:East
State  272
Old Q Values:  [-4292.35635413  -180.6         1708.0462912      8.424     ]
New Q values:  [-4292.35635413  -180.6          997.25440055     8.424     ]
Reward: -1  Episode Reward:  33
xxxxx
x...x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028  1048.78628022]
------
Step:8, Action:West
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028  1048.78628022]
New Q values:  [   75.01199984 -8656.02923281 -6173.56321028   718.09083225]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-4292.35635413  -180.6          997.25440055     8.424     ]
------
Step:9, Action:East
State  272
Old Q Values:  [-4292.35635413  -180.6          997.25440055     8.424     ]
New Q values:  [-4292.35635413  -180.6          613.7290099      8.424     ]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   75.01199984 -8656.02923281 -6173.56321028   718.09083225]
------
Step:10, Action:North
State  288
Old Q Values:  [   75.01199984 -8656.02923281 -6173.56321028   718.09083225]
New Q values:  [ 5499.88412551 -8656.02923281 -6173.56321028   718.09083225]
Reward: -1  Episode Reward:  30
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6.00695284e+02  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
------
Step:11, Action:North
State  208
Old Q Values:  [64763.62975861   543.39513576 -2651.70614553 -3344.18956062]
New Q values:  [26034.72669634   543.39513576 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  39
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         412.91597633 -180.00807518  259.86941057]
------
Step:12, Action:West
State  130
Old Q Values:  [-180.6         412.91597633 -180.00807518  259.86941057]
New Q values:  [-180.6         412.91597633 -180.00807518  185.28871202]
Reward: 9  Episode Reward:  48
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.          253.13649264    0.        ]
------
Step:13, Action:East
State  112
Old Q Values:  [    0.             0.         32763.06865621     0.        ]
New Q values:  [    0.             0.         36507.04972881     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ -180.6        78008.07422108 -8652.84           0.        ]
------
Step:14, Action:South
State  128
Old Q Values:  [ -180.6        78008.07422108 -8652.84           0.        ]
New Q values:  [ -180.6        39013.04769733 -8652.84           0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x. gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26034.72669634   543.39513576 -2651.70614553 -3344.18956062]
------
Step:15, Action:South
State  208
Old Q Values:  [26034.72669634   543.39513576 -2651.70614553 -3344.18956062]
New Q values:  [26034.72669634  1866.72329196 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  45
xxxxx
x.  x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5499.88412551 -8656.02923281 -6173.56321028   718.09083225]
------
Step:16, Action:West
State  288
Old Q Values:  [ 5499.88412551 -8656.02923281 -6173.56321028   718.09083225]
New Q values:  [ 5499.88412551 -8656.02923281 -6173.56321028   470.75503587]
Reward: -1  Episode Reward:  44
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-4292.35635413  -180.6          613.7290099      8.424     ]
------
Step:17, Action:East
State  272
Old Q Values:  [-4292.35635413  -180.6          613.7290099      8.424     ]
New Q values:  [-4292.35635413  -180.6         1894.85684161     8.424     ]
Reward: -1  Episode Reward:  43
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5499.88412551 -8656.02923281 -6173.56321028   470.75503587]
------
Step:18, Action:North
State  288
Old Q Values:  [ 5499.88412551 -8656.02923281 -6173.56321028   470.75503587]
New Q values:  [10009.77165911 -8656.02923281 -6173.56321028   470.75503587]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26034.72669634  1866.72329196 -2651.70614553 -3344.18956062]
------
Step:19, Action:North
State  208
Old Q Values:  [26034.72669634  1866.72329196 -2651.70614553 -3344.18956062]
New Q values:  [22117.20498774  1866.72329196 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  41
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ -180.6        39013.04769733 -8652.84           0.        ]
------
Step:20, Action:South
State  128
Old Q Values:  [ -180.6        39013.04769733 -8652.84           0.        ]
New Q values:  [ -180.6        22239.78057525 -8652.84           0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x. gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22117.20498774  1866.72329196 -2651.70614553 -3344.18956062]
------
Step:21, Action:South
State  208
Old Q Values:  [22117.20498774  1866.72329196 -2651.70614553 -3344.18956062]
New Q values:  [22117.20498774  3749.02081451 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10009.77165911 -8656.02923281 -6173.56321028   470.75503587]
------
Step:22, Action:West
State  288
Old Q Values:  [10009.77165911 -8656.02923281 -6173.56321028   470.75503587]
New Q values:  [10009.77165911 -8656.02923281 -6173.56321028   203.98215433]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  54.26713328    0.         -516.58856435    0.        ]
------
Step:23, Action:North
State  273
Old Q Values:  [  54.26713328    0.         -516.58856435    0.        ]
New Q values:  [  22.72318925    0.         -516.58856435    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[    5.38778645     0.         -5166.58487553     0.        ]
------
Step:24, Action:North
State  193
Old Q Values:  [    5.38778645     0.         -5166.58487553     0.        ]
New Q values:  [   80.95658411     0.         -5166.58487553     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:25, Action:South
State  115
Old Q Values:  [-180.6           2.10733926  113.12519404    0.        ]
New Q values:  [-180.6          24.52991094  113.12519404    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   80.95658411     0.         -5166.58487553     0.        ]
------
Step:26, Action:North
State  195
Old Q Values:  [ 45.51490127   0.         344.48474906   0.        ]
New Q values:  [ 51.54351872   0.         344.48474906   0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          24.52991094  113.12519404    0.        ]
------
Step:27, Action:East
State  114
Old Q Values:  [-180.6           0.          253.13649264    0.        ]
New Q values:  [-180.6           0.          224.52938996    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6         412.91597633 -180.00807518  185.28871202]
------
Step:28, Action:South
State  130
Old Q Values:  [-180.6         412.91597633 -180.00807518  185.28871202]
New Q values:  [-180.6        5635.0457161  -180.00807518  185.28871202]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x  ax
x  gx
xxxxx
Step:29, Action:North
State  210
Old Q Values:  [ 6.00695284e+02  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
New Q values:  [ 1.93019183e+03  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        5635.0457161  -180.00807518  185.28871202]
------
Step:30, Action:South
State  130
Old Q Values:  [-180.6        5635.0457161  -180.00807518  185.28871202]
New Q values:  [-180.6        8888.57978276 -180.00807518  185.28871202]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22117.20498774  3749.02081451 -2651.70614553 -3344.18956062]
------
Step:31, Action:North
State  208
Old Q Values:  [22117.20498774  3749.02081451 -2651.70614553 -3344.18956062]
New Q values:  [15518.21616767  3749.02081451 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  29
xxxxx
x.gax
x   x
x   x
xxxxx
Step:32, Action:North
State  128
Old Q Values:  [ -180.6        22239.78057525 -8652.84           0.        ]
New Q values:  [ 6419.09417258 22239.78057525 -8652.84           0.        ]
Reward: -301  Episode Reward:  -272
xxxxx
xg ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6419.09417258 22239.78057525 -8652.84           0.        ]
------
Step:33, Action:South
State  128
Old Q Values:  [ 6419.09417258 22239.78057525 -8652.84           0.        ]
New Q values:  [ 6419.09417258 13550.7770804  -8652.84           0.        ]
Reward: -1  Episode Reward:  -273
xxxxx
xg  x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15518.21616767  3749.02081451 -2651.70614553 -3344.18956062]
------
Step:34, Action:North
State  208
Old Q Values:  [15518.21616767  3749.02081451 -2651.70614553 -3344.18956062]
New Q values:  [ 8873.2604019   3749.02081451 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -274
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        8888.57978276 -180.00807518  185.28871202]
------
Step:35, Action:South
State  128
Old Q Values:  [ 6419.09417258 13550.7770804  -8652.84           0.        ]
New Q values:  [ 6419.09417258  8081.68895273 -8652.84           0.        ]
Reward: -1  Episode Reward:  -275
xxxxx
xg  x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8873.2604019   3749.02081451 -2651.70614553 -3344.18956062]
------
Step:36, Action:North
State  208
Old Q Values:  [ 8873.2604019   3749.02081451 -2651.70614553 -3344.18956062]
New Q values:  [ 5973.21084658  3749.02081451 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -276
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6419.09417258  8081.68895273 -8652.84           0.        ]
------
Step:37, Action:South
State  128
Old Q Values:  [ 6419.09417258  8081.68895273 -8652.84           0.        ]
New Q values:  [ 6419.09417258  5024.03883507 -8652.84           0.        ]
Reward: -1  Episode Reward:  -277
xxxxx
x. gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5973.21084658  3749.02081451 -2651.70614553 -3344.18956062]
------
Step:38, Action:South
State  208
Old Q Values:  [ 5973.21084658  3749.02081451 -2651.70614553 -3344.18956062]
New Q values:  [ 5973.21084658  4501.93982354 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -278
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10009.77165911 -8656.02923281 -6173.56321028   203.98215433]
------
Step:39, Action:North
State  288
Old Q Values:  [10009.77165911 -8656.02923281 -6173.56321028   203.98215433]
New Q values:  [ 5795.27191762 -8656.02923281 -6173.56321028   203.98215433]
Reward: -1  Episode Reward:  -279
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5973.21084658  4501.93982354 -2651.70614553 -3344.18956062]
------
Step:40, Action:North
State  208
Old Q Values:  [ 5973.21084658  4501.93982354 -2651.70614553 -3344.18956062]
New Q values:  [ 5055.25827346  4501.93982354 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -280
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        8888.57978276 -180.00807518  185.28871202]
------
Step:41, Action:West
State  130
Old Q Values:  [-180.6        8888.57978276 -180.00807518  185.28871202]
New Q values:  [-180.6        8888.57978276 -180.00807518  140.8743018 ]
Reward: -1  Episode Reward:  -281
xxxxx
x.a x
x g x
x   x
xxxxx
Step:42, Action:West
State  112
Old Q Values:  [    0.             0.         36507.04972881     0.        ]
New Q values:  [    0.             0.         36507.04972881 60005.4       ]
Reward: 100009  Episode Reward:  99728
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    3.81618553 -445.90112872 -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [  -0.94832081    3.81618553 -445.90112872 -180.6       ]
New Q values:  [  -0.94832081    7.98428905 -445.90112872 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    3.52604946  -289.59534477 -1299.12168416  -180.6       ]
------
Step:2, Action:North
State  260
Old Q Values:  [-2412.24369512 -6457.4598     -1562.16328996 -6307.02      ]
New Q values:  [-6965.39429933 -6457.4598     -1562.16328996 -6307.02      ]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
xg. x
x ..x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5795.27191762 -8656.02923281 -6173.56321028   203.98215433]
------
Step:1, Action:North
State  288
Old Q Values:  [ 5795.27191762 -8656.02923281 -6173.56321028   203.98215433]
New Q values:  [ 7793.98809262 -8656.02923281 -6173.56321028   203.98215433]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.93019183e+03  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [ 5055.25827346  4501.93982354 -2651.70614553 -3344.18956062]
New Q values:  [ 2188.29721203  4501.93982354 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         535.97967547 -180.6           6.01270445]
------
Step:3, Action:West
State  136
Old Q Values:  [-6180.6          167.27236519  -179.38454759  -345.15807743]
New Q values:  [-6180.6          167.27236519  -179.38454759  -131.19672001]
Reward: 9  Episode Reward:  27
xxxxx
x.agx
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
------
Step:4, Action:West
State  123
Old Q Values:  [ -284.31459256 -6000.6           79.65792356    12.08665543]
New Q values:  [ -284.31459256 -6000.6           79.65792356    10.97401097]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-2.52351696e+02  2.46449597e+00 -2.58259038e-02 -2.52781922e+02]
------
Step:5, Action:South
State  107
Old Q Values:  [-2.52351696e+02  2.46449597e+00 -2.58259038e-02 -2.52781922e+02]
New Q values:  [-2.52351696e+02  1.40592937e+01 -2.58259038e-02 -2.52781922e+02]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xa. x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[-0.1803238   3.17003324 25.57831783  0.        ]
------
Step:6, Action:East
State  187
Old Q Values:  [-0.11058345  0.         13.89361422  0.        ]
New Q values:  [-0.11058345  0.         11.22674742  0.        ]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x a x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[0.89767243 0.         0.         0.        ]
------
Step:7, Action:North
State  201
Old Q Values:  [1.47599573 0.         0.         0.024     ]
New Q values:  [23.88777536  0.          0.          0.024     ]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6           79.65792356    10.97401097]
------
Step:8, Action:East
State  121
Old Q Values:  [ 0.00000000e+00  0.00000000e+00 -9.60215563e+03  4.88836988e+00]
New Q values:  [ 0.00000000e+00  0.00000000e+00 -9.79128054e+03  4.88836988e+00]
Reward: -10001  Episode Reward:  -9948
xxxxx
x  gx
x   x
x.  x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7793.98809262 -8656.02923281 -6173.56321028   203.98215433]
------
Step:1, Action:West
State  288
Old Q Values:  [ 7793.98809262 -8656.02923281 -6173.56321028   203.98215433]
New Q values:  [ 7793.98809262 -8656.02923281 -6173.56321028   655.44991422]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-4292.35635413  -180.6         1894.85684161     8.424     ]
------
Step:2, Action:East
State  272
Old Q Values:  [-4292.35635413  -180.6         1894.85684161     8.424     ]
New Q values:  [-4292.35635413  -180.6         3095.53916443     8.424     ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7793.98809262 -8656.02923281 -6173.56321028   655.44991422]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7793.98809262 -8656.02923281 -6173.56321028   655.44991422]
New Q values:  [ 4467.57718411 -8656.02923281 -6173.56321028   655.44991422]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2188.29721203  4501.93982354 -2651.70614553 -3344.18956062]
------
Step:4, Action:South
State  208
Old Q Values:  [ 2188.29721203  4501.93982354 -2651.70614553 -3344.18956062]
New Q values:  [ 2188.29721203  3140.44908465 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4467.57718411 -8656.02923281 -6173.56321028   655.44991422]
------
Step:5, Action:North
State  288
Old Q Values:  [ 4467.57718411 -8656.02923281 -6173.56321028   655.44991422]
New Q values:  [ 2728.56559904 -8656.02923281 -6173.56321028   655.44991422]
Reward: -1  Episode Reward:  5
xxxxx
x..gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2188.29721203  3140.44908465 -2651.70614553 -3344.18956062]
------
Step:6, Action:South
State  208
Old Q Values:  [ 2188.29721203  3140.44908465 -2651.70614553 -3344.18956062]
New Q values:  [ 2188.29721203  2074.14931357 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2728.56559904 -8656.02923281 -6173.56321028   655.44991422]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2728.56559904 -8656.02923281 -6173.56321028   655.44991422]
New Q values:  [ 2728.56559904 -8656.02923281 -6173.56321028   268.39692246]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  22.72318925    0.         -516.58856435    0.        ]
------
Step:8, Action:North
State  272
Old Q Values:  [-4292.35635413  -180.6         3095.53916443     8.424     ]
New Q values:  [ -62.34509316 -180.6        3095.53916443    8.424     ]
Reward: 9  Episode Reward:  12
xxxxx
x...x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.56078987e+00  5.49732483e+03  0.00000000e+00]
------
Step:9, Action:East
State  195
Old Q Values:  [ 51.54351872   0.         344.48474906   0.        ]
New Q values:  [  51.54351872    0.         5607.67322519    0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.93019183e+03  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
------
Step:10, Action:North
State  210
Old Q Values:  [ 1.93019183e+03  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
New Q values:  [ 3.44405067e+03  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
Reward: 9  Episode Reward:  20
xxxxx
x..ax
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-180.6        8888.57978276 -180.00807518  140.8743018 ]
------
Step:11, Action:South
State  130
Old Q Values:  [-180.6        8888.57978276 -180.00807518  140.8743018 ]
New Q values:  [ -180.6        -1788.67892329  -180.00807518   140.8743018 ]
Reward: -10001  Episode Reward:  -9981
xxxxx
x.. x
x. gx
x.  x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2188.29721203  2074.14931357 -2651.70614553 -3344.18956062]
------
Step:1, Action:North
State  216
Old Q Values:  [ 4.62003342e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 2.40383046e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          167.27236519  -179.38454759  -131.19672001]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6         535.97967547 -180.6           6.01270445]
New Q values:  [-180.6         407.92403453 -180.6           6.01270445]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.40383046e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
------
Step:3, Action:South
State  210
Old Q Values:  [ 3.44405067e+03  1.82349311e+04 -1.80600000e+02  3.07790274e+00]
New Q values:  [ 3.44405067e+03  8.11794211e+03 -1.80600000e+02  3.07790274e+00]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2728.56559904 -8656.02923281 -6173.56321028   268.39692246]
------
Step:4, Action:North
State  288
Old Q Values:  [ 2728.56559904 -8656.02923281 -6173.56321028   268.39692246]
New Q values:  [ 3526.20887376 -8656.02923281 -6173.56321028   268.39692246]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..ax
x.g x
xxxxx
Step:5, Action:North
State  210
Old Q Values:  [ 3.44405067e+03  8.11794211e+03 -1.80600000e+02  3.07790274e+00]
New Q values:  [ 1.49939748e+03  8.11794211e+03 -1.80600000e+02  3.07790274e+00]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         407.92403453 -180.6           6.01270445]
------
Step:6, Action:South
State  138
Old Q Values:  [-180.6         407.92403453 -180.6           6.01270445]
New Q values:  [-180.6        2597.95224795 -180.6           6.01270445]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.49939748e+03  8.11794211e+03 -1.80600000e+02  3.07790274e+00]
------
Step:7, Action:South
State  210
Old Q Values:  [ 1.49939748e+03  8.11794211e+03 -1.80600000e+02  3.07790274e+00]
New Q values:  [ 1499.39747686 -1695.56049235  -180.6            3.07790274]
Reward: -10001  Episode Reward:  -9987
xxxxx
x.  x
x.. x
x..gx
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    7.98428905 -445.90112872 -180.6       ]
------
Step:1, Action:South
State  183
Old Q Values:  [-0.1803238   3.17003324 25.57831783  0.        ]
New Q values:  [-0.1803238   7.72582813 25.57831783  0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    3.52604946  -289.59534477 -1299.12168416  -180.6       ]
------
Step:2, Action:North
State  261
Old Q Values:  [    3.52604946  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    3.2057065   -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    7.98428905 -445.90112872 -180.6       ]
------
Step:3, Action:South
State  181
Old Q Values:  [  -0.94832081    7.98428905 -445.90112872 -180.6       ]
New Q values:  [  -0.94832081    3.55542757 -445.90112872 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    3.2057065   -289.59534477 -1299.12168416  -180.6       ]
------
Step:4, Action:North
State  261
Old Q Values:  [    3.2057065   -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [    1.74891087  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    3.55542757 -445.90112872 -180.6       ]
------
Step:5, Action:South
State  183
Old Q Values:  [-0.1803238   7.72582813 25.57831783  0.        ]
New Q values:  [-0.1803238   3.01500451 25.57831783  0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    1.74891087  -289.59534477 -1299.12168416  -180.6       ]
------
Step:6, Action:North
State  261
Old Q Values:  [    1.74891087  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [ 1.16619262e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    3.55542757 -445.90112872 -180.6       ]
------
Step:7, Action:South
State  181
Old Q Values:  [  -0.94832081    3.55542757 -445.90112872 -180.6       ]
New Q values:  [  -0.94832081    1.17202881 -445.90112872 -180.6       ]
Reward: -1  Episode Reward:  3
xxxxx
x..gx
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1.16619262e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
------
Step:8, Action:North
State  261
Old Q Values:  [ 1.16619262e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
New Q values:  [ 2.18085691e-01 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  2
xxxxx
x.g.x
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081    1.17202881 -445.90112872 -180.6       ]
------
Step:9, Action:South
State  181
Old Q Values:  [  -0.94832081    1.17202881 -445.90112872 -180.6       ]
New Q values:  [-9.48320810e-01 -6.57627675e-02 -4.45901129e+02 -1.80600000e+02]
Reward: -1  Episode Reward:  1
xxxxx
x..gx
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2.18085691e-01 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
------
Step:10, Action:North
State  261
Old Q Values:  [ 2.18085691e-01 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
New Q values:  [-5.32494554e-01 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  0
xxxxx
x.g.x
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[-9.48320810e-01 -6.57627675e-02 -4.45901129e+02 -1.80600000e+02]
------
Step:11, Action:South
State  181
Old Q Values:  [-9.48320810e-01 -6.57627675e-02 -4.45901129e+02 -1.80600000e+02]
New Q values:  [  -0.94832081   -0.78605347 -445.90112872 -180.6       ]
Reward: -1  Episode Reward:  -1
xxxxx
x..gx
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[-5.32494554e-01 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
------
Step:12, Action:North
State  261
Old Q Values:  [-5.32494554e-01 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
New Q values:  [-1.04881386e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -2
xxxxx
x...x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081   -0.78605347 -445.90112872 -180.6       ]
------
Step:13, Action:South
State  181
Old Q Values:  [  -0.94832081   -0.78605347 -445.90112872 -180.6       ]
New Q values:  [  -0.94832081   -1.22906555 -445.90112872 -180.6       ]
Reward: -1  Episode Reward:  -3
xxxxx
x..gx
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[-1.04881386e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
------
Step:14, Action:North
State  261
Old Q Values:  [-1.04881386e+00 -2.89595345e+02 -1.29912168e+03 -1.80600000e+02]
New Q values:  [   -1.30402179  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  -4
xxxxx
x...x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081   -1.22906555 -445.90112872 -180.6       ]
------
Step:15, Action:North
State  183
Old Q Values:  [-0.1803238   3.01500451 25.57831783  0.        ]
New Q values:  [ 6.1762476   3.01500451 25.57831783  0.        ]
Reward: 9  Episode Reward:  5
xxxxx
xa..x
x . x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           2.82792373    0.            0.        ]
------
Step:16, Action:South
State  103
Old Q Values:  [-180.6           2.82792373    0.            0.        ]
New Q values:  [-180.6           8.20466484    0.            0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
xa. x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476   3.01500451 25.57831783  0.        ]
------
Step:17, Action:East
State  183
Old Q Values:  [ 6.1762476   3.01500451 25.57831783  0.        ]
New Q values:  [   6.1762476     3.01500451 1697.93329469    0.        ]
Reward: 9  Episode Reward:  13
xxxxx
x ..x
x a x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  51.54351872    0.         5607.67322519    0.        ]
------
Step:18, Action:East
State  193
Old Q Values:  [   80.95658411     0.         -5166.58487553     0.        ]
New Q values:  [   80.95658411     0.         -7410.7447866      0.        ]
Reward: -10001  Episode Reward:  -9988
xxxxx
x ..x
x  gx
x ..x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   -1.30402179  -289.59534477 -1299.12168416  -180.6       ]
------
Step:1, Action:North
State  260
Old Q Values:  [-6965.39429933 -6457.4598     -1562.16328996 -6307.02      ]
New Q values:  [-2768.29676552 -6457.4598     -1562.16328996 -6307.02      ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[ 0.          0.         41.53651403  0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [ 0.          0.         41.53651403  0.        ]
New Q values:  [   0.            0.         1671.21205411    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.56078987e+00  5.49732483e+03  0.00000000e+00]
------
Step:3, Action:East
State  192
Old Q Values:  [-357.86321537   22.23326427 8718.0026799     0.        ]
New Q values:  [-357.86321537   22.23326427 4149.09023557    0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2188.29721203  2074.14931357 -2651.70614553 -3344.18956062]
------
Step:4, Action:North
State  208
Old Q Values:  [ 2188.29721203  2074.14931357 -2651.70614553 -3344.18956062]
New Q values:  [ 2806.44713658  2074.14931357 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  36
xxxxx
xg.ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6419.09417258  5024.03883507 -8652.84           0.        ]
------
Step:5, Action:North
State  128
Old Q Values:  [ 6419.09417258  5024.03883507 -8652.84           0.        ]
New Q values:  [ 4312.7659208   5024.03883507 -8652.84           0.        ]
Reward: -301  Episode Reward:  -265
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 4312.7659208   5024.03883507 -8652.84           0.        ]
------
Step:6, Action:South
State  128
Old Q Values:  [ 4312.7659208   5024.03883507 -8652.84           0.        ]
New Q values:  [ 4312.7659208  2850.949675  -8652.84          0.       ]
Reward: -1  Episode Reward:  -266
xxxxx
x..gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2806.44713658  2074.14931357 -2651.70614553 -3344.18956062]
------
Step:7, Action:South
State  208
Old Q Values:  [ 2806.44713658  2074.14931357 -2651.70614553 -3344.18956062]
New Q values:  [ 2806.44713658  1892.92238755 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  -257
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3526.20887376 -8656.02923281 -6173.56321028   268.39692246]
------
Step:8, Action:North
State  288
Old Q Values:  [ 3526.20887376 -8656.02923281 -6173.56321028   268.39692246]
New Q values:  [ 2251.81769048 -8656.02923281 -6173.56321028   268.39692246]
Reward: -1  Episode Reward:  -258
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2806.44713658  1892.92238755 -2651.70614553 -3344.18956062]
------
Step:9, Action:North
State  208
Old Q Values:  [ 2806.44713658  1892.92238755 -2651.70614553 -3344.18956062]
New Q values:  [ 1164.24114517  1892.92238755 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -259
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -180.6        -1788.67892329  -180.00807518   140.8743018 ]
------
Step:10, Action:West
State  128
Old Q Values:  [ 4312.7659208  2850.949675  -8652.84          0.       ]
New Q values:  [ 4312.7659208  2850.949675  -8652.84      18007.02     ]
Reward: 9  Episode Reward:  -250
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.             0.         36507.04972881 60005.4       ]
------
Step:11, Action:East
State  114
Old Q Values:  [-180.6           0.          224.52938996    0.        ]
New Q values:  [-180.6           0.          131.47404652    0.        ]
Reward: -1  Episode Reward:  -251
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -180.6        -1788.67892329  -180.00807518   140.8743018 ]
------
Step:12, Action:West
State  128
Old Q Values:  [ 4312.7659208  2850.949675  -8652.84      18007.02     ]
New Q values:  [ 4312.7659208  2850.949675  -8652.84      25203.828    ]
Reward: -1  Episode Reward:  -252
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.             0.         36507.04972881 60005.4       ]
------
Step:13, Action:East
State  112
Old Q Values:  [    0.             0.         36507.04972881 60005.4       ]
New Q values:  [    0.             0.         22163.36829152 60005.4       ]
Reward: -1  Episode Reward:  -253
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 4312.7659208  2850.949675  -8652.84      25203.828    ]
------
Step:14, Action:North
State  130
Old Q Values:  [ -180.6        -1788.67892329  -180.00807518   140.8743018 ]
New Q values:  [ -210.57770946 -1788.67892329  -180.00807518   140.8743018 ]
Reward: -301  Episode Reward:  -554
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -210.57770946 -1788.67892329  -180.00807518   140.8743018 ]
------
Step:15, Action:West
State  128
Old Q Values:  [ 4312.7659208  2850.949675  -8652.84      25203.828    ]
New Q values:  [ 4312.7659208  2850.949675  -8652.84      22082.5512   ]
Reward: -10001  Episode Reward:  -10555
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6           79.65792356    10.97401097]
------
Step:1, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.19188466e+01  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.89553213e+02  4.04786473e+00]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2597.95224795 -180.6           6.01270445]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        2597.95224795 -180.6           6.01270445]
New Q values:  [-180.6        1612.45761545 -180.6           6.01270445]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1164.24114517  1892.92238755 -2651.70614553 -3344.18956062]
------
Step:3, Action:South
State  208
Old Q Values:  [ 1164.24114517  1892.92238755 -2651.70614553 -3344.18956062]
New Q values:  [ 1164.24114517  1432.11426217 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2251.81769048 -8656.02923281 -6173.56321028   268.39692246]
------
Step:4, Action:North
State  288
Old Q Values:  [ 2251.81769048 -8656.02923281 -6173.56321028   268.39692246]
New Q values:  [ 1349.94631925 -8656.02923281 -6173.56321028   268.39692246]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1499.39747686 -1695.56049235  -180.6            3.07790274]
------
Step:5, Action:North
State  210
Old Q Values:  [ 1499.39747686 -1695.56049235  -180.6            3.07790274]
New Q values:  [ 1082.89627538 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1612.45761545 -180.6           6.01270445]
------
Step:6, Action:South
State  138
Old Q Values:  [-180.6        1612.45761545 -180.6           6.01270445]
New Q values:  [-180.6        1074.01732483 -180.6           6.01270445]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1164.24114517  1432.11426217 -2651.70614553 -3344.18956062]
------
Step:7, Action:South
State  208
Old Q Values:  [ 1164.24114517  1432.11426217 -2651.70614553 -3344.18956062]
New Q values:  [ 1164.24114517   977.22960064 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1349.94631925 -8656.02923281 -6173.56321028   268.39692246]
------
Step:8, Action:West
State  288
Old Q Values:  [ 1349.94631925 -8656.02923281 -6173.56321028   268.39692246]
New Q values:  [ 1349.94631925 -8656.02923281 -6173.56321028   119.57572576]
Reward: 9  Episode Reward:  22
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  22.72318925    0.         -516.58856435    0.        ]
------
Step:9, Action:North
State  273
Old Q Values:  [  22.72318925    0.         -516.58856435    0.        ]
New Q values:  [  38.77625093    0.         -516.58856435    0.        ]
Reward: 9  Episode Reward:  31
xxxxx
x.  x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   80.95658411     0.         -7410.7447866      0.        ]
------
Step:10, Action:North
State  192
Old Q Values:  [-357.86321537   22.23326427 4149.09023557    0.        ]
New Q values:  [  93.12067776   22.23326427 4149.09023557    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.89553213e+02  4.04786473e+00]
------
Step:11, Action:East
State  114
Old Q Values:  [-180.6           0.          131.47404652    0.        ]
New Q values:  [-180.6           0.           94.25190915    0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -210.57770946 -1788.67892329  -180.00807518   140.8743018 ]
------
Step:12, Action:West
State  130
Old Q Values:  [ -210.57770946 -1788.67892329  -180.00807518   140.8743018 ]
New Q values:  [ -210.57770946 -1788.67892329  -180.00807518    84.02529346]
Reward: -1  Episode Reward:  28
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.           94.25190915    0.        ]
------
Step:13, Action:East
State  114
Old Q Values:  [-180.6           0.           94.25190915    0.        ]
New Q values:  [-180.6          0.          62.3083517    0.       ]
Reward: -1  Episode Reward:  27
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -210.57770946 -1788.67892329  -180.00807518    84.02529346]
------
Step:14, Action:West
State  130
Old Q Values:  [ -210.57770946 -1788.67892329  -180.00807518    84.02529346]
New Q values:  [ -210.57770946 -1788.67892329  -180.00807518    51.70262289]
Reward: -1  Episode Reward:  26
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6          0.          62.3083517    0.       ]
------
Step:15, Action:East
State  114
Old Q Values:  [-180.6          0.          62.3083517    0.       ]
New Q values:  [-180.6           0.           39.83412755    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -210.57770946 -1788.67892329  -180.00807518    51.70262289]
------
Step:16, Action:West
State  130
Old Q Values:  [ -210.57770946 -1788.67892329  -180.00807518    51.70262289]
New Q values:  [ -210.57770946 -1788.67892329  -180.00807518    32.03128742]
Reward: -1  Episode Reward:  24
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.           39.83412755    0.        ]
------
Step:17, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.89553213e+02  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  6.37426483e+02  4.04786473e+00]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1074.01732483 -180.6           6.01270445]
------
Step:18, Action:South
State  138
Old Q Values:  [-180.6        1074.01732483 -180.6           6.01270445]
New Q values:  [-180.6         778.27927348 -180.6           6.01270445]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1164.24114517   977.22960064 -2651.70614553 -3344.18956062]
------
Step:19, Action:North
State  208
Old Q Values:  [ 1164.24114517   977.22960064 -2651.70614553 -3344.18956062]
New Q values:  [  698.58024011   977.22960064 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         778.27927348 -180.6           6.01270445]
------
Step:20, Action:West
State  138
Old Q Values:  [-180.6         778.27927348 -180.6           6.01270445]
New Q values:  [-180.6         778.27927348 -180.6         193.03302658]
Reward: -1  Episode Reward:  20
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  6.37426483e+02  4.04786473e+00]
------
Step:21, Action:East
State  114
Old Q Values:  [-180.6           0.           39.83412755    0.        ]
New Q values:  [-180.6           0.           24.94303725    0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -210.57770946 -1788.67892329  -180.00807518    32.03128742]
------
Step:22, Action:West
State  136
Old Q Values:  [-6180.6          167.27236519  -179.38454759  -131.19672001]
New Q values:  [-6180.6          167.27236519  -179.38454759   -17.2511795 ]
Reward: -1  Episode Reward:  18
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         119.42502833  -2419.37916968]
------
Step:23, Action:East
State  114
Old Q Values:  [-180.6           0.           24.94303725    0.        ]
New Q values:  [-180.6           0.           18.98660112    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -210.57770946 -1788.67892329  -180.00807518    32.03128742]
------
Step:24, Action:West
State  138
Old Q Values:  [-180.6         778.27927348 -180.6         193.03302658]
New Q values:  [-180.6         778.27927348 -180.6         267.84115543]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  6.37426483e+02  4.04786473e+00]
------
Step:25, Action:East
State  123
Old Q Values:  [ -284.31459256 -6000.6           79.65792356    10.97401097]
New Q values:  [ -284.31459256 -6000.6          264.74695147    10.97401097]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         778.27927348 -180.6         267.84115543]
------
Step:26, Action:West
State  138
Old Q Values:  [-180.6         778.27927348 -180.6         267.84115543]
New Q values:  [-180.6         778.27927348 -180.6         185.96054761]
Reward: -1  Episode Reward:  14
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          264.74695147    10.97401097]
------
Step:27, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  6.37426483e+02  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.87854375e+02  4.04786473e+00]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         778.27927348 -180.6         185.96054761]
------
Step:28, Action:South
State  138
Old Q Values:  [-180.6         778.27927348 -180.6         185.96054761]
New Q values:  [-180.6         635.58059201 -180.6         185.96054761]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1082.89627538 -1695.56049235  -180.6            3.07790274]
------
Step:29, Action:North
State  208
Old Q Values:  [  698.58024011   977.22960064 -2651.70614553 -3344.18956062]
New Q values:  [  469.50627365   977.22960064 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         635.58059201 -180.6         185.96054761]
------
Step:30, Action:West
State  138
Old Q Values:  [-180.6         635.58059201 -180.6         185.96054761]
New Q values:  [-180.6         635.58059201 -180.6         153.20830449]
Reward: -1  Episode Reward:  10
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          264.74695147    10.97401097]
------
Step:31, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.87854375e+02  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.85215928e+02  4.04786473e+00]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         635.58059201 -180.6         153.20830449]
------
Step:32, Action:South
State  130
Old Q Values:  [ -210.57770946 -1788.67892329  -180.00807518    32.03128742]
New Q values:  [-210.57770946 -391.2026867  -180.00807518   32.03128742]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1082.89627538 -1695.56049235  -180.6            3.07790274]
------
Step:33, Action:North
State  208
Old Q Values:  [  469.50627365   977.22960064 -2651.70614553 -3344.18956062]
New Q values:  [  196.81189569   977.22960064 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-210.57770946 -391.2026867  -180.00807518   32.03128742]
------
Step:34, Action:West
State  130
Old Q Values:  [-210.57770946 -391.2026867  -180.00807518   32.03128742]
New Q values:  [-210.57770946 -391.2026867  -180.00807518   17.90849531]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.           18.98660112    0.        ]
------
Step:35, Action:East
State  114
Old Q Values:  [-180.6           0.           18.98660112    0.        ]
New Q values:  [-180.6           0.           12.36718904    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-210.57770946 -391.2026867  -180.00807518   17.90849531]
------
Step:36, Action:West
State  138
Old Q Values:  [-180.6         635.58059201 -180.6         153.20830449]
New Q values:  [-180.6         635.58059201 -180.6         176.24810009]
Reward: -1  Episode Reward:  4
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.85215928e+02  4.04786473e+00]
------
Step:37, Action:East
State  114
Old Q Values:  [-180.6           0.           12.36718904    0.        ]
New Q values:  [-180.6           0.            9.71942421    0.        ]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-210.57770946 -391.2026867  -180.00807518   17.90849531]
------
Step:38, Action:West
State  136
Old Q Values:  [-6180.6          167.27236519  -179.38454759   -17.2511795 ]
New Q values:  [-6180.6          167.27236519  -179.38454759    28.3270367 ]
Reward: -1  Episode Reward:  2
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         119.42502833  -2419.37916968]
------
Step:39, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         119.42502833  -2419.37916968]
New Q values:  [-10156.11771313  -5995.686          97.35172089  -2419.37916968]
Reward: -1  Episode Reward:  1
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          167.27236519  -179.38454759    28.3270367 ]
------
Step:40, Action:South
State  136
Old Q Values:  [-6180.6          167.27236519  -179.38454759    28.3270367 ]
New Q values:  [-6180.6          359.47782627  -179.38454759    28.3270367 ]
Reward: -1  Episode Reward:  0
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  196.81189569   977.22960064 -2651.70614553 -3344.18956062]
------
Step:41, Action:South
State  208
Old Q Values:  [  196.81189569   977.22960064 -2651.70614553 -3344.18956062]
New Q values:  [  196.81189569   795.27573603 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -1
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1349.94631925 -8656.02923281 -6173.56321028   119.57572576]
------
Step:42, Action:North
State  288
Old Q Values:  [ 1349.94631925 -8656.02923281 -6173.56321028   119.57572576]
New Q values:  [  777.96124851 -8656.02923281 -6173.56321028   119.57572576]
Reward: -1  Episode Reward:  -2
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  196.81189569   795.27573603 -2651.70614553 -3344.18956062]
------
Step:43, Action:South
State  208
Old Q Values:  [  196.81189569   795.27573603 -2651.70614553 -3344.18956062]
New Q values:  [  196.81189569   550.89866897 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -3
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  777.96124851 -8656.02923281 -6173.56321028   119.57572576]
------
Step:44, Action:North
State  288
Old Q Values:  [  777.96124851 -8656.02923281 -6173.56321028   119.57572576]
New Q values:  [  475.85410009 -8656.02923281 -6173.56321028   119.57572576]
Reward: -1  Episode Reward:  -4
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  196.81189569   550.89866897 -2651.70614553 -3344.18956062]
------
Step:45, Action:South
State  208
Old Q Values:  [  196.81189569   550.89866897 -2651.70614553 -3344.18956062]
New Q values:  [  196.81189569   362.51569761 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -5
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  475.85410009 -8656.02923281 -6173.56321028   119.57572576]
------
Step:46, Action:North
State  288
Old Q Values:  [  475.85410009 -8656.02923281 -6173.56321028   119.57572576]
New Q values:  [  298.49634932 -8656.02923281 -6173.56321028   119.57572576]
Reward: -1  Episode Reward:  -6
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  196.81189569   362.51569761 -2651.70614553 -3344.18956062]
------
Step:47, Action:South
State  208
Old Q Values:  [  196.81189569   362.51569761 -2651.70614553 -3344.18956062]
New Q values:  [  196.81189569   233.95518384 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -7
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  298.49634932 -8656.02923281 -6173.56321028   119.57572576]
------
Step:48, Action:North
State  288
Old Q Values:  [  298.49634932 -8656.02923281 -6173.56321028   119.57572576]
New Q values:  [  188.98509488 -8656.02923281 -6173.56321028   119.57572576]
Reward: -1  Episode Reward:  -8
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  196.81189569   233.95518384 -2651.70614553 -3344.18956062]
------
Step:49, Action:South
State  208
Old Q Values:  [  196.81189569   233.95518384 -2651.70614553 -3344.18956062]
New Q values:  [  196.81189569   149.677602   -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -9
xxxxx
x.  x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  188.98509488 -8656.02923281 -6173.56321028   119.57572576]
------
Step:50, Action:West
State  288
Old Q Values:  [  188.98509488 -8656.02923281 -6173.56321028   119.57572576]
New Q values:  [  188.98509488 -8656.02923281 -6173.56321028    58.86316558]
Reward: -1  Episode Reward:  -10
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  38.77625093    0.         -516.58856435    0.        ]
------
Step:51, Action:North
State  272
Old Q Values:  [ -62.34509316 -180.6        3095.53916443    8.424     ]
New Q values:  [1219.18903341 -180.6        3095.53916443    8.424     ]
Reward: -1  Episode Reward:  -11
xxxxx
x.g x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  93.12067776   22.23326427 4149.09023557    0.        ]
------
Step:52, Action:East
State  192
Old Q Values:  [  93.12067776   22.23326427 4149.09023557    0.        ]
New Q values:  [  93.12067776   22.23326427 1718.07966293    0.        ]
Reward: -1  Episode Reward:  -12
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  196.81189569   149.677602   -2651.70614553 -3344.18956062]
------
Step:53, Action:North
State  208
Old Q Values:  [  196.81189569   149.677602   -2651.70614553 -3344.18956062]
New Q values:  [  185.96810615   149.677602   -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -13
xxxxx
xg ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          359.47782627  -179.38454759    28.3270367 ]
------
Step:54, Action:South
State  130
Old Q Values:  [-210.57770946 -391.2026867  -180.00807518   17.90849531]
New Q values:  [-210.57770946 -101.29064283 -180.00807518   17.90849531]
Reward: -1  Episode Reward:  -14
xxxxx
x.  x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  185.96810615   149.677602   -2651.70614553 -3344.18956062]
------
Step:55, Action:North
State  208
Old Q Values:  [  185.96810615   149.677602   -2651.70614553 -3344.18956062]
New Q values:  [  181.63059034   149.677602   -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -15
xxxxx
xg ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          359.47782627  -179.38454759    28.3270367 ]
------
Step:56, Action:South
State  130
Old Q Values:  [-210.57770946 -101.29064283 -180.00807518   17.90849531]
New Q values:  [-210.57770946   13.37291997 -180.00807518   17.90849531]
Reward: -1  Episode Reward:  -16
xxxxx
x.  x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  181.63059034   149.677602   -2651.70614553 -3344.18956062]
------
Step:57, Action:North
State  210
Old Q Values:  [ 1082.89627538 -1695.56049235  -180.6            3.07790274]
New Q values:  [  437.93105874 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  -17
xxxxx
x. ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-210.57770946   13.37291997 -180.00807518   17.90849531]
------
Step:58, Action:West
State  130
Old Q Values:  [-210.57770946   13.37291997 -180.00807518   17.90849531]
New Q values:  [-210.57770946   13.37291997 -180.00807518    9.47922539]
Reward: -1  Episode Reward:  -18
xxxxx
x.a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            9.71942421    0.        ]
------
Step:59, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.85215928e+02  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.44160549e+02  4.04786473e+00]
Reward: -1  Episode Reward:  -19
xxxxx
x. ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         635.58059201 -180.6         176.24810009]
------
Step:60, Action:South
State  138
Old Q Values:  [-180.6         635.58059201 -180.6         176.24810009]
New Q values:  [-180.6         385.01155443 -180.6         176.24810009]
Reward: -1  Episode Reward:  -20
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  437.93105874 -1695.56049235  -180.6            3.07790274]
------
Step:61, Action:North
State  210
Old Q Values:  [  437.93105874 -1695.56049235  -180.6            3.07790274]
New Q values:  [  178.58429949 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  -21
xxxxx
x. ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-210.57770946   13.37291997 -180.00807518    9.47922539]
------
Step:62, Action:South
State  130
Old Q Values:  [-210.57770946   13.37291997 -180.00807518    9.47922539]
New Q values:  [-210.57770946   59.23834509 -180.00807518    9.47922539]
Reward: -1  Episode Reward:  -22
xxxxx
x.  x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  181.63059034   149.677602   -2651.70614553 -3344.18956062]
------
Step:63, Action:North
State  208
Old Q Values:  [  181.63059034   149.677602   -2651.70614553 -3344.18956062]
New Q values:  [  179.89558402   149.677602   -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -23
xxxxx
xg ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          359.47782627  -179.38454759    28.3270367 ]
------
Step:64, Action:South
State  136
Old Q Values:  [-6180.6          359.47782627  -179.38454759    28.3270367 ]
New Q values:  [-6180.6          197.15980571  -179.38454759    28.3270367 ]
Reward: -1  Episode Reward:  -24
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  179.89558402   149.677602   -2651.70614553 -3344.18956062]
------
Step:65, Action:North
State  208
Old Q Values:  [  179.89558402   149.677602   -2651.70614553 -3344.18956062]
New Q values:  [-5869.49382468   149.677602   -2651.70614553 -3344.18956062]
Reward: -10001  Episode Reward:  -10025
xxxxx
x. gx
x.  x
x.  x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  178.58429949 -1695.56049235  -180.6            3.07790274]
------
Step:1, Action:North
State  210
Old Q Values:  [  178.58429949 -1695.56049235  -180.6            3.07790274]
New Q values:  [  192.33718612 -1695.56049235  -180.6            3.07790274]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         385.01155443 -180.6         176.24810009]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6         385.01155443 -180.6         176.24810009]
New Q values:  [-180.6         198.30790237 -180.6         176.24810009]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xg.ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5869.49382468   149.677602   -2651.70614553 -3344.18956062]
------
Step:3, Action:South
State  208
Old Q Values:  [-5869.49382468   149.677602   -2651.70614553 -3344.18956062]
New Q values:  [-5869.49382468   121.96656926 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  17
xxxxx
xg. x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  188.98509488 -8656.02923281 -6173.56321028    58.86316558]
------
Step:4, Action:North
State  288
Old Q Values:  [  188.98509488 -8656.02923281 -6173.56321028    58.86316558]
New Q values:  [  111.58400873 -8656.02923281 -6173.56321028    58.86316558]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5869.49382468   121.96656926 -2651.70614553 -3344.18956062]
------
Step:5, Action:South
State  208
Old Q Values:  [-5869.49382468   121.96656926 -2651.70614553 -3344.18956062]
New Q values:  [-5869.49382468    81.66183033 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  111.58400873 -8656.02923281 -6173.56321028    58.86316558]
------
Step:6, Action:North
State  288
Old Q Values:  [  111.58400873 -8656.02923281 -6173.56321028    58.86316558]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028    58.86316558]
Reward: -10001  Episode Reward:  -9986
xxxxx
x.. x
x..gx
x.  x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.44656445 -2781.31337986  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -180.6            8.44656445 -2781.31337986  -180.6       ]
New Q values:  [ -180.6            8.49412954 -2781.31337986  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  -0.94832081   -1.22906555 -445.90112872 -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [  -0.94832081   -1.22906555 -445.90112872 -180.6       ]
New Q values:  [   1.56891054   -1.22906555 -445.90112872 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
xag.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.49412954 -2781.31337986  -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [ 0.          8.91367172 48.93406917  0.        ]
New Q values:  [ 0.          3.43614185 48.93406917  0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   1.56891054   -1.22906555 -445.90112872 -180.6       ]
------
Step:4, Action:North
State  181
Old Q Values:  [   1.56891054   -1.22906555 -445.90112872 -180.6       ]
New Q values:  [  14.70778497   -1.22906555 -445.90112872 -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
xa .x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.          3.43614185 48.93406917  0.        ]
------
Step:5, Action:East
State  111
Old Q Values:  [ 0.          3.43614185 48.93406917  0.        ]
New Q values:  [ 0.          3.43614185 98.39771311  0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x a.x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          264.74695147    10.97401097]
------
Step:6, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.44160549e+02  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  2.02556590e+02  4.04786473e+00]
Reward: 9  Episode Reward:  14
xxxxx
x  ax
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         198.30790237 -180.6         176.24810009]
------
Step:7, Action:South
State  138
Old Q Values:  [-180.6         198.30790237 -180.6         176.24810009]
New Q values:  [-180.6         278.85532529 -180.6         176.24810009]
Reward: 9  Episode Reward:  23
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.40383046e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
------
Step:8, Action:South
State  216
Old Q Values:  [ 2.40383046e+02  6.47107214e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 2.40383046e+02  2.81901835e+02 -6.17035694e+03  6.08663514e+00]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028    58.86316558]
------
Step:9, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028    58.86316558]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   957.60701556]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[1219.18903341 -180.6        3095.53916443    8.424     ]
------
Step:10, Action:East
State  272
Old Q Values:  [1219.18903341 -180.6        3095.53916443    8.424     ]
New Q values:  [1219.18903341 -180.6        1524.89777044    8.424     ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   957.60701556]
------
Step:11, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   957.60701556]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   839.91213736]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[1219.18903341 -180.6        1524.89777044    8.424     ]
------
Step:12, Action:East
State  272
Old Q Values:  [1219.18903341 -180.6        1524.89777044    8.424     ]
New Q values:  [1219.18903341 -180.6         861.33274938    8.424     ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   839.91213736]
------
Step:13, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   839.91213736]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   701.12156497]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[1219.18903341 -180.6         861.33274938    8.424     ]
------
Step:14, Action:North
State  272
Old Q Values:  [1219.18903341 -180.6         861.33274938    8.424     ]
New Q values:  [ 493.81880374 -180.6         861.33274938    8.424     ]
Reward: 9  Episode Reward:  46
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 2.47730124  0.          2.46857876 -0.84      ]
------
Step:15, Action:East
State  200
Old Q Values:  [ 2.47730124  0.          2.46857876 -0.84      ]
New Q values:  [ 2.47730124  0.         84.95798214 -0.84      ]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.40383046e+02  2.81901835e+02 -6.17035694e+03  6.08663514e+00]
------
Step:16, Action:South
State  216
Old Q Values:  [ 2.40383046e+02  2.81901835e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 2.40383046e+02  3.22497204e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   701.12156497]
------
Step:17, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   701.12156497]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   538.2484508 ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 493.81880374 -180.6         861.33274938    8.424     ]
------
Step:18, Action:East
State  272
Old Q Values:  [ 493.81880374 -180.6         861.33274938    8.424     ]
New Q values:  [ 493.81880374 -180.6         505.40763499    8.424     ]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   538.2484508 ]
------
Step:19, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   538.2484508 ]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   226.3322556 ]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  38.77625093    0.         -516.58856435    0.        ]
------
Step:20, Action:North
State  273
Old Q Values:  [  38.77625093    0.         -516.58856435    0.        ]
New Q values:  [  22.07683298    0.         -516.58856435    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[23.88777536  0.          0.          0.024     ]
------
Step:21, Action:North
State  201
Old Q Values:  [23.88777536  0.          0.          0.024     ]
New Q values:  [10.42162111  0.          0.          0.024     ]
Reward: -1  Episode Reward:  39
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.79128054e+03  4.88836988e+00]
------
Step:22, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686          97.35172089  -2419.37916968]
New Q values:  [-10156.11771313  -5995.686          97.35172089   -966.76649461]
Reward: -1  Episode Reward:  38
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            5.28391089 -3895.20980426     0.        ]
------
Step:23, Action:South
State  105
Old Q Values:  [ -180.6            5.28391089 -3895.20980426     0.        ]
New Q values:  [-1.80600000e+02  1.51356435e+00 -3.89520980e+03  0.00000000e+00]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
------
Step:24, Action:South
State  177
Old Q Values:  [    0.        84007.56      60661.2566939     0.       ]
New Q values:  [     0.         111762.76711124  60661.2566939       0.        ]
Reward: 100009  Episode Reward:  100046
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5869.49382468    81.66183033 -2651.70614553 -3344.18956062]
------
Step:1, Action:South
State  208
Old Q Values:  [-5869.49382468    81.66183033 -2651.70614553 -3344.18956062]
New Q values:  [-5869.49382468   105.96440881 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   226.3322556 ]
------
Step:2, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   226.3322556 ]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   247.55519274]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 493.81880374 -180.6         505.40763499    8.424     ]
------
Step:3, Action:East
State  272
Old Q Values:  [ 493.81880374 -180.6         505.40763499    8.424     ]
New Q values:  [ 493.81880374 -180.6         275.82961182    8.424     ]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   247.55519274]
------
Step:4, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   247.55519274]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   105.04512699]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  22.07683298    0.         -516.58856435    0.        ]
------
Step:5, Action:North
State  273
Old Q Values:  [  22.07683298    0.         -516.58856435    0.        ]
New Q values:  [  38.51770843    0.         -516.58856435    0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x. gx
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   80.95658411     0.         -7410.7447866      0.        ]
------
Step:6, Action:North
State  193
Old Q Values:  [   80.95658411     0.         -7410.7447866      0.        ]
New Q values:  [  111.20671908     0.         -7410.7447866      0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x.a.x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          264.74695147    10.97401097]
------
Step:7, Action:East
State  123
Old Q Values:  [ -284.31459256 -6000.6          264.74695147    10.97401097]
New Q values:  [ -284.31459256 -6000.6          194.95537817    10.97401097]
Reward: 9  Episode Reward:  33
xxxxx
x. ax
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         278.85532529 -180.6         176.24810009]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6         278.85532529 -180.6         176.24810009]
New Q values:  [ -180.6        -5857.26854724  -180.6          176.24810009]
Reward: -10001  Episode Reward:  -9968
xxxxx
x.  x
x. gx
x.  x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[   6.1762476     3.01500451 1697.93329469    0.        ]
------
Step:1, Action:East
State  181
Old Q Values:  [  14.70778497   -1.22906555 -445.90112872 -180.6       ]
New Q values:  [  14.70778497   -1.22906555 -139.59843576 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  111.20671908     0.         -7410.7447866      0.        ]
------
Step:2, Action:North
State  196
Old Q Values:  [-6340.18357123   650.23146405     0.             0.        ]
New Q values:  [-2469.90645144   650.23146405     0.             0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x.a.x
x g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  2.02556590e+02  4.04786473e+00]
------
Step:3, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686          97.35172089   -966.76649461]
New Q values:  [-10156.11771313  -5995.686         103.48863007   -966.76649461]
Reward: 9  Episode Reward:  27
xxxxx
x.gax
x  .x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          197.15980571  -179.38454759    28.3270367 ]
------
Step:4, Action:South
State  136
Old Q Values:  [-6180.6          197.15980571  -179.38454759    28.3270367 ]
New Q values:  [-6180.6          181.01308339  -179.38454759    28.3270367 ]
Reward: 9  Episode Reward:  36
xxxxx
x. gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.40383046e+02  3.22497204e+02 -6.17035694e+03  6.08663514e+00]
------
Step:5, Action:South
State  216
Old Q Values:  [ 2.40383046e+02  3.22497204e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 2.40383046e+02  1.59912420e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  35
xxxxx
x.g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   105.04512699]
------
Step:6, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   105.04512699]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   195.56369192]
Reward: 9  Episode Reward:  44
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 493.81880374 -180.6         275.82961182    8.424     ]
------
Step:7, Action:North
State  272
Old Q Values:  [ 493.81880374 -180.6         275.82961182    8.424     ]
New Q values:  [ 712.35142037 -180.6         275.82961182    8.424     ]
Reward: -1  Episode Reward:  43
xxxxx
x.g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  93.12067776   22.23326427 1718.07966293    0.        ]
------
Step:8, Action:East
State  192
Old Q Values:  [  93.12067776   22.23326427 1718.07966293    0.        ]
New Q values:  [ 93.12067776  22.23326427 718.42118782   0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5869.49382468   105.96440881 -2651.70614553 -3344.18956062]
------
Step:9, Action:South
State  208
Old Q Values:  [-5869.49382468   105.96440881 -2651.70614553 -3344.18956062]
New Q values:  [-5869.49382468   100.4548711  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  41
xxxxx
x.  x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   195.56369192]
------
Step:10, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   195.56369192]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028    89.18078929]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  38.51770843    0.         -516.58856435    0.        ]
------
Step:11, Action:North
State  272
Old Q Values:  [ 712.35142037 -180.6         275.82961182    8.424     ]
New Q values:  [1933.53801665 -180.6         275.82961182    8.424     ]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.56078987e+00  5.49732483e+03  0.00000000e+00]
------
Step:12, Action:East
State  195
Old Q Values:  [  51.54351872    0.         5607.67322519    0.        ]
New Q values:  [  51.54351872    0.         2300.17044591    0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  192.33718612 -1695.56049235  -180.6            3.07790274]
------
Step:13, Action:North
State  210
Old Q Values:  [  192.33718612 -1695.56049235  -180.6            3.07790274]
New Q values:  [   94.10637798 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  37
xxxxx
x. ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-210.57770946   59.23834509 -180.00807518    9.47922539]
------
Step:14, Action:South
State  130
Old Q Values:  [-210.57770946   59.23834509 -180.00807518    9.47922539]
New Q values:  [-210.57770946   51.32725143 -180.00807518    9.47922539]
Reward: -1  Episode Reward:  36
xxxxx
x.  x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   94.10637798 -1695.56049235  -180.6            3.07790274]
------
Step:15, Action:North
State  208
Old Q Values:  [-5869.49382468   100.4548711  -2651.70614553 -3344.18956062]
New Q values:  [-2332.99935444   100.4548711  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-210.57770946   51.32725143 -180.00807518    9.47922539]
------
Step:16, Action:West
State  128
Old Q Values:  [ 4312.7659208  2850.949675  -8652.84      22082.5512   ]
New Q values:  [ 4312.7659208   2850.949675   -8652.84        8911.82194953]
Reward: -1  Episode Reward:  34
xxxxx
x.agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:17, Action:South
State  115
Old Q Values:  [-180.6          24.52991094  113.12519404    0.        ]
New Q values:  [-180.6          42.5739801   113.12519404    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  111.20671908     0.         -7410.7447866      0.        ]
------
Step:18, Action:North
State  193
Old Q Values:  [  111.20671908     0.         -7410.7447866      0.        ]
New Q values:  [  123.28415716     0.         -7410.7447866      0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x.agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:19, Action:South
State  115
Old Q Values:  [-180.6          42.5739801   113.12519404    0.        ]
New Q values:  [-180.6          53.41483919  113.12519404    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  123.28415716     0.         -7410.7447866      0.        ]
------
Step:20, Action:North
State  192
Old Q Values:  [ 93.12067776  22.23326427 718.42118782   0.        ]
New Q values:  [ 39.56409837  22.23326427 718.42118782   0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x.a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            9.71942421    0.        ]
------
Step:21, Action:East
State  112
Old Q Values:  [    0.             0.         22163.36829152 60005.4       ]
New Q values:  [    0.             0.         11538.29390147 60005.4       ]
Reward: -1  Episode Reward:  29
xxxxx
x.gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 4312.7659208   2850.949675   -8652.84        8911.82194953]
------
Step:22, Action:North
State  130
Old Q Values:  [-210.57770946   51.32725143 -180.00807518    9.47922539]
New Q values:  [-249.43290836   51.32725143 -180.00807518    9.47922539]
Reward: -301  Episode Reward:  -272
xxxxx
x. ax
x g x
x.  x
xxxxx
Step:23, Action:West
State  128
Old Q Values:  [ 4312.7659208   2850.949675   -8652.84        8911.82194953]
New Q values:  [ 4312.7659208   2850.949675   -8652.84       15565.74877981]
Reward: -10001  Episode Reward:  -10273
xxxxx
x.g x
x   x
x.  x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          194.95537817    10.97401097]
------
Step:1, Action:East
State  123
Old Q Values:  [ -284.31459256 -6000.6          194.95537817    10.97401097]
New Q values:  [ -284.31459256 -6000.6          136.2565813     10.97401097]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5857.26854724  -180.6          176.24810009]
------
Step:2, Action:West
State  138
Old Q Values:  [ -180.6        -5857.26854724  -180.6          176.24810009]
New Q values:  [ -180.6        -5857.26854724  -180.6          130.66621709]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  2.02556590e+02  4.04786473e+00]
------
Step:3, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  2.02556590e+02  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.19622501e+02  4.04786473e+00]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5857.26854724  -180.6          130.66621709]
------
Step:4, Action:West
State  138
Old Q Values:  [ -180.6        -5857.26854724  -180.6          130.66621709]
New Q values:  [ -180.6        -5857.26854724  -180.6           87.55323719]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.19622501e+02  4.04786473e+00]
------
Step:5, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.19622501e+02  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.35149716e+01  4.04786473e+00]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5857.26854724  -180.6           87.55323719]
------
Step:6, Action:West
State  138
Old Q Values:  [ -180.6        -5857.26854724  -180.6           87.55323719]
New Q values:  [ -180.6        -5857.26854724  -180.6           56.47578637]
Reward: -1  Episode Reward:  4
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.35149716e+01  4.04786473e+00]
------
Step:7, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.35149716e+01  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.57487246e+01  4.04786473e+00]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -5857.26854724  -180.6           56.47578637]
------
Step:8, Action:West
State  138
Old Q Values:  [ -180.6        -5857.26854724  -180.6           56.47578637]
New Q values:  [ -180.6        -5857.26854724  -180.6           35.71493192]
Reward: -1  Episode Reward:  2
xxxxx
x.a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.57487246e+01  4.04786473e+00]
------
Step:9, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         103.48863007   -966.76649461]
New Q values:  [-10156.11771313  -5995.686          95.09937704   -966.76649461]
Reward: -1  Episode Reward:  1
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          181.01308339  -179.38454759    28.3270367 ]
------
Step:10, Action:South
State  136
Old Q Values:  [-6180.6          181.01308339  -179.38454759    28.3270367 ]
New Q values:  [-6180.6          101.94169468  -179.38454759    28.3270367 ]
Reward: -1  Episode Reward:  0
xxxxx
x.g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-2332.99935444   100.4548711  -2651.70614553 -3344.18956062]
------
Step:11, Action:South
State  208
Old Q Values:  [-2332.99935444   100.4548711  -2651.70614553 -3344.18956062]
New Q values:  [-2332.99935444    72.33618523 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  9
xxxxx
xg  x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028    89.18078929]
------
Step:12, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028    89.18078929]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   621.13372071]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[1933.53801665 -180.6         275.82961182    8.424     ]
------
Step:13, Action:North
State  272
Old Q Values:  [1933.53801665 -180.6         275.82961182    8.424     ]
New Q values:  [-5005.658437    -180.6          275.82961182     8.424     ]
Reward: -9991  Episode Reward:  -9973
xxxxx
x.  x
x.g x
x.  x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   621.13372071]
------
Step:1, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   621.13372071]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   265.40880081]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  38.51770843    0.         -516.58856435    0.        ]
------
Step:2, Action:North
State  272
Old Q Values:  [-5005.658437    -180.6          275.82961182     8.424     ]
New Q values:  [-7781.33701845  -180.6          275.82961182     8.424     ]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.. x
x.g.x
x.  x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   -1.30402179  -289.59534477 -1299.12168416  -180.6       ]
------
Step:1, Action:North
State  261
Old Q Values:  [   -1.30402179  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  514.25837969  -289.59534477 -1299.12168416  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[   6.1762476     3.01500451 1697.93329469    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [  14.70778497   -1.22906555 -139.59843576 -180.6       ]
New Q values:  [  14.70778497   -1.22906555  -13.45412716 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  123.28415716     0.         -7410.7447866      0.        ]
------
Step:3, Action:North
State  192
Old Q Values:  [ 39.56409837  22.23326427 718.42118782   0.        ]
New Q values:  [ 24.14146661  22.23326427 718.42118782   0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x.a.x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            9.71942421    0.        ]
------
Step:4, Action:East
State  114
Old Q Values:  [-180.6           0.            9.71942421    0.        ]
New Q values:  [-180.6           0.           24.68594511    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x. ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836   51.32725143 -180.00807518    9.47922539]
------
Step:5, Action:South
State  138
Old Q Values:  [ -180.6        -5857.26854724  -180.6           35.71493192]
New Q values:  [ -180.6        -2271.39250495  -180.6           35.71493192]
Reward: -1  Episode Reward:  35
xxxxx
x.  x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.40383046e+02  1.59912420e+02 -6.17035694e+03  6.08663514e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [   94.10637798 -1695.56049235  -180.6            3.07790274]
New Q values:  [   52.44072662 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  34
xxxxx
x. ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836   51.32725143 -180.00807518    9.47922539]
------
Step:7, Action:South
State  130
Old Q Values:  [-249.43290836   51.32725143 -180.00807518    9.47922539]
New Q values:  [-249.43290836   35.66311856 -180.00807518    9.47922539]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   52.44072662 -1695.56049235  -180.6            3.07790274]
------
Step:8, Action:North
State  210
Old Q Values:  [   52.44072662 -1695.56049235  -180.6            3.07790274]
New Q values:  [   31.07522621 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  32
xxxxx
x. ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836   35.66311856 -180.00807518    9.47922539]
------
Step:9, Action:South
State  138
Old Q Values:  [ -180.6        -2271.39250495  -180.6           35.71493192]
New Q values:  [-180.6        -731.60350289 -180.6          35.71493192]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  0.         591.84499697   0.           0.        ]
------
Step:10, Action:South
State  208
Old Q Values:  [-2332.99935444    72.33618523 -2651.70614553 -3344.18956062]
New Q values:  [-2332.99935444   113.95711433 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  40
xxxxx
x.  x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   265.40880081]
------
Step:11, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   265.40880081]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   194.31240387]
Reward: 9  Episode Reward:  49
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-7781.33701845  -180.6          275.82961182     8.424     ]
------
Step:12, Action:East
State  272
Old Q Values:  [-7781.33701845  -180.6          275.82961182     8.424     ]
New Q values:  [-7781.33701845  -180.6          168.02556589     8.424     ]
Reward: -1  Episode Reward:  48
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   194.31240387]
------
Step:13, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   194.31240387]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028   127.53263131]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-7781.33701845  -180.6          168.02556589     8.424     ]
------
Step:14, Action:East
State  272
Old Q Values:  [-7781.33701845  -180.6          168.02556589     8.424     ]
New Q values:  [-7781.33701845  -180.6          104.87001575     8.424     ]
Reward: -1  Episode Reward:  46
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028   127.53263131]
------
Step:15, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028   127.53263131]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028    81.87405725]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-7781.33701845  -180.6          104.87001575     8.424     ]
------
Step:16, Action:East
State  272
Old Q Values:  [-7781.33701845  -180.6          104.87001575     8.424     ]
New Q values:  [-7781.33701845  -180.6           65.91022348     8.424     ]
Reward: -1  Episode Reward:  44
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028    81.87405725]
------
Step:17, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028    81.87405725]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028    51.92268994]
Reward: -1  Episode Reward:  43
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-7781.33701845  -180.6           65.91022348     8.424     ]
------
Step:18, Action:East
State  272
Old Q Values:  [-7781.33701845  -180.6           65.91022348     8.424     ]
New Q values:  [-7781.33701845  -180.6           41.34089637     8.424     ]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028    51.92268994]
------
Step:19, Action:West
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028    51.92268994]
New Q values:  [-5931.46784741 -8656.02923281 -6173.56321028    32.57134489]
Reward: -1  Episode Reward:  41
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-7781.33701845  -180.6           41.34089637     8.424     ]
------
Step:20, Action:East
State  272
Old Q Values:  [-7781.33701845  -180.6           41.34089637     8.424     ]
New Q values:  [-7781.33701845  -180.6           25.70776202     8.424     ]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5931.46784741 -8656.02923281 -6173.56321028    32.57134489]
------
Step:21, Action:North
State  288
Old Q Values:  [-5931.46784741 -8656.02923281 -6173.56321028    32.57134489]
New Q values:  [-2363.8645711  -8656.02923281 -6173.56321028    32.57134489]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   31.07522621 -1695.56049235  -180.6            3.07790274]
------
Step:22, Action:North
State  210
Old Q Values:  [   31.07522621 -1695.56049235  -180.6            3.07790274]
New Q values:  [   22.52902605 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  38
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836   35.66311856 -180.00807518    9.47922539]
------
Step:23, Action:South
State  130
Old Q Values:  [-249.43290836   35.66311856 -180.00807518    9.47922539]
New Q values:  [-249.43290836   20.42395524 -180.00807518    9.47922539]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   22.52902605 -1695.56049235  -180.6            3.07790274]
------
Step:24, Action:North
State  210
Old Q Values:  [   22.52902605 -1695.56049235  -180.6            3.07790274]
New Q values:  [   14.53879699 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  36
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836   20.42395524 -180.00807518    9.47922539]
------
Step:25, Action:South
State  130
Old Q Values:  [-249.43290836   20.42395524 -180.00807518    9.47922539]
New Q values:  [-249.43290836   11.93122119 -180.00807518    9.47922539]
Reward: -1  Episode Reward:  35
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   14.53879699 -1695.56049235  -180.6            3.07790274]
------
Step:26, Action:North
State  208
Old Q Values:  [-2332.99935444   113.95711433 -2651.70614553 -3344.18956062]
New Q values:  [ -930.22037542   113.95711433 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  34
xxxxx
x. ax
xg  x
x   x
xxxxx
Step:27, Action:South
State  130
Old Q Values:  [-249.43290836   11.93122119 -180.00807518    9.47922539]
New Q values:  [-249.43290836    8.53412758 -180.00807518    9.47922539]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   14.53879699 -1695.56049235  -180.6            3.07790274]
------
Step:28, Action:North
State  210
Old Q Values:  [   14.53879699 -1695.56049235  -180.6            3.07790274]
New Q values:  [    8.05928641 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  32
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836    8.53412758 -180.00807518    9.47922539]
------
Step:29, Action:West
State  130
Old Q Values:  [-249.43290836    8.53412758 -180.00807518    9.47922539]
New Q values:  [-249.43290836    8.53412758 -180.00807518   37.12924837]
Reward: -1  Episode Reward:  31
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          53.41483919  113.12519404    0.        ]
------
Step:30, Action:East
State  114
Old Q Values:  [-180.6           0.           24.68594511    0.        ]
New Q values:  [-180.6           0.           20.41315255    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836    8.53412758 -180.00807518   37.12924837]
------
Step:31, Action:West
State  130
Old Q Values:  [-249.43290836    8.53412758 -180.00807518   37.12924837]
New Q values:  [-249.43290836    8.53412758 -180.00807518   48.18925756]
Reward: -1  Episode Reward:  29
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          53.41483919  113.12519404    0.        ]
------
Step:32, Action:East
State  114
Old Q Values:  [-180.6           0.           20.41315255    0.        ]
New Q values:  [-180.6           0.           22.02203829    0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836    8.53412758 -180.00807518   48.18925756]
------
Step:33, Action:West
State  130
Old Q Values:  [-249.43290836    8.53412758 -180.00807518   48.18925756]
New Q values:  [-249.43290836    8.53412758 -180.00807518   52.61326124]
Reward: -1  Episode Reward:  27
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          53.41483919  113.12519404    0.        ]
------
Step:34, Action:East
State  115
Old Q Values:  [-180.6          53.41483919  113.12519404    0.        ]
New Q values:  [-180.6          53.41483919   60.43405599    0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836    8.53412758 -180.00807518   52.61326124]
------
Step:35, Action:West
State  130
Old Q Values:  [-249.43290836    8.53412758 -180.00807518   52.61326124]
New Q values:  [-249.43290836    8.53412758 -180.00807518   38.57552129]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          53.41483919   60.43405599    0.        ]
------
Step:36, Action:East
State  115
Old Q Values:  [-180.6          53.41483919   60.43405599    0.        ]
New Q values:  [-180.6          53.41483919   35.14627878    0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836    8.53412758 -180.00807518   38.57552129]
------
Step:37, Action:West
State  130
Old Q Values:  [-249.43290836    8.53412758 -180.00807518   38.57552129]
New Q values:  [-249.43290836    8.53412758 -180.00807518   30.85466027]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          53.41483919   35.14627878    0.        ]
------
Step:38, Action:South
State  115
Old Q Values:  [-180.6          53.41483919   35.14627878    0.        ]
New Q values:  [-180.6          57.75118283   35.14627878    0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  123.28415716     0.         -7410.7447866      0.        ]
------
Step:39, Action:North
State  193
Old Q Values:  [  123.28415716     0.         -7410.7447866      0.        ]
New Q values:  [  128.1151324     0.        -7410.7447866     0.       ]
Reward: -1  Episode Reward:  21
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:40, Action:South
State  112
Old Q Values:  [    0.             0.         11538.29390147 60005.4       ]
New Q values:  [    0.           214.92635634 11538.29390147 60005.4       ]
Reward: -1  Episode Reward:  20
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 24.14146661  22.23326427 718.42118782   0.        ]
------
Step:41, Action:East
State  192
Old Q Values:  [ 24.14146661  22.23326427 718.42118782   0.        ]
New Q values:  [ 24.14146661  22.23326427 320.95560943   0.        ]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -930.22037542   113.95711433 -2651.70614553 -3344.18956062]
------
Step:42, Action:South
State  208
Old Q Values:  [ -930.22037542   113.95711433 -2651.70614553 -3344.18956062]
New Q values:  [ -930.22037542    54.7542492  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  18
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2363.8645711  -8656.02923281 -6173.56321028    32.57134489]
------
Step:43, Action:West
State  288
Old Q Values:  [-2363.8645711  -8656.02923281 -6173.56321028    32.57134489]
New Q values:  [-2363.8645711  -8656.02923281 -6173.56321028    20.14086656]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-7781.33701845  -180.6           25.70776202     8.424     ]
------
Step:44, Action:East
State  272
Old Q Values:  [-7781.33701845  -180.6           25.70776202     8.424     ]
New Q values:  [-7781.33701845  -180.6           15.72536477     8.424     ]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2363.8645711  -8656.02923281 -6173.56321028    20.14086656]
------
Step:45, Action:West
State  288
Old Q Values:  [-2363.8645711  -8656.02923281 -6173.56321028    20.14086656]
New Q values:  [-2363.8645711  -8656.02923281 -6173.56321028    12.17395606]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-7781.33701845  -180.6           15.72536477     8.424     ]
------
Step:46, Action:East
State  272
Old Q Values:  [-7781.33701845  -180.6           15.72536477     8.424     ]
New Q values:  [-7781.33701845  -180.6            9.34233273     8.424     ]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2363.8645711  -8656.02923281 -6173.56321028    12.17395606]
------
Step:47, Action:North
State  288
Old Q Values:  [-2363.8645711  -8656.02923281 -6173.56321028    12.17395606]
New Q values:  [ -943.72804252 -8656.02923281 -6173.56321028    12.17395606]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    8.05928641 -1695.56049235  -180.6            3.07790274]
------
Step:48, Action:North
State  208
Old Q Values:  [ -930.22037542    54.7542492  -2651.70614553 -3344.18956062]
New Q values:  [ -363.43175209    54.7542492  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836    8.53412758 -180.00807518   30.85466027]
------
Step:49, Action:West
State  130
Old Q Values:  [-249.43290836    8.53412758 -180.00807518   30.85466027]
New Q values:  [-249.43290836    8.53412758 -180.00807518   18.3484756 ]
Reward: -1  Episode Reward:  11
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.           22.02203829    0.        ]
------
Step:50, Action:East
State  114
Old Q Values:  [-180.6           0.           22.02203829    0.        ]
New Q values:  [-180.6           0.           13.71335799    0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836    8.53412758 -180.00807518   18.3484756 ]
------
Step:51, Action:West
State  128
Old Q Values:  [ 4312.7659208   2850.949675   -8652.84       15565.74877981]
New Q values:  [ 4312.7659208   2850.949675   -8652.84       24227.31951192]
Reward: -1  Episode Reward:  9
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.           214.92635634 11538.29390147 60005.4       ]
------
Step:52, Action:East
State  114
Old Q Values:  [-180.6           0.           13.71335799    0.        ]
New Q values:  [-180.6           0.           10.38988588    0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-249.43290836    8.53412758 -180.00807518   18.3484756 ]
------
Step:53, Action:West
State  128
Old Q Values:  [ 4312.7659208   2850.949675   -8652.84       24227.31951192]
New Q values:  [ 4312.7659208   2850.949675   -8652.84       27691.94780477]
Reward: -1  Episode Reward:  7
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.           214.92635634 11538.29390147 60005.4       ]
------
Step:54, Action:East
State  112
Old Q Values:  [    0.           214.92635634 11538.29390147 60005.4       ]
New Q values:  [    0.           214.92635634 12922.30190202 60005.4       ]
Reward: -1  Episode Reward:  6
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 4312.7659208   2850.949675   -8652.84       27691.94780477]
------
Step:55, Action:North
State  130
Old Q Values:  [-249.43290836    8.53412758 -180.00807518   18.3484756 ]
New Q values:  [-274.86862066    8.53412758 -180.00807518   18.3484756 ]
Reward: -301  Episode Reward:  -295
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    8.53412758 -180.00807518   18.3484756 ]
------
Step:56, Action:West
State  130
Old Q Values:  [-274.86862066    8.53412758 -180.00807518   18.3484756 ]
New Q values:  [-274.86862066    8.53412758 -180.00807518    9.856356  ]
Reward: -1  Episode Reward:  -296
xxxxx
x.a x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.           10.38988588    0.        ]
------
Step:57, Action:East
State  114
Old Q Values:  [-180.6           0.           10.38988588    0.        ]
New Q values:  [-180.6           0.            6.51286115    0.        ]
Reward: -1  Episode Reward:  -297
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    8.53412758 -180.00807518    9.856356  ]
------
Step:58, Action:West
State  130
Old Q Values:  [-274.86862066    8.53412758 -180.00807518    9.856356  ]
New Q values:  [-274.86862066    8.53412758 -180.00807518    5.29640075]
Reward: -1  Episode Reward:  -298
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            6.51286115    0.        ]
------
Step:59, Action:East
State  114
Old Q Values:  [-180.6           0.            6.51286115    0.        ]
New Q values:  [-180.6           0.            4.56538273    0.        ]
Reward: -1  Episode Reward:  -299
xxxxx
x. ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    8.53412758 -180.00807518    5.29640075]
------
Step:60, Action:South
State  130
Old Q Values:  [-274.86862066    8.53412758 -180.00807518    5.29640075]
New Q values:  [-274.86862066    5.23143695 -180.00807518    5.29640075]
Reward: -1  Episode Reward:  -300
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    8.05928641 -1695.56049235  -180.6            3.07790274]
------
Step:61, Action:North
State  210
Old Q Values:  [    8.05928641 -1695.56049235  -180.6            3.07790274]
New Q values:  [    4.21263479 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  -301
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    5.23143695 -180.00807518    5.29640075]
------
Step:62, Action:West
State  130
Old Q Values:  [-274.86862066    5.23143695 -180.00807518    5.29640075]
New Q values:  [-274.86862066    5.23143695 -180.00807518    2.88817512]
Reward: -1  Episode Reward:  -302
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            4.56538273    0.        ]
------
Step:63, Action:East
State  114
Old Q Values:  [-180.6           0.            4.56538273    0.        ]
New Q values:  [-180.6           0.            2.79558418    0.        ]
Reward: -1  Episode Reward:  -303
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    5.23143695 -180.00807518    2.88817512]
------
Step:64, Action:South
State  130
Old Q Values:  [-274.86862066    5.23143695 -180.00807518    2.88817512]
New Q values:  [-274.86862066   17.91884954 -180.00807518    2.88817512]
Reward: -1  Episode Reward:  -304
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -363.43175209    54.7542492  -2651.70614553 -3344.18956062]
------
Step:65, Action:South
State  208
Old Q Values:  [ -363.43175209    54.7542492  -2651.70614553 -3344.18956062]
New Q values:  [ -363.43175209    24.9538865  -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -305
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -943.72804252 -8656.02923281 -6173.56321028    12.17395606]
------
Step:66, Action:West
State  288
Old Q Values:  [ -943.72804252 -8656.02923281 -6173.56321028    12.17395606]
New Q values:  [-9.43728043e+02 -8.65602923e+03 -6.17356321e+03  7.07228224e+00]
Reward: -1  Episode Reward:  -306
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-7781.33701845  -180.6            9.34233273     8.424     ]
------
Step:67, Action:East
State  272
Old Q Values:  [-7781.33701845  -180.6            9.34233273     8.424     ]
New Q values:  [-7.78133702e+03 -1.80600000e+02  5.25861776e+00  8.42400000e+00]
Reward: -1  Episode Reward:  -307
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-9.43728043e+02 -8.65602923e+03 -6.17356321e+03  7.07228224e+00]
------
Step:68, Action:North
State  288
Old Q Values:  [-9.43728043e+02 -8.65602923e+03 -6.17356321e+03  7.07228224e+00]
New Q values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  7.07228224e+00]
Reward: -1  Episode Reward:  -308
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -363.43175209    24.9538865  -2651.70614553 -3344.18956062]
------
Step:69, Action:South
State  208
Old Q Values:  [ -363.43175209    24.9538865  -2651.70614553 -3344.18956062]
New Q values:  [ -363.43175209    11.50323927 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -309
xxxxx
x.  x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  7.07228224e+00]
------
Step:70, Action:West
State  288
Old Q Values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  7.07228224e+00]
New Q values:  [ -370.60505106 -8656.02923281 -6173.56321028    13.78422542]
Reward: -1  Episode Reward:  -310
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  38.51770843    0.         -516.58856435    0.        ]
------
Step:71, Action:North
State  272
Old Q Values:  [-7.78133702e+03 -1.80600000e+02  5.25861776e+00  8.42400000e+00]
New Q values:  [-1463.93735889  -180.6            5.25861776     8.424     ]
Reward: -1  Episode Reward:  -311
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.56078987e+00  5.49732483e+03  0.00000000e+00]
------
Step:72, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.56078987e+00  5.49732483e+03  0.00000000e+00]
New Q values:  [-6.00000000e-01  2.56078987e+00  2.19959372e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -312
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    4.21263479 -1695.56049235  -180.6            3.07790274]
------
Step:73, Action:North
State  208
Old Q Values:  [ -363.43175209    11.50323927 -2651.70614553 -3344.18956062]
New Q values:  [ -140.59704597    11.50323927 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -313
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066   17.91884954 -180.00807518    2.88817512]
------
Step:74, Action:South
State  130
Old Q Values:  [-274.86862066   17.91884954 -180.00807518    2.88817512]
New Q values:  [-274.86862066   10.0185116  -180.00807518    2.88817512]
Reward: -1  Episode Reward:  -314
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -140.59704597    11.50323927 -2651.70614553 -3344.18956062]
------
Step:75, Action:South
State  208
Old Q Values:  [ -140.59704597    11.50323927 -2651.70614553 -3344.18956062]
New Q values:  [ -140.59704597     8.13656334 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -315
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -370.60505106 -8656.02923281 -6173.56321028    13.78422542]
------
Step:76, Action:West
State  288
Old Q Values:  [ -370.60505106 -8656.02923281 -6173.56321028    13.78422542]
New Q values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  7.44089017e+00]
Reward: -1  Episode Reward:  -316
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            5.25861776     8.424     ]
------
Step:77, Action:East
State  272
Old Q Values:  [-1463.93735889  -180.6            5.25861776     8.424     ]
New Q values:  [-1463.93735889  -180.6            3.73571416     8.424     ]
Reward: -1  Episode Reward:  -317
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  7.44089017e+00]
------
Step:78, Action:West
State  288
Old Q Values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  7.44089017e+00]
New Q values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  4.90355607e+00]
Reward: -1  Episode Reward:  -318
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            3.73571416     8.424     ]
------
Step:79, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            3.73571416     8.424     ]
New Q values:  [-1463.93735889  -180.6            3.73571416     3.34776   ]
Reward: -1  Episode Reward:  -319
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[0.     0.     1.9272 0.    ]
------
Step:80, Action:East
State  257
Old Q Values:  [-6.0000000e-01 -1.8060000e+02  6.0514477e+04  0.0000000e+00]
New Q values:  [-6.00000000e-01 -1.80600000e+02  2.42063115e+04  0.00000000e+00]
Reward: -1  Episode Reward:  -320
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            3.73571416     3.34776   ]
------
Step:81, Action:East
State  273
Old Q Values:  [  38.51770843    0.         -516.58856435    0.        ]
New Q values:  [  38.51770843    0.         -205.76435892    0.        ]
Reward: -1  Episode Reward:  -321
xxxxx
x.  x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  4.90355607e+00]
------
Step:82, Action:West
State  288
Old Q Values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  4.90355607e+00]
New Q values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  2.48213667e+00]
Reward: -1  Episode Reward:  -322
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            3.73571416     3.34776   ]
------
Step:83, Action:East
State  272
Old Q Values:  [-1463.93735889  -180.6            3.73571416     3.34776   ]
New Q values:  [-1463.93735889  -180.6            1.63892666     3.34776   ]
Reward: -1  Episode Reward:  -323
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  2.48213667e+00]
------
Step:84, Action:West
State  288
Old Q Values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  2.48213667e+00]
New Q values:  [ -370.60505106 -8656.02923281 -6173.56321028    11.9481672 ]
Reward: -1  Episode Reward:  -324
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  38.51770843    0.         -205.76435892    0.        ]
------
Step:85, Action:North
State  273
Old Q Values:  [  38.51770843    0.         -205.76435892    0.        ]
New Q values:  [  53.24162309    0.         -205.76435892    0.        ]
Reward: -1  Episode Reward:  -325
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  128.1151324     0.        -7410.7447866     0.       ]
------
Step:86, Action:North
State  192
Old Q Values:  [ 24.14146661  22.23326427 320.95560943   0.        ]
New Q values:  [  9.8952619   22.23326427 320.95560943   0.        ]
Reward: -1  Episode Reward:  -326
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            2.79558418    0.        ]
------
Step:87, Action:East
State  115
Old Q Values:  [-180.6          57.75118283   35.14627878    0.        ]
New Q values:  [-180.6          57.75118283   16.46406499    0.        ]
Reward: -1  Episode Reward:  -327
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066   10.0185116  -180.00807518    2.88817512]
------
Step:88, Action:West
State  130
Old Q Values:  [-274.86862066   10.0185116  -180.00807518    2.88817512]
New Q values:  [-274.86862066   10.0185116  -180.00807518    1.3939453 ]
Reward: -1  Episode Reward:  -328
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6           0.            2.79558418    0.        ]
------
Step:89, Action:East
State  114
Old Q Values:  [-180.6           0.            2.79558418    0.        ]
New Q values:  [-180.6           0.            3.52378715    0.        ]
Reward: -1  Episode Reward:  -329
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066   10.0185116  -180.00807518    1.3939453 ]
------
Step:90, Action:South
State  130
Old Q Values:  [-274.86862066   10.0185116  -180.00807518    1.3939453 ]
New Q values:  [-274.86862066    5.84837364 -180.00807518    1.3939453 ]
Reward: -1  Episode Reward:  -330
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -140.59704597     8.13656334 -2651.70614553 -3344.18956062]
------
Step:91, Action:South
State  208
Old Q Values:  [ -140.59704597     8.13656334 -2651.70614553 -3344.18956062]
New Q values:  [ -140.59704597     6.23907549 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -331
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -370.60505106 -8656.02923281 -6173.56321028    11.9481672 ]
------
Step:92, Action:West
State  288
Old Q Values:  [ -370.60505106 -8656.02923281 -6173.56321028    11.9481672 ]
New Q values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  5.18359488e+00]
Reward: -1  Episode Reward:  -332
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.63892666     3.34776   ]
------
Step:93, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892    0.        ]
New Q values:  [  53.24162309    0.         -205.76435892 7261.29345877]
Reward: -1  Episode Reward:  -333
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  2.42063115e+04  0.00000000e+00]
------
Step:94, Action:East
State  257
Old Q Values:  [-6.00000000e-01 -1.80600000e+02  2.42063115e+04  0.00000000e+00]
New Q values:  [-6.00000000e-01 -1.80600000e+02  9.68292894e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -334
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.63892666     3.34776   ]
------
Step:95, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.63892666     3.34776   ]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  2.90561779e+03]
Reward: -1  Episode Reward:  -335
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  9.68292894e+03  0.00000000e+00]
------
Step:96, Action:East
State  256
Old Q Values:  [0.     0.     1.9272 0.    ]
New Q values:  [  0.           0.         871.85621577   0.        ]
Reward: -1  Episode Reward:  -336
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.63892666e+00  2.90561779e+03]
------
Step:97, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  2.90561779e+03]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  4.06652580e+03]
Reward: -1  Episode Reward:  -337
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  9.68292894e+03  0.00000000e+00]
------
Step:98, Action:East
State  256
Old Q Values:  [  0.           0.         871.85621577   0.        ]
New Q values:  [   0.            0.         1568.10022519    0.        ]
Reward: -1  Episode Reward:  -338
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.63892666e+00  4.06652580e+03]
------
Step:99, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  4.06652580e+03]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  2.09644039e+03]
Reward: -1  Episode Reward:  -339
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[   0.            0.         1568.10022519    0.        ]
------
Step:100, Action:East
State  257
Old Q Values:  [-6.00000000e-01 -1.80600000e+02  9.68292894e+03  0.00000000e+00]
New Q values:  [-6.00000000e-01 -1.80600000e+02  4.50150369e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -340
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.63892666e+00  2.09644039e+03]
------
Step:101, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  2.09644039e+03]
New Q values:  [-1463.93735889  -180.6            1.63892666  1308.40622198]
Reward: -1  Episode Reward:  -341
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[   0.            0.         1568.10022519    0.        ]
------
Step:102, Action:East
State  256
Old Q Values:  [   0.            0.         1568.10022519    0.        ]
New Q values:  [   0.            0.         1019.16195667    0.        ]
Reward: -1  Episode Reward:  -342
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.63892666  1308.40622198]
------
Step:103, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.63892666  1308.40622198]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  1.87321360e+03]
Reward: -1  Episode Reward:  -343
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  4.50150369e+03  0.00000000e+00]
------
Step:104, Action:East
State  257
Old Q Values:  [-6.00000000e-01 -1.80600000e+02  4.50150369e+03  0.00000000e+00]
New Q values:  [-6.00000000e-01 -1.80600000e+02  3.97838951e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -344
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 7261.29345877]
------
Step:105, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  1.87321360e+03]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  1.94220229e+03]
Reward: -1  Episode Reward:  -345
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  3.97838951e+03  0.00000000e+00]
------
Step:106, Action:East
State  256
Old Q Values:  [   0.            0.         1019.16195667    0.        ]
New Q values:  [  0.           0.         989.72547051   0.        ]
Reward: -1  Episode Reward:  -346
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.63892666e+00  1.94220229e+03]
------
Step:107, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.63892666e+00  1.94220229e+03]
New Q values:  [-1463.93735889  -180.6            1.63892666  1073.19855828]
Reward: -1  Episode Reward:  -347
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[  0.           0.         989.72547051   0.        ]
------
Step:108, Action:East
State  256
Old Q Values:  [  0.           0.         989.72547051   0.        ]
New Q values:  [  0.           0.         717.24975569   0.        ]
Reward: -1  Episode Reward:  -348
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.63892666  1073.19855828]
------
Step:109, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.63892666  1073.19855828]
New Q values:  [-1463.93735889  -180.6            1.63892666   643.85435002]
Reward: -1  Episode Reward:  -349
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[  0.           0.         717.24975569   0.        ]
------
Step:110, Action:East
State  256
Old Q Values:  [  0.           0.         717.24975569   0.        ]
New Q values:  [  0.           0.         479.45620728   0.        ]
Reward: -1  Episode Reward:  -350
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.63892666   643.85435002]
------
Step:111, Action:East
State  272
Old Q Values:  [-1463.93735889  -180.6            1.63892666   643.85435002]
New Q values:  [-1463.93735889  -180.6            1.61064913   643.85435002]
Reward: -1  Episode Reward:  -351
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  5.18359488e+00]
------
Step:112, Action:North
State  288
Old Q Values:  [-3.70605051e+02 -8.65602923e+03 -6.17356321e+03  5.18359488e+00]
New Q values:  [-1.46970298e+02 -8.65602923e+03 -6.17356321e+03  5.18359488e+00]
Reward: -1  Episode Reward:  -352
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -140.59704597     6.23907549 -2651.70614553 -3344.18956062]
------
Step:113, Action:South
State  208
Old Q Values:  [ -140.59704597     6.23907549 -2651.70614553 -3344.18956062]
New Q values:  [ -140.59704597     3.45070866 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -353
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1.46970298e+02 -8.65602923e+03 -6.17356321e+03  5.18359488e+00]
------
Step:114, Action:West
State  288
Old Q Values:  [-1.46970298e+02 -8.65602923e+03 -6.17356321e+03  5.18359488e+00]
New Q values:  [ -146.97029777 -8656.02923281 -6173.56321028  2179.86147558]
Reward: -1  Episode Reward:  -354
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 7261.29345877]
------
Step:115, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   643.85435002]
New Q values:  [-1463.93735889  -180.6            1.61064913  1450.4585943 ]
Reward: -1  Episode Reward:  -355
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  3.97838951e+03  0.00000000e+00]
------
Step:116, Action:East
State  257
Old Q Values:  [-6.00000000e-01 -1.80600000e+02  3.97838951e+03  0.00000000e+00]
New Q values:  [-6.00000000e-01 -1.80600000e+02  3.76914384e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -356
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 7261.29345877]
------
Step:117, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892 7261.29345877]
New Q values:  [  53.24162309    0.         -205.76435892 4034.66053651]
Reward: -1  Episode Reward:  -357
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  3.76914384e+03  0.00000000e+00]
------
Step:118, Action:East
State  257
Old Q Values:  [-6.00000000e-01 -1.80600000e+02  3.76914384e+03  0.00000000e+00]
New Q values:  [-6.0000000e-01 -1.8060000e+02  2.7174557e+03  0.0000000e+00]
Reward: -1  Episode Reward:  -358
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 4034.66053651]
------
Step:119, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892 4034.66053651]
New Q values:  [  53.24162309    0.         -205.76435892 2428.50092409]
Reward: -1  Episode Reward:  -359
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.0000000e-01 -1.8060000e+02  2.7174557e+03  0.0000000e+00]
------
Step:120, Action:East
State  257
Old Q Values:  [-6.0000000e-01 -1.8060000e+02  2.7174557e+03  0.0000000e+00]
New Q values:  [-6.00000000e-01 -1.80600000e+02  1.81493256e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -360
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 2428.50092409]
------
Step:121, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913  1450.4585943 ]
New Q values:  [-1463.93735889  -180.6            1.61064913  1124.06320468]
Reward: -1  Episode Reward:  -361
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  1.81493256e+03  0.00000000e+00]
------
Step:122, Action:East
State  257
Old Q Values:  [-6.00000000e-01 -1.80600000e+02  1.81493256e+03  0.00000000e+00]
New Q values:  [-6.0000000e-01 -1.8060000e+02  1.4539233e+03  0.0000000e+00]
Reward: -1  Episode Reward:  -362
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 2428.50092409]
------
Step:123, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892 2428.50092409]
New Q values:  [  53.24162309    0.         -205.76435892 1406.97735959]
Reward: -1  Episode Reward:  -363
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.0000000e-01 -1.8060000e+02  1.4539233e+03  0.0000000e+00]
------
Step:124, Action:East
State  257
Old Q Values:  [-6.0000000e-01 -1.8060000e+02  1.4539233e+03  0.0000000e+00]
New Q values:  [-6.00000000e-01 -1.80600000e+02  1.00306253e+03  0.00000000e+00]
Reward: -1  Episode Reward:  -364
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 1406.97735959]
------
Step:125, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913  1124.06320468]
New Q values:  [-1463.93735889  -180.6            1.61064913   749.94404022]
Reward: -1  Episode Reward:  -365
xxxxx
x.  x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  1.00306253e+03  0.00000000e+00]
------
Step:126, Action:West
State  256
Old Q Values:  [  0.           0.         479.45620728   0.        ]
New Q values:  [    0.             0.           479.45620728 -6036.76313782]
Reward: -10301  Episode Reward:  -10666
xxxxx
x.  x
x   x
xg  x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -146.97029777 -8656.02923281 -6173.56321028  2179.86147558]
------
Step:1, Action:West
State  288
Old Q Values:  [ -146.97029777 -8656.02923281 -6173.56321028  2179.86147558]
New Q values:  [ -146.97029777 -8656.02923281 -6173.56321028  1102.3278023 ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913   749.94404022]
------
Step:2, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892 1406.97735959]
New Q values:  [  53.24162309    0.         -205.76435892  722.46845774]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  514.25837969  -289.59534477 -1299.12168416  -180.6       ]
------
Step:3, Action:North
State  261
Old Q Values:  [  514.25837969  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  215.51568737  -289.59534477 -1299.12168416  -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  14.70778497   -1.22906555  -13.45412716 -180.6       ]
------
Step:4, Action:North
State  181
Old Q Values:  [  14.70778497   -1.22906555  -13.45412716 -180.6       ]
New Q values:  [  13.83135285   -1.22906555  -13.45412716 -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
xa.gx
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.49412954 -2781.31337986  -180.6       ]
------
Step:5, Action:South
State  103
Old Q Values:  [-180.6           8.20466484    0.            0.        ]
New Q values:  [-180.6           6.83127179    0.            0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.83135285   -1.22906555  -13.45412716 -180.6       ]
------
Step:6, Action:North
State  181
Old Q Values:  [  13.83135285   -1.22906555  -13.45412716 -180.6       ]
New Q values:  [   6.98192268   -1.22906555  -13.45412716 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
xa. x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           6.83127179    0.            0.        ]
------
Step:7, Action:South
State  110
Old Q Values:  [-180.6        -532.22453432    2.76177191    0.        ]
New Q values:  [-1.80600000e+02 -6.21338664e+03  2.76177191e+00  0.00000000e+00]
Reward: -10001  Episode Reward:  -9967
xxxxx
x . x
xg..x
x   x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  722.46845774]
------
Step:1, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   749.94404022]
New Q values:  [-1463.93735889  -180.6            1.61064913   370.0323223 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  215.51568737  -289.59534477 -1299.12168416  -180.6       ]
------
Step:2, Action:North
State  261
Old Q Values:  [  215.51568737  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  600.98626335  -289.59534477 -1299.12168416  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[   6.1762476     3.01500451 1697.93329469    0.        ]
------
Step:3, Action:East
State  181
Old Q Values:  [   6.98192268   -1.22906555  -13.45412716 -180.6       ]
New Q values:  [   6.98192268   -1.22906555   38.45288886 -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  128.1151324     0.        -7410.7447866     0.       ]
------
Step:4, Action:North
State  199
Old Q Values:  [-6.00000000e-01  1.27414491e+03  5.47587933e+03  0.00000000e+00]
New Q values:  [  22.48535485 1274.14490661 5475.87932557    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x.a.x
x  .x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          57.75118283   16.46406499    0.        ]
------
Step:5, Action:South
State  114
Old Q Values:  [-180.6           0.            3.52378715    0.        ]
New Q values:  [-180.6          -0.6           3.52378715    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x. .x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -5.9946000e+03  0.0000000e+00  0.0000000e+00]
------
Step:6, Action:East
State  195
Old Q Values:  [  51.54351872    0.         2300.17044591    0.        ]
New Q values:  [ 51.54351872   0.         926.7319688    0.        ]
Reward: 9  Episode Reward:  44
xxxxx
x. .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    4.21263479 -1695.56049235  -180.6            3.07790274]
------
Step:7, Action:North
State  208
Old Q Values:  [ -140.59704597     3.45070866 -2651.70614553 -3344.18956062]
New Q values:  [  -49.0843063      3.45070866 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  53
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    5.84837364 -180.00807518    1.3939453 ]
------
Step:8, Action:West
State  130
Old Q Values:  [-274.86862066    5.84837364 -180.00807518    1.3939453 ]
New Q values:  [-274.86862066    5.84837364 -180.00807518   17.28293297]
Reward: -1  Episode Reward:  52
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          57.75118283   16.46406499    0.        ]
------
Step:9, Action:South
State  115
Old Q Values:  [-180.6          57.75118283   16.46406499    0.        ]
New Q values:  [-180.6          60.93501285   16.46406499    0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  128.1151324     0.        -7410.7447866     0.       ]
------
Step:10, Action:North
State  192
Old Q Values:  [  9.8952619   22.23326427 320.95560943   0.        ]
New Q values:  [  4.4152409   22.23326427 320.95560943   0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6          -0.6           3.52378715    0.        ]
------
Step:11, Action:East
State  112
Old Q Values:  [    0.           214.92635634 12922.30190202 60005.4       ]
New Q values:  [    0.           214.92635634 13475.90510224 60005.4       ]
Reward: -1  Episode Reward:  49
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 4312.7659208   2850.949675   -8652.84       27691.94780477]
------
Step:12, Action:North
State  128
Old Q Values:  [ 4312.7659208   2850.949675   -8652.84       27691.94780477]
New Q values:  [ 9852.09070975  2850.949675   -8652.84       27691.94780477]
Reward: -301  Episode Reward:  -252
xxxxx
xg ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 9852.09070975  2850.949675   -8652.84       27691.94780477]
------
Step:13, Action:West
State  130
Old Q Values:  [-274.86862066    5.84837364 -180.00807518   17.28293297]
New Q values:  [-274.86862066    5.84837364 -180.00807518    7.37030933]
Reward: -1  Episode Reward:  -253
xxxxx
x.a x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6          -0.6           3.52378715    0.        ]
------
Step:14, Action:East
State  114
Old Q Values:  [-180.6          -0.6           3.52378715    0.        ]
New Q values:  [-180.6          -0.6           3.02060766    0.        ]
Reward: -1  Episode Reward:  -254
xxxxx
x. ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    5.84837364 -180.00807518    7.37030933]
------
Step:15, Action:West
State  130
Old Q Values:  [-274.86862066    5.84837364 -180.00807518    7.37030933]
New Q values:  [-274.86862066    5.84837364 -180.00807518    3.25430603]
Reward: -1  Episode Reward:  -255
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6          -0.6           3.02060766    0.        ]
------
Step:16, Action:East
State  114
Old Q Values:  [-180.6          -0.6           3.02060766    0.        ]
New Q values:  [-180.6          -0.6           2.36275516    0.        ]
Reward: -1  Episode Reward:  -256
xxxxx
x. ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    5.84837364 -180.00807518    3.25430603]
------
Step:17, Action:South
State  130
Old Q Values:  [-274.86862066    5.84837364 -180.00807518    3.25430603]
New Q values:  [-274.86862066    3.00313989 -180.00807518    3.25430603]
Reward: -1  Episode Reward:  -257
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    4.21263479 -1695.56049235  -180.6            3.07790274]
------
Step:18, Action:North
State  210
Old Q Values:  [    4.21263479 -1695.56049235  -180.6            3.07790274]
New Q values:  [    2.06134572 -1695.56049235  -180.6            3.07790274]
Reward: -1  Episode Reward:  -258
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    3.00313989 -180.00807518    3.25430603]
------
Step:19, Action:West
State  130
Old Q Values:  [-274.86862066    3.00313989 -180.00807518    3.25430603]
New Q values:  [-274.86862066    3.00313989 -180.00807518    1.41054896]
Reward: -1  Episode Reward:  -259
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6          -0.6           2.36275516    0.        ]
------
Step:20, Action:East
State  114
Old Q Values:  [-180.6          -0.6           2.36275516    0.        ]
New Q values:  [-180.6          -0.6           1.24604403    0.        ]
Reward: -1  Episode Reward:  -260
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    3.00313989 -180.00807518    1.41054896]
------
Step:21, Action:South
State  130
Old Q Values:  [-274.86862066    3.00313989 -180.00807518    1.41054896]
New Q values:  [-274.86862066    1.63646856 -180.00807518    1.41054896]
Reward: -1  Episode Reward:  -261
xxxxx
x.  x
x gax
x   x
xxxxx
Step:22, Action:South
State  208
Old Q Values:  [  -49.0843063      3.45070866 -2651.70614553 -3344.18956062]
New Q values:  [  -49.0843063    331.47862415 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  -262
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -146.97029777 -8656.02923281 -6173.56321028  1102.3278023 ]
------
Step:23, Action:West
State  288
Old Q Values:  [ -146.97029777 -8656.02923281 -6173.56321028  1102.3278023 ]
New Q values:  [ -146.97029777 -8656.02923281 -6173.56321028   551.34081761]
Reward: -1  Episode Reward:  -263
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913   370.0323223 ]
------
Step:24, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   370.0323223 ]
New Q values:  [-1463.93735889  -180.6            1.61064913   448.33168726]
Reward: -1  Episode Reward:  -264
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[-6.00000000e-01 -1.80600000e+02  1.00306253e+03  0.00000000e+00]
------
Step:25, Action:East
State  257
Old Q Values:  [-6.00000000e-01 -1.80600000e+02  1.00306253e+03  0.00000000e+00]
New Q values:  [  -0.6        -180.6         535.12451731    0.        ]
Reward: -1  Episode Reward:  -265
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913   448.33168726]
------
Step:26, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   448.33168726]
New Q values:  [-1463.93735889  -180.6            1.61064913   322.56953709]
Reward: -1  Episode Reward:  -266
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.           479.45620728 -6036.76313782]
------
Step:27, Action:East
State  257
Old Q Values:  [  -0.6        -180.6         535.12451731    0.        ]
New Q values:  [  -0.6        -180.6         310.22066805    0.        ]
Reward: -1  Episode Reward:  -267
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913   322.56953709]
------
Step:28, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892  722.46845774]
New Q values:  [  53.24162309    0.         -205.76435892  381.45358351]
Reward: -1  Episode Reward:  -268
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[  -0.6        -180.6         310.22066805    0.        ]
------
Step:29, Action:East
State  257
Old Q Values:  [  -0.6        -180.6         310.22066805    0.        ]
New Q values:  [  -0.6        -180.6         237.92434227    0.        ]
Reward: -1  Episode Reward:  -269
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  381.45358351]
------
Step:30, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   322.56953709]
New Q values:  [-1463.93735889  -180.6            1.61064913   199.80511752]
Reward: -1  Episode Reward:  -270
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[  -0.6        -180.6         237.92434227    0.        ]
------
Step:31, Action:East
State  257
Old Q Values:  [  -0.6        -180.6         237.92434227    0.        ]
New Q values:  [  -0.6        -180.6         209.00581196    0.        ]
Reward: -1  Episode Reward:  -271
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  381.45358351]
------
Step:32, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   199.80511752]
New Q values:  [-1463.93735889  -180.6            1.61064913   142.0237906 ]
Reward: -1  Episode Reward:  -272
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[  -0.6        -180.6         209.00581196    0.        ]
------
Step:33, Action:East
State  257
Old Q Values:  [  -0.6        -180.6         209.00581196    0.        ]
New Q values:  [  -0.6        -180.6         197.43839984    0.        ]
Reward: -1  Episode Reward:  -273
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  381.45358351]
------
Step:34, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   142.0237906 ]
New Q values:  [-1463.93735889  -180.6            1.61064913   115.44103619]
Reward: -1  Episode Reward:  -274
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[  -0.6        -180.6         197.43839984    0.        ]
------
Step:35, Action:East
State  256
Old Q Values:  [    0.             0.           479.45620728 -6036.76313782]
New Q values:  [    0.             0.           225.81479377 -6036.76313782]
Reward: -1  Episode Reward:  -275
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913   115.44103619]
------
Step:36, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   115.44103619]
New Q values:  [-1463.93735889  -180.6            1.61064913   113.32085261]
Reward: -1  Episode Reward:  -276
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.           225.81479377 -6036.76313782]
------
Step:37, Action:East
State  256
Old Q Values:  [    0.             0.           225.81479377 -6036.76313782]
New Q values:  [    0.             0.           123.72217329 -6036.76313782]
Reward: -1  Episode Reward:  -277
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913   113.32085261]
------
Step:38, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   113.32085261]
New Q values:  [-1463.93735889  -180.6            1.61064913    81.84499303]
Reward: -1  Episode Reward:  -278
xxxxx
xg  x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.           123.72217329 -6036.76313782]
------
Step:39, Action:East
State  256
Old Q Values:  [    0.             0.           123.72217329 -6036.76313782]
New Q values:  [    0.             0.            73.44236722 -6036.76313782]
Reward: -1  Episode Reward:  -279
xxxxx
x.  x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913    81.84499303]
------
Step:40, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913    81.84499303]
New Q values:  [-1463.93735889  -180.6            1.61064913    54.17070738]
Reward: -1  Episode Reward:  -280
xxxxx
xg  x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.            73.44236722 -6036.76313782]
------
Step:41, Action:East
State  257
Old Q Values:  [  -0.6        -180.6         197.43839984    0.        ]
New Q values:  [  -0.6        -180.6          94.62657215    0.        ]
Reward: -1  Episode Reward:  -281
xxxxx
x.g x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913    54.17070738]
------
Step:42, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892  381.45358351]
New Q values:  [  53.24162309    0.         -205.76435892  180.36940505]
Reward: -1  Episode Reward:  -282
xxxxx
x. gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[  -0.6        -180.6          94.62657215    0.        ]
------
Step:43, Action:East
State  257
Old Q Values:  [  -0.6        -180.6          94.62657215    0.        ]
New Q values:  [  -0.6        -180.6          91.36145037    0.        ]
Reward: -1  Episode Reward:  -283
xxxxx
x.  x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  180.36940505]
------
Step:44, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892  180.36940505]
New Q values:  [  53.24162309    0.         -205.76435892   98.95619713]
Reward: -1  Episode Reward:  -284
xxxxx
x.  x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[  -0.6        -180.6          91.36145037    0.        ]
------
Step:45, Action:East
State  257
Old Q Values:  [  -0.6        -180.6          91.36145037    0.        ]
New Q values:  [-6.00000000e-01 -1.80600000e+02 -5.94780421e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -10285
xxxxx
x.  x
x   x
x g x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  600.98626335  -289.59534477 -1299.12168416  -180.6       ]
------
Step:1, Action:North
State  261
Old Q Values:  [  600.98626335  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  755.17449375  -289.59534477 -1299.12168416  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[   6.1762476     3.01500451 1697.93329469    0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [   0.            0.         1671.21205411    0.        ]
New Q values:  [   0.            0.         1333.76293817    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.56078987e+00  2.19959372e+03  0.00000000e+00]
------
Step:3, Action:East
State  192
Old Q Values:  [  4.4152409   22.23326427 320.95560943   0.        ]
New Q values:  [  4.4152409   22.23326427 233.22583102   0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xg ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    331.47862415 -2651.70614553 -3344.18956062]
------
Step:4, Action:South
State  208
Old Q Values:  [  -49.0843063    331.47862415 -2651.70614553 -3344.18956062]
New Q values:  [  -49.0843063    297.39369494 -2651.70614553 -3344.18956062]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
x   x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -146.97029777 -8656.02923281 -6173.56321028   551.34081761]
------
Step:5, Action:West
State  288
Old Q Values:  [ -146.97029777 -8656.02923281 -6173.56321028   551.34081761]
New Q values:  [ -146.97029777 -8656.02923281 -6173.56321028   242.18753926]
Reward: 9  Episode Reward:  35
xxxxx
x...x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913    54.17070738]
------
Step:6, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913    54.17070738]
New Q values:  [-1463.93735889  -180.6            1.61064913   247.62063108]
Reward: -1  Episode Reward:  34
xxxxx
x...x
x g x
xa  x
xxxxx
Step:7, Action:North
State  257
Old Q Values:  [-6.00000000e-01 -1.80600000e+02 -5.94780421e+03  0.00000000e+00]
New Q values:  [33527.99013337  -180.6        -5947.80420764     0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x.g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[     0.         111762.76711124  60661.2566939       0.        ]
------
Step:8, Action:South
State  181
Old Q Values:  [   6.98192268   -1.22906555   38.45288886 -180.6       ]
New Q values:  [   6.98192268  225.46072191   38.45288886 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  755.17449375  -289.59534477 -1299.12168416  -180.6       ]
------
Step:9, Action:North
State  261
Old Q Values:  [  755.17449375  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  810.84978591  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  31
xxxxx
x...x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[   6.1762476     3.01500451 1697.93329469    0.        ]
------
Step:10, Action:East
State  183
Old Q Values:  [   6.1762476     3.01500451 1697.93329469    0.        ]
New Q values:  [  6.1762476    3.01500451 956.59290852   0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x...x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 51.54351872   0.         926.7319688    0.        ]
------
Step:11, Action:East
State  193
Old Q Values:  [  128.1151324     0.        -7410.7447866     0.       ]
New Q values:  [  128.1151324      0.         -8875.67980616     0.        ]
Reward: -10001  Episode Reward:  -9971
xxxxx
x...x
x  gx
x   x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    297.39369494 -2651.70614553 -3344.18956062]
------
Step:1, Action:South
State  208
Old Q Values:  [  -49.0843063    297.39369494 -2651.70614553 -3344.18956062]
New Q values:  [  -49.0843063    197.01373975 -2651.70614553 -3344.18956062]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -146.97029777 -8656.02923281 -6173.56321028   242.18753926]
------
Step:2, Action:West
State  288
Old Q Values:  [ -146.97029777 -8656.02923281 -6173.56321028   242.18753926]
New Q values:  [ -146.97029777 -8656.02923281 -6173.56321028   131.96187484]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892   98.95619713]
------
Step:3, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913   247.62063108]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  1.01628453e+04]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[33527.99013337  -180.6        -5947.80420764     0.        ]
------
Step:4, Action:North
State  261
Old Q Values:  [  810.84978591  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  616.71778692  -289.59534477 -1299.12168416  -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x. .x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476    3.01500451 956.59290852   0.        ]
------
Step:5, Action:East
State  178
Old Q Values:  [0.  0.  5.4 0. ]
New Q values:  [  0.           0.         667.43811653   0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x. .x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.56078987e+00  2.19959372e+03  0.00000000e+00]
------
Step:6, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.56078987e+00  2.19959372e+03  0.00000000e+00]
New Q values:  [-6.00000000e-01  2.56078987e+00  8.80160860e+02  0.00000000e+00]
Reward: -1  Episode Reward:  44
xxxxx
x. .x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6            3.07790274]
------
Step:7, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6            3.07790274]
New Q values:  [    2.06134572 -1695.56049235  -180.6          278.65075173]
Reward: -1  Episode Reward:  43
xxxxx
x. .x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 51.54351872   0.         926.7319688    0.        ]
------
Step:8, Action:East
State  195
Old Q Values:  [ 51.54351872   0.         926.7319688    0.        ]
New Q values:  [ 51.54351872   0.         453.68801304   0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x. .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6          278.65075173]
------
Step:9, Action:West
State  208
Old Q Values:  [  -49.0843063    197.01373975 -2651.70614553 -3344.18956062]
New Q values:  [  -49.0843063    197.01373975 -2651.70614553 -1299.84128453]
Reward: -1  Episode Reward:  41
xxxxx
x. .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  128.1151324      0.         -8875.67980616     0.        ]
------
Step:10, Action:North
State  193
Old Q Values:  [  128.1151324      0.         -8875.67980616     0.        ]
New Q values:  [  130.04752249     0.         -8875.67980616     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:11, Action:South
State  115
Old Q Values:  [-180.6          60.93501285   16.46406499    0.        ]
New Q values:  [-180.6          62.78826189   16.46406499    0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x. .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  130.04752249     0.         -8875.67980616     0.        ]
------
Step:12, Action:North
State  192
Old Q Values:  [  4.4152409   22.23326427 233.22583102   0.        ]
New Q values:  [  1.53990957  22.23326427 233.22583102   0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x.a.x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6          -0.6           1.24604403    0.        ]
------
Step:13, Action:East
State  112
Old Q Values:  [    0.           214.92635634 13475.90510224 60005.4       ]
New Q values:  [    0.           214.92635634 13703.34638233 60005.4       ]
Reward: 9  Episode Reward:  47
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 9852.09070975  2850.949675   -8652.84       27691.94780477]
------
Step:14, Action:North
State  128
Old Q Values:  [ 9852.09070975  2850.949675   -8652.84       27691.94780477]
New Q values:  [ 6067.82062533  2850.949675   -8652.84       27691.94780477]
Reward: -10301  Episode Reward:  -10254
xxxxx
x. gx
x   x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  130.04752249     0.         -8875.67980616     0.        ]
------
Step:1, Action:North
State  193
Old Q Values:  [  130.04752249     0.         -8875.67980616     0.        ]
New Q values:  [   98.29598338     0.         -8875.67980616     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          136.2565813     10.97401097]
------
Step:2, Action:East
State  121
Old Q Values:  [ 0.00000000e+00  0.00000000e+00 -9.79128054e+03  4.88836988e+00]
New Q values:  [ 0.00000000e+00  0.00000000e+00 -9.88652971e+03  4.88836988e+00]
Reward: -10001  Episode Reward:  -9992
xxxxx
x. gx
x. .x
x...x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 51.54351872   0.         453.68801304   0.        ]
------
Step:1, Action:East
State  193
Old Q Values:  [   98.29598338     0.         -8875.67980616     0.        ]
New Q values:  [   98.29598338     0.         -9485.76780054     0.        ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  616.71778692  -289.59534477 -1299.12168416  -180.6       ]
------
Step:1, Action:North
State  261
Old Q Values:  [  616.71778692  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  319.72533134  -289.59534477 -1299.12168416  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  225.46072191   38.45288886 -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   6.98192268  225.46072191   38.45288886 -180.6       ]
New Q values:  [   6.98192268  185.50188816   38.45288886 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  319.72533134  -289.59534477 -1299.12168416  -180.6       ]
------
Step:3, Action:North
State  260
Old Q Values:  [-2768.29676552 -6457.4598     -1562.16328996 -6307.02      ]
New Q values:  [-7107.81552749 -6457.4598     -1562.16328996 -6307.02      ]
Reward: -10001  Episode Reward:  -9993
xxxxx
x...x
xg .x
x ..x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          101.94169468  -179.38454759    28.3270367 ]
------
Step:1, Action:South
State  136
Old Q Values:  [-6180.6          101.94169468  -179.38454759    28.3270367 ]
New Q values:  [-6180.6          105.2807998   -179.38454759    28.3270367 ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    197.01373975 -2651.70614553 -1299.84128453]
------
Step:2, Action:South
State  208
Old Q Values:  [  -49.0843063    197.01373975 -2651.70614553 -1299.84128453]
New Q values:  [  -49.0843063    123.79405835 -2651.70614553 -1299.84128453]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -146.97029777 -8656.02923281 -6173.56321028   131.96187484]
------
Step:3, Action:West
State  288
Old Q Values:  [ -146.97029777 -8656.02923281 -6173.56321028   131.96187484]
New Q values:  [ -146.97029777 -8656.02923281 -6173.56321028  3107.03833767]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.61064913e+00  1.01628453e+04]
------
Step:4, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  1.01628453e+04]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  4.09257083e+03]
Reward: 9  Episode Reward:  36
xxxxx
x.  x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.            73.44236722 -6036.76313782]
------
Step:5, Action:East
State  257
Old Q Values:  [33527.99013337  -180.6        -5947.80420764     0.        ]
New Q values:  [33527.99013337  -180.6        -1151.95043491     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.61064913e+00  4.09257083e+03]
------
Step:6, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892   98.95619713]
New Q values:  [   53.24162309     0.          -205.76435892 10097.37951886]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[33527.99013337  -180.6        -1151.95043491     0.        ]
------
Step:7, Action:North
State  257
Old Q Values:  [33527.99013337  -180.6        -1151.95043491     0.        ]
New Q values:  [46945.42618672  -180.6        -1151.95043491     0.        ]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[     0.         111762.76711124  60661.2566939       0.        ]
------
Step:8, Action:South
State  177
Old Q Values:  [     0.         111762.76711124  60661.2566939       0.        ]
New Q values:  [    0.         58788.13470051 60661.2566939      0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[46945.42618672  -180.6        -1151.95043491     0.        ]
------
Step:9, Action:North
State  257
Old Q Values:  [46945.42618672  -180.6        -1151.95043491     0.        ]
New Q values:  [36975.94748286  -180.6        -1151.95043491     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         58788.13470051 60661.2566939      0.        ]
------
Step:10, Action:South
State  177
Old Q Values:  [    0.         58788.13470051 60661.2566939      0.        ]
New Q values:  [    0.         34607.43812506 60661.2566939      0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[36975.94748286  -180.6        -1151.95043491     0.        ]
------
Step:11, Action:North
State  260
Old Q Values:  [-7107.81552749 -6457.4598     -1562.16328996 -6307.02      ]
New Q values:  [-2843.62303227 -6457.4598     -1562.16328996 -6307.02      ]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3.43106190e+03 -8.00903410e+02  3.43929072e-01  0.00000000e+00]
------
Step:12, Action:East
State  176
Old Q Values:  [    0.             0.         62208.25273208     0.        ]
New Q values:  [    0.             0.         24958.66884214     0.        ]
Reward: 9  Episode Reward:  48
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  1.53990957  22.23326427 233.22583102   0.        ]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.56078987e+00  8.80160860e+02  0.00000000e+00]
New Q values:  [ -0.6          2.56078987 435.05956933   0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6          278.65075173]
------
Step:14, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6          278.65075173]
New Q values:  [    2.06134572 -1695.56049235  -180.6          241.37817149]
Reward: -1  Episode Reward:  46
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 435.05956933   0.        ]
------
Step:15, Action:East
State  195
Old Q Values:  [ 51.54351872   0.         453.68801304   0.        ]
New Q values:  [ 51.54351872   0.         253.28865666   0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6          241.37817149]
------
Step:16, Action:West
State  208
Old Q Values:  [  -49.0843063    123.79405835 -2651.70614553 -1299.84128453]
New Q values:  [  -49.0843063    123.79405835 -2651.70614553  -491.0477188 ]
Reward: -1  Episode Reward:  44
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   98.29598338     0.         -9485.76780054     0.        ]
------
Step:17, Action:North
State  195
Old Q Values:  [ 51.54351872   0.         253.28865666   0.        ]
New Q values:  [ 38.85388605   0.         253.28865666   0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          62.78826189   16.46406499    0.        ]
------
Step:18, Action:South
State  115
Old Q Values:  [-180.6          62.78826189   16.46406499    0.        ]
New Q values:  [-180.6          54.00409977   16.46406499    0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   98.29598338     0.         -9485.76780054     0.        ]
------
Step:19, Action:North
State  193
Old Q Values:  [   98.29598338     0.         -9485.76780054     0.        ]
New Q values:  [  118.11986289     0.         -9485.76780054     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:20, Action:South
State  115
Old Q Values:  [-180.6          54.00409977   16.46406499    0.        ]
New Q values:  [-180.6          56.43759877   16.46406499    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  118.11986289     0.         -9485.76780054     0.        ]
------
Step:21, Action:North
State  193
Old Q Values:  [  118.11986289     0.         -9485.76780054     0.        ]
New Q values:  [  126.04941469     0.         -9485.76780054     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:22, Action:South
State  115
Old Q Values:  [-180.6          56.43759877   16.46406499    0.        ]
New Q values:  [-180.6          59.78986392   16.46406499    0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  126.04941469     0.         -9485.76780054     0.        ]
------
Step:23, Action:North
State  192
Old Q Values:  [  1.53990957  22.23326427 233.22583102   0.        ]
New Q values:  [  0.38977704  22.23326427 233.22583102   0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-180.6          -0.6           1.24604403    0.        ]
------
Step:24, Action:East
State  115
Old Q Values:  [-180.6          59.78986392   16.46406499    0.        ]
New Q values:  [-180.6          59.78986392    6.47656656    0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    1.63646856 -180.00807518    1.41054896]
------
Step:25, Action:West
State  128
Old Q Values:  [ 6067.82062533  2850.949675   -8652.84       27691.94780477]
New Q values:  [ 6067.82062533  2850.949675   -8652.84       11155.58059144]
Reward: -1  Episode Reward:  35
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:26, Action:South
State  112
Old Q Values:  [    0.           214.92635634 13703.34638233 60005.4       ]
New Q values:  [    0.           155.33829184 13703.34638233 60005.4       ]
Reward: -1  Episode Reward:  34
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  0.38977704  22.23326427 233.22583102   0.        ]
------
Step:27, Action:East
State  192
Old Q Values:  [  0.38977704  22.23326427 233.22583102   0.        ]
New Q values:  [  0.38977704  22.23326427 129.82854991   0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    123.79405835 -2651.70614553  -491.0477188 ]
------
Step:28, Action:South
State  208
Old Q Values:  [  -49.0843063    123.79405835 -2651.70614553  -491.0477188 ]
New Q values:  [  -49.0843063    981.02912464 -2651.70614553  -491.0477188 ]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -146.97029777 -8656.02923281 -6173.56321028  3107.03833767]
------
Step:29, Action:West
State  288
Old Q Values:  [ -146.97029777 -8656.02923281 -6173.56321028  3107.03833767]
New Q values:  [ -146.97029777 -8656.02923281 -6173.56321028  4271.42919073]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   53.24162309     0.          -205.76435892 10097.37951886]
------
Step:30, Action:West
State  273
Old Q Values:  [   53.24162309     0.          -205.76435892 10097.37951886]
New Q values:  [   53.24162309     0.          -205.76435892 15131.1360524 ]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[36975.94748286  -180.6        -1151.95043491     0.        ]
------
Step:31, Action:North
State  257
Old Q Values:  [36975.94748286  -180.6        -1151.95043491     0.        ]
New Q values:  [32988.15600131  -180.6        -1151.95043491     0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         34607.43812506 60661.2566939      0.        ]
------
Step:32, Action:South
State  177
Old Q Values:  [    0.         34607.43812506 60661.2566939      0.        ]
New Q values:  [    0.         23738.82205042 60661.2566939      0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x  gx
xa  x
xxxxx
Step:33, Action:West
State  257
Old Q Values:  [32988.15600131  -180.6        -1151.95043491     0.        ]
New Q values:  [32988.15600131  -180.6        -1151.95043491  9715.84680039]
Reward: -301  Episode Reward:  -273
xxxxx
x. gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[32988.15600131  -180.6        -1151.95043491  9715.84680039]
------
Step:34, Action:North
State  257
Old Q Values:  [32988.15600131  -180.6        -1151.95043491  9715.84680039]
New Q values:  [31393.03940869  -180.6        -1151.95043491  9715.84680039]
Reward: -1  Episode Reward:  -274
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         23738.82205042 60661.2566939      0.        ]
------
Step:35, Action:East
State  177
Old Q Values:  [    0.         23738.82205042 60661.2566939      0.        ]
New Q values:  [    0.         23738.82205042 24301.71750196     0.        ]
Reward: -1  Episode Reward:  -275
xxxxx
x. gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[  126.04941469     0.         -9485.76780054     0.        ]
------
Step:36, Action:North
State  193
Old Q Values:  [  126.04941469     0.         -9485.76780054     0.        ]
New Q values:  [   67.75672505     0.         -9485.76780054     0.        ]
Reward: -1  Episode Reward:  -276
xxxxx
x.a x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          59.78986392    6.47656656    0.        ]
------
Step:37, Action:South
State  114
Old Q Values:  [-180.6          -0.6           1.24604403    0.        ]
New Q values:  [-1.80600000e+02 -5.96189144e+03  1.24604403e+00  0.00000000e+00]
Reward: -10001  Episode Reward:  -10277
xxxxx
x.  x
x g x
x   x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -146.97029777 -8656.02923281 -6173.56321028  4271.42919073]
------
Step:1, Action:North
State  288
Old Q Values:  [ -146.97029777 -8656.02923281 -6173.56321028  4271.42919073]
New Q values:  [   19.02533234 -8656.02923281 -6173.56321028  4271.42919073]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6          241.37817149]
------
Step:2, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6          241.37817149]
New Q values:  [    2.06134572 -1695.56049235  -180.6          232.4691394 ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 435.05956933   0.        ]
------
Step:3, Action:East
State  192
Old Q Values:  [  0.38977704  22.23326427 129.82854991   0.        ]
New Q values:  [  0.38977704  22.23326427 345.64015736   0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    981.02912464 -2651.70614553  -491.0477188 ]
------
Step:4, Action:South
State  208
Old Q Values:  [  -49.0843063    981.02912464 -2651.70614553  -491.0477188 ]
New Q values:  [  -49.0843063   1673.24040708 -2651.70614553  -491.0477188 ]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   19.02533234 -8656.02923281 -6173.56321028  4271.42919073]
------
Step:5, Action:West
State  288
Old Q Values:  [   19.02533234 -8656.02923281 -6173.56321028  4271.42919073]
New Q values:  [   19.02533234 -8656.02923281 -6173.56321028  6247.31249201]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   53.24162309     0.          -205.76435892 15131.1360524 ]
------
Step:6, Action:West
State  273
Old Q Values:  [   53.24162309     0.          -205.76435892 15131.1360524 ]
New Q values:  [   53.24162309     0.          -205.76435892 15475.76624357]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31393.03940869  -180.6        -1151.95043491  9715.84680039]
------
Step:7, Action:North
State  257
Old Q Values:  [31393.03940869  -180.6        -1151.95043491  9715.84680039]
New Q values:  [19853.13101407  -180.6        -1151.95043491  9715.84680039]
Reward: 9  Episode Reward:  33
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         23738.82205042 24301.71750196     0.        ]
------
Step:8, Action:East
State  177
Old Q Values:  [    0.         23738.82205042 24301.71750196     0.        ]
New Q values:  [    0.         23738.82205042  9823.77904799     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x.g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  0.38977704  22.23326427 345.64015736   0.        ]
------
Step:9, Action:East
State  192
Old Q Values:  [  0.38977704  22.23326427 345.64015736   0.        ]
New Q values:  [3.89777037e-01 2.22332643e+01 6.39628185e+02 0.00000000e+00]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063   1673.24040708 -2651.70614553  -491.0477188 ]
------
Step:10, Action:South
State  208
Old Q Values:  [  -49.0843063   1673.24040708 -2651.70614553  -491.0477188 ]
New Q values:  [  -49.0843063   2542.88991043 -2651.70614553  -491.0477188 ]
Reward: -1  Episode Reward:  30
xxxxx
x...x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   19.02533234 -8656.02923281 -6173.56321028  6247.31249201]
------
Step:11, Action:West
State  288
Old Q Values:  [   19.02533234 -8656.02923281 -6173.56321028  6247.31249201]
New Q values:  [   19.02533234 -8656.02923281 -6173.56321028  3726.09624495]
Reward: -1  Episode Reward:  29
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.61064913e+00  4.09257083e+03]
------
Step:12, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  4.09257083e+03]
New Q values:  [-1463.93735889  -180.6            1.61064913  1167.77934387]
Reward: -1  Episode Reward:  28
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2843.62303227 -6457.4598     -1562.16328996 -6307.02      ]
------
Step:13, Action:East
State  260
Old Q Values:  [-2843.62303227 -6457.4598     -1562.16328996 -6307.02      ]
New Q values:  [-2843.62303227 -6457.4598      -275.13151282 -6307.02      ]
Reward: -1  Episode Reward:  27
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6            1.61064913  1167.77934387]
------
Step:14, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6            1.61064913  1167.77934387]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  6.42245104e+03]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[19853.13101407  -180.6        -1151.95043491  9715.84680039]
------
Step:15, Action:North
State  257
Old Q Values:  [19853.13101407  -180.6        -1151.95043491  9715.84680039]
New Q values:  [15062.29902075  -180.6        -1151.95043491  9715.84680039]
Reward: -1  Episode Reward:  25
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         23738.82205042  9823.77904799     0.        ]
------
Step:16, Action:South
State  181
Old Q Values:  [   6.98192268  185.50188816   38.45288886 -180.6       ]
New Q values:  [   6.98192268  169.51835467   38.45288886 -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  319.72533134  -289.59534477 -1299.12168416  -180.6       ]
------
Step:17, Action:North
State  261
Old Q Values:  [  319.72533134  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  414.26800509  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476    3.01500451 956.59290852   0.        ]
------
Step:18, Action:East
State  181
Old Q Values:  [   6.98192268  169.51835467   38.45288886 -180.6       ]
New Q values:  [   6.98192268  169.51835467   35.10817306 -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[   67.75672505     0.         -9485.76780054     0.        ]
------
Step:19, Action:North
State  193
Old Q Values:  [   67.75672505     0.         -9485.76780054     0.        ]
New Q values:  [  111.90415955     0.         -9485.76780054     0.        ]
Reward: 9  Episode Reward:  31
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:20, Action:South
State  112
Old Q Values:  [    0.           155.33829184 13703.34638233 60005.4       ]
New Q values:  [    0.           253.42377226 13703.34638233 60005.4       ]
Reward: -1  Episode Reward:  30
xxxxx
x.g.x
x a x
x   x
xxxxx
Step:21, Action:North
State  193
Old Q Values:  [  111.90415955     0.         -9485.76780054     0.        ]
New Q values:  [-5922.26708831     0.         -9485.76780054     0.        ]
Reward: -10001  Episode Reward:  -9971
xxxxx
x. gx
x a x
x   x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.          3.43614185 98.39771311  0.        ]
------
Step:1, Action:East
State  107
Old Q Values:  [-2.52351696e+02  1.40592937e+01 -2.58259038e-02 -2.52781922e+02]
New Q values:  [-252.35169558   14.05929374   46.26664403 -252.78192178]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -284.31459256 -6000.6          136.2565813     10.97401097]
------
Step:2, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.57487246e+01  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.44139694e+01  4.04786473e+00]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -731.60350289 -180.6          35.71493192]
------
Step:3, Action:West
State  136
Old Q Values:  [-6180.6          105.2807998   -179.38454759    28.3270367 ]
New Q values:  [-6180.6          105.2807998   -179.38454759 -5960.73937221]
Reward: -10001  Episode Reward:  -9983
xxxxx
x g x
x...x
x.. x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          105.2807998   -179.38454759 -5960.73937221]
------
Step:1, Action:South
State  136
Old Q Values:  [-6180.6          105.2807998   -179.38454759 -5960.73937221]
New Q values:  [-6180.6          119.62723386  -179.38454759 -5960.73937221]
Reward: 9  Episode Reward:  9
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.40383046e+02  1.59912420e+02 -6.17035694e+03  6.08663514e+00]
------
Step:2, Action:North
State  216
Old Q Values:  [ 2.40383046e+02  1.59912420e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 1.06267698e+02  1.59912420e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  8
xxxxx
x .ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -731.60350289 -180.6          35.71493192]
------
Step:3, Action:West
State  136
Old Q Values:  [-6180.6          119.62723386  -179.38454759 -5960.73937221]
New Q values:  [-6180.6          119.62723386  -179.38454759 -8350.36593577]
Reward: -9991  Episode Reward:  -9983
xxxxx
x g x
x.. x
x...x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   53.24162309     0.          -205.76435892 15475.76624357]
------
Step:1, Action:West
State  273
Old Q Values:  [   53.24162309     0.          -205.76435892 15475.76624357]
New Q values:  [  53.24162309    0.         -205.76435892 6319.98689895]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  414.26800509  -289.59534477 -1299.12168416  -180.6       ]
------
Step:2, Action:North
State  261
Old Q Values:  [  414.26800509  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  458.08507459  -289.59534477 -1299.12168416  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476    3.01500451 956.59290852   0.        ]
------
Step:3, Action:East
State  181
Old Q Values:  [   6.98192268  169.51835467   35.10817306 -180.6       ]
New Q values:  [   6.98192268  169.51835467   19.44326922 -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831     0.         -9485.76780054     0.        ]
------
Step:4, Action:South
State  195
Old Q Values:  [ 38.85388605   0.         253.28865666   0.        ]
New Q values:  [  38.85388605 1895.39606969  253.28865666    0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 6319.98689895]
------
Step:5, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892 6319.98689895]
New Q values:  [  53.24162309    0.         -205.76435892 2664.82028196]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  458.08507459  -289.59534477 -1299.12168416  -180.6       ]
------
Step:6, Action:North
State  261
Old Q Values:  [  458.08507459  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  233.48953624  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  169.51835467   19.44326922 -180.6       ]
------
Step:7, Action:South
State  181
Old Q Values:  [   6.98192268  169.51835467   19.44326922 -180.6       ]
New Q values:  [   6.98192268  137.25420274   19.44326922 -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  233.48953624  -289.59534477 -1299.12168416  -180.6       ]
------
Step:8, Action:North
State  261
Old Q Values:  [  233.48953624  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  133.97207532  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  137.25420274   19.44326922 -180.6       ]
------
Step:9, Action:South
State  181
Old Q Values:  [   6.98192268  137.25420274   19.44326922 -180.6       ]
New Q values:  [   6.98192268   94.49330369   19.44326922 -180.6       ]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  133.97207532  -289.59534477 -1299.12168416  -180.6       ]
------
Step:10, Action:North
State  261
Old Q Values:  [  133.97207532  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [   81.33682123  -289.59534477 -1299.12168416  -180.6       ]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268   94.49330369   19.44326922 -180.6       ]
------
Step:11, Action:South
State  180
Old Q Values:  [-3.43106190e+03 -8.00903410e+02  3.43929072e-01  0.00000000e+00]
New Q values:  [-3.43106190e+03 -4.03500818e+02  3.43929072e-01  0.00000000e+00]
Reward: -1  Episode Reward:  19
xxxxx
x...x
xg .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2843.62303227 -6457.4598      -275.13151282 -6307.02      ]
------
Step:12, Action:East
State  261
Old Q Values:  [   81.33682123  -289.59534477 -1299.12168416  -180.6       ]
New Q values:  [  81.33682123 -289.59534477 1406.48663887 -180.6       ]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.61064913e+00  6.42245104e+03]
------
Step:13, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  6.42245104e+03]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  2.99032641e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  81.33682123 -289.59534477 1406.48663887 -180.6       ]
------
Step:14, Action:North
State  260
Old Q Values:  [-2843.62303227 -6457.4598      -275.13151282 -6307.02      ]
New Q values:  [ -737.92033146 -6457.4598      -275.13151282 -6307.02      ]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[   0.            0.         1333.76293817    0.        ]
------
Step:15, Action:East
State  180
Old Q Values:  [-3.43106190e+03 -4.03500818e+02  3.43929072e-01  0.00000000e+00]
New Q values:  [-3431.06190038  -403.50081769   191.42602715     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.22332643e+01 6.39628185e+02 0.00000000e+00]
------
Step:16, Action:East
State  194
Old Q Values:  [ -0.6          2.56078987 435.05956933   0.        ]
New Q values:  [ -0.6          2.56078987 249.16456955   0.        ]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6          232.4691394 ]
------
Step:17, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6          232.4691394 ]
New Q values:  [    2.06134572 -1695.56049235  -180.6          167.13702662]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 249.16456955   0.        ]
------
Step:18, Action:East
State  195
Old Q Values:  [  38.85388605 1895.39606969  253.28865666    0.        ]
New Q values:  [  38.85388605 1895.39606969  150.85657065    0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6          167.13702662]
------
Step:19, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6          167.13702662]
New Q values:  [    2.06134572 -1695.56049235  -180.6          141.00418152]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 249.16456955   0.        ]
------
Step:20, Action:East
State  194
Old Q Values:  [ -0.6          2.56078987 249.16456955   0.        ]
New Q values:  [ -0.6          2.56078987 141.36708228   0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6          141.00418152]
------
Step:21, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6          141.00418152]
New Q values:  [    2.06134572 -1695.56049235  -180.6           98.21179729]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 141.36708228   0.        ]
------
Step:22, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.22332643e+01 6.39628185e+02 0.00000000e+00]
New Q values:  [3.89777037e-01 2.22332643e+01 1.01811825e+03 0.00000000e+00]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063   2542.88991043 -2651.70614553  -491.0477188 ]
------
Step:23, Action:South
State  208
Old Q Values:  [  -49.0843063   2542.88991043 -2651.70614553  -491.0477188 ]
New Q values:  [  -49.0843063   2134.38483766 -2651.70614553  -491.0477188 ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   19.02533234 -8656.02923281 -6173.56321028  3726.09624495]
------
Step:24, Action:West
State  288
Old Q Values:  [   19.02533234 -8656.02923281 -6173.56321028  3726.09624495]
New Q values:  [   19.02533234 -8656.02923281 -6173.56321028  2289.28458257]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 2664.82028196]
------
Step:25, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  2.99032641e+03]
New Q values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  1.61747656e+03]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  81.33682123 -289.59534477 1406.48663887 -180.6       ]
------
Step:26, Action:North
State  261
Old Q Values:  [  81.33682123 -289.59534477 1406.48663887 -180.6       ]
New Q values:  [  60.2827196  -289.59534477 1406.48663887 -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268   94.49330369   19.44326922 -180.6       ]
------
Step:27, Action:South
State  180
Old Q Values:  [-3431.06190038  -403.50081769   191.42602715     0.        ]
New Q values:  [-3431.06190038  -244.53978092   191.42602715     0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -737.92033146 -6457.4598      -275.13151282 -6307.02      ]
------
Step:28, Action:East
State  260
Old Q Values:  [ -737.92033146 -6457.4598      -275.13151282 -6307.02      ]
New Q values:  [ -737.92033146 -6457.4598       374.59036137 -6307.02      ]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -1.80600000e+02  1.61064913e+00  1.61747656e+03]
------
Step:29, Action:East
State  272
Old Q Values:  [-1.46393736e+03 -1.80600000e+02  1.61064913e+00  1.61747656e+03]
New Q values:  [-1463.93735889  -180.6          686.82963442  1617.47655501]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   19.02533234 -8656.02923281 -6173.56321028  2289.28458257]
------
Step:30, Action:North
State  288
Old Q Values:  [   19.02533234 -8656.02923281 -6173.56321028  2289.28458257]
New Q values:  [   36.47367212 -8656.02923281 -6173.56321028  2289.28458257]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           98.21179729]
------
Step:31, Action:West
State  208
Old Q Values:  [  -49.0843063   2134.38483766 -2651.70614553  -491.0477188 ]
New Q values:  [  -49.0843063   2134.38483766 -2651.70614553   108.41638663]
Reward: -1  Episode Reward:  9
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.22332643e+01 1.01811825e+03 0.00000000e+00]
------
Step:32, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.22332643e+01 1.01811825e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 2.22332643e+01 1.04696275e+03 0.00000000e+00]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063   2134.38483766 -2651.70614553   108.41638663]
------
Step:33, Action:South
State  208
Old Q Values:  [  -49.0843063   2134.38483766 -2651.70614553   108.41638663]
New Q values:  [  -49.0843063   1539.93930983 -2651.70614553   108.41638663]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   36.47367212 -8656.02923281 -6173.56321028  2289.28458257]
------
Step:34, Action:West
State  288
Old Q Values:  [   36.47367212 -8656.02923281 -6173.56321028  2289.28458257]
New Q values:  [   36.47367212 -8656.02923281 -6173.56321028  1400.35679953]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          686.82963442  1617.47655501]
------
Step:35, Action:East
State  272
Old Q Values:  [-1463.93735889  -180.6          686.82963442  1617.47655501]
New Q values:  [-1463.93735889  -180.6          694.23889363  1617.47655501]
Reward: -1  Episode Reward:  5
xxxxx
x...x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   36.47367212 -8656.02923281 -6173.56321028  1400.35679953]
------
Step:36, Action:West
State  288
Old Q Values:  [   36.47367212 -8656.02923281 -6173.56321028  1400.35679953]
New Q values:  [   36.47367212 -8656.02923281 -6173.56321028  1044.78568631]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          694.23889363  1617.47655501]
------
Step:37, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          694.23889363  1617.47655501]
New Q values:  [-1463.93735889  -180.6          694.23889363  1068.33661366]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  60.2827196  -289.59534477 1406.48663887 -180.6       ]
------
Step:38, Action:North
State  261
Old Q Values:  [  60.2827196  -289.59534477 1406.48663887 -180.6       ]
New Q values:  [ 310.4909604  -289.59534477 1406.48663887 -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476    3.01500451 956.59290852   0.        ]
------
Step:39, Action:East
State  181
Old Q Values:  [   6.98192268   94.49330369   19.44326922 -180.6       ]
New Q values:  [   6.98192268   94.49330369    7.17730769 -180.6       ]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831     0.         -9485.76780054     0.        ]
------
Step:40, Action:South
State  195
Old Q Values:  [  38.85388605 1895.39606969  150.85657065    0.        ]
New Q values:  [  38.85388605 1557.00451246  150.85657065    0.        ]
Reward: -1  Episode Reward:  0
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 2664.82028196]
------
Step:41, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892 2664.82028196]
New Q values:  [  53.24162309    0.         -205.76435892 1487.27410444]
Reward: -1  Episode Reward:  -1
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 310.4909604  -289.59534477 1406.48663887 -180.6       ]
------
Step:42, Action:East
State  257
Old Q Values:  [15062.29902075  -180.6        -1151.95043491  9715.84680039]
New Q values:  [15062.29902075  -180.6          -15.19794263  9715.84680039]
Reward: -1  Episode Reward:  -2
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892 1487.27410444]
------
Step:43, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892 1487.27410444]
New Q values:  [  53.24162309    0.         -205.76435892 1016.25563344]
Reward: -1  Episode Reward:  -3
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 310.4909604  -289.59534477 1406.48663887 -180.6       ]
------
Step:44, Action:East
State  261
Old Q Values:  [ 310.4909604  -289.59534477 1406.48663887 -180.6       ]
New Q values:  [ 310.4909604  -289.59534477  882.49563965 -180.6       ]
Reward: -1  Episode Reward:  -4
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          694.23889363  1068.33661366]
------
Step:45, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892 1016.25563344]
New Q values:  [  53.24162309    0.         -205.76435892  670.65094527]
Reward: -1  Episode Reward:  -5
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 310.4909604  -289.59534477  882.49563965 -180.6       ]
------
Step:46, Action:East
State  261
Old Q Values:  [ 310.4909604  -289.59534477  882.49563965 -180.6       ]
New Q values:  [ 310.4909604  -289.59534477  553.59353944 -180.6       ]
Reward: -1  Episode Reward:  -6
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  670.65094527]
------
Step:47, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          694.23889363  1068.33661366]
New Q values:  [-1463.93735889  -180.6          694.23889363   592.8127073 ]
Reward: -1  Episode Reward:  -7
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 310.4909604  -289.59534477  553.59353944 -180.6       ]
------
Step:48, Action:North
State  261
Old Q Values:  [ 310.4909604  -289.59534477  553.59353944 -180.6       ]
New Q values:  [ 410.57425671 -289.59534477  553.59353944 -180.6       ]
Reward: -1  Episode Reward:  -8
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476    3.01500451 956.59290852   0.        ]
------
Step:49, Action:East
State  181
Old Q Values:  [   6.98192268   94.49330369    7.17730769 -180.6       ]
New Q values:  [   6.98192268   94.49330369    2.27092308 -180.6       ]
Reward: -1  Episode Reward:  -9
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831     0.         -9485.76780054     0.        ]
------
Step:50, Action:South
State  193
Old Q Values:  [-5922.26708831     0.         -9485.76780054     0.        ]
New Q values:  [-5922.26708831   200.59528358 -9485.76780054     0.        ]
Reward: -1  Episode Reward:  -10
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  670.65094527]
------
Step:51, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          694.23889363   592.8127073 ]
New Q values:  [-1463.93735889  -180.6          694.23889363  4755.21478914]
Reward: -1  Episode Reward:  -11
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[15062.29902075  -180.6          -15.19794263  9715.84680039]
------
Step:52, Action:North
State  257
Old Q Values:  [15062.29902075  -180.6          -15.19794263  9715.84680039]
New Q values:  [13145.96622343  -180.6          -15.19794263  9715.84680039]
Reward: -1  Episode Reward:  -12
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         23738.82205042  9823.77904799     0.        ]
------
Step:53, Action:South
State  181
Old Q Values:  [   6.98192268   94.49330369    2.27092308 -180.6       ]
New Q values:  [   6.98192268  203.27538331    2.27092308 -180.6       ]
Reward: -1  Episode Reward:  -13
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 410.57425671 -289.59534477  553.59353944 -180.6       ]
------
Step:54, Action:East
State  261
Old Q Values:  [ 410.57425671 -289.59534477  553.59353944 -180.6       ]
New Q values:  [ 410.57425671 -289.59534477 1647.40185252 -180.6       ]
Reward: -1  Episode Reward:  -14
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          694.23889363  4755.21478914]
------
Step:55, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          694.23889363  4755.21478914]
New Q values:  [-1463.93735889  -180.6          694.23889363  2013.86302407]
Reward: -1  Episode Reward:  -15
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -737.92033146 -6457.4598       374.59036137 -6307.02      ]
------
Step:56, Action:East
State  260
Old Q Values:  [ -737.92033146 -6457.4598       374.59036137 -6307.02      ]
New Q values:  [ -737.92033146 -6457.4598       753.39505177 -6307.02      ]
Reward: -1  Episode Reward:  -16
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          694.23889363  2013.86302407]
------
Step:57, Action:East
State  272
Old Q Values:  [-1463.93735889  -180.6          694.23889363  2013.86302407]
New Q values:  [-1463.93735889  -180.6          590.53126335  2013.86302407]
Reward: -1  Episode Reward:  -17
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   36.47367212 -8656.02923281 -6173.56321028  1044.78568631]
------
Step:58, Action:North
State  288
Old Q Values:  [   36.47367212 -8656.02923281 -6173.56321028  1044.78568631]
New Q values:  [   43.45300804 -8656.02923281 -6173.56321028  1044.78568631]
Reward: -1  Episode Reward:  -18
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           98.21179729]
------
Step:59, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6           98.21179729]
New Q values:  [    2.06134572 -1695.56049235  -180.6           81.0948436 ]
Reward: -1  Episode Reward:  -19
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ -0.6          2.56078987 141.36708228   0.        ]
------
Step:60, Action:East
State  194
Old Q Values:  [ -0.6          2.56078987 141.36708228   0.        ]
New Q values:  [-0.6         2.56078987 80.27528599  0.        ]
Reward: -1  Episode Reward:  -20
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           81.0948436 ]
------
Step:61, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6           81.0948436 ]
New Q values:  [    2.06134572 -1695.56049235  -180.6           55.92052324]
Reward: -1  Episode Reward:  -21
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987 80.27528599  0.        ]
------
Step:62, Action:East
State  194
Old Q Values:  [-0.6         2.56078987 80.27528599  0.        ]
New Q values:  [-0.6         2.56078987 48.28627137  0.        ]
Reward: -1  Episode Reward:  -22
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           55.92052324]
------
Step:63, Action:West
State  208
Old Q Values:  [  -49.0843063   1539.93930983 -2651.70614553   108.41638663]
New Q values:  [  -49.0843063   1539.93930983 -2651.70614553   356.8553797 ]
Reward: -1  Episode Reward:  -23
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.22332643e+01 1.04696275e+03 0.00000000e+00]
------
Step:64, Action:East
State  194
Old Q Values:  [-0.6         2.56078987 48.28627137  0.        ]
New Q values:  [-0.6         2.56078987 35.49066552  0.        ]
Reward: -1  Episode Reward:  -24
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           55.92052324]
------
Step:65, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6           55.92052324]
New Q values:  [    2.06134572 -1695.56049235  -180.6           32.41540895]
Reward: -1  Episode Reward:  -25
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987 35.49066552  0.        ]
------
Step:66, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.22332643e+01 1.04696275e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 2.22332643e+01 8.80166893e+02 0.00000000e+00]
Reward: -1  Episode Reward:  -26
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063   1539.93930983 -2651.70614553   356.8553797 ]
------
Step:67, Action:South
State  208
Old Q Values:  [  -49.0843063   1539.93930983 -2651.70614553   356.8553797 ]
New Q values:  [  -49.0843063    928.81142983 -2651.70614553   356.8553797 ]
Reward: -1  Episode Reward:  -27
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   43.45300804 -8656.02923281 -6173.56321028  1044.78568631]
------
Step:68, Action:West
State  288
Old Q Values:  [   43.45300804 -8656.02923281 -6173.56321028  1044.78568631]
New Q values:  [   43.45300804 -8656.02923281 -6173.56321028   618.50955811]
Reward: -1  Episode Reward:  -28
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  670.65094527]
------
Step:69, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892  670.65094527]
New Q values:  [  53.24162309    0.         -205.76435892  761.88093386]
Reward: -1  Episode Reward:  -29
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 410.57425671 -289.59534477 1647.40185252 -180.6       ]
------
Step:70, Action:East
State  261
Old Q Values:  [ 410.57425671 -289.59534477 1647.40185252 -180.6       ]
New Q values:  [ 410.57425671 -289.59534477  886.92502117 -180.6       ]
Reward: -1  Episode Reward:  -30
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  761.88093386]
------
Step:71, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892  761.88093386]
New Q values:  [  53.24162309    0.         -205.76435892  570.2298799 ]
Reward: -1  Episode Reward:  -31
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 410.57425671 -289.59534477  886.92502117 -180.6       ]
------
Step:72, Action:East
State  257
Old Q Values:  [13145.96622343  -180.6          -15.19794263  9715.84680039]
New Q values:  [13145.96622343  -180.6          164.38978692  9715.84680039]
Reward: -1  Episode Reward:  -32
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.         -205.76435892  570.2298799 ]
------
Step:73, Action:West
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892  570.2298799 ]
New Q values:  [  53.24162309    0.         -205.76435892  493.56945831]
Reward: -1  Episode Reward:  -33
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 410.57425671 -289.59534477  886.92502117 -180.6       ]
------
Step:74, Action:East
State  261
Old Q Values:  [ 410.57425671 -289.59534477  886.92502117 -180.6       ]
New Q values:  [ 410.57425671 -289.59534477  958.32891569 -180.6       ]
Reward: -1  Episode Reward:  -34
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          590.53126335  2013.86302407]
------
Step:75, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          590.53126335  2013.86302407]
New Q values:  [-1463.93735889  -180.6          590.53126335  1092.44388433]
Reward: -1  Episode Reward:  -35
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 410.57425671 -289.59534477  958.32891569 -180.6       ]
------
Step:76, Action:North
State  261
Old Q Values:  [ 410.57425671 -289.59534477  958.32891569 -180.6       ]
New Q values:  [ 224.61231768 -289.59534477  958.32891569 -180.6       ]
Reward: -1  Episode Reward:  -36
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  203.27538331    2.27092308 -180.6       ]
------
Step:77, Action:South
State  183
Old Q Values:  [  6.1762476    3.01500451 956.59290852   0.        ]
New Q values:  [  6.1762476  288.10467651 956.59290852   0.        ]
Reward: -1  Episode Reward:  -37
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 224.61231768 -289.59534477  958.32891569 -180.6       ]
------
Step:78, Action:North
State  261
Old Q Values:  [ 224.61231768 -289.59534477  958.32891569 -180.6       ]
New Q values:  [ 376.22279963 -289.59534477  958.32891569 -180.6       ]
Reward: -1  Episode Reward:  -38
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  288.10467651 956.59290852   0.        ]
------
Step:79, Action:East
State  181
Old Q Values:  [   6.98192268  203.27538331    2.27092308 -180.6       ]
New Q values:  [   6.98192268  203.27538331   60.4869543  -180.6       ]
Reward: -1  Episode Reward:  -39
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   200.59528358 -9485.76780054     0.        ]
------
Step:80, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.22332643e+01 8.80166893e+02 0.00000000e+00]
New Q values:  [3.89777037e-01 3.36026471e+02 8.80166893e+02 0.00000000e+00]
Reward: -1  Episode Reward:  -40
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          590.53126335  1092.44388433]
------
Step:81, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          590.53126335  1092.44388433]
New Q values:  [-1463.93735889  -180.6          590.53126335   723.87622844]
Reward: -1  Episode Reward:  -41
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 376.22279963 -289.59534477  958.32891569 -180.6       ]
------
Step:82, Action:North
State  261
Old Q Values:  [ 376.22279963 -289.59534477  958.32891569 -180.6       ]
New Q values:  [ 210.87173484 -289.59534477  958.32891569 -180.6       ]
Reward: -1  Episode Reward:  -42
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  203.27538331   60.4869543  -180.6       ]
------
Step:83, Action:South
State  177
Old Q Values:  [    0.         23738.82205042  9823.77904799     0.        ]
New Q values:  [    0.         13438.71868719  9823.77904799     0.        ]
Reward: -1  Episode Reward:  -43
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[13145.96622343  -180.6          164.38978692  9715.84680039]
------
Step:84, Action:North
State  257
Old Q Values:  [13145.96622343  -180.6          164.38978692  9715.84680039]
New Q values:  [9289.40209553 -180.6         164.38978692 9715.84680039]
Reward: -1  Episode Reward:  -44
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         13438.71868719  9823.77904799     0.        ]
------
Step:85, Action:South
State  177
Old Q Values:  [    0.         13438.71868719  9823.77904799     0.        ]
New Q values:  [   0.         8289.641515   9823.77904799    0.        ]
Reward: -1  Episode Reward:  -45
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[9289.40209553 -180.6         164.38978692 9715.84680039]
------
Step:86, Action:West
State  257
Old Q Values:  [9289.40209553 -180.6         164.38978692 9715.84680039]
New Q values:  [9289.40209553 -180.6         164.38978692 6620.49276028]
Reward: -301  Episode Reward:  -346
xxxxx
x..gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[9289.40209553 -180.6         164.38978692 6620.49276028]
------
Step:87, Action:North
State  257
Old Q Values:  [9289.40209553 -180.6         164.38978692 6620.49276028]
New Q values:  [6662.29455261 -180.6         164.38978692 6620.49276028]
Reward: -1  Episode Reward:  -347
xxxxx
x.g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         8289.641515   9823.77904799    0.        ]
------
Step:88, Action:East
State  180
Old Q Values:  [-3431.06190038  -244.53978092   191.42602715     0.        ]
New Q values:  [-3431.06190038  -244.53978092   340.02047876     0.        ]
Reward: -1  Episode Reward:  -348
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.36026471e+02 8.80166893e+02 0.00000000e+00]
------
Step:89, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.36026471e+02 8.80166893e+02 0.00000000e+00]
New Q values:  [3.89777037e-01 3.36026471e+02 6.30110186e+02 0.00000000e+00]
Reward: -1  Episode Reward:  -349
xxxxx
x.g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    928.81142983 -2651.70614553   356.8553797 ]
------
Step:90, Action:South
State  208
Old Q Values:  [  -49.0843063    928.81142983 -2651.70614553   356.8553797 ]
New Q values:  [  -49.0843063    556.47743936 -2651.70614553   356.8553797 ]
Reward: -1  Episode Reward:  -350
xxxxx
x...x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   43.45300804 -8656.02923281 -6173.56321028   618.50955811]
------
Step:91, Action:West
State  288
Old Q Values:  [   43.45300804 -8656.02923281 -6173.56321028   618.50955811]
New Q values:  [   43.45300804 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: -10001  Episode Reward:  -10351
xxxxx
x...x
x   x
x g x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -737.92033146 -6457.4598       753.39505177 -6307.02      ]
------
Step:1, Action:East
State  261
Old Q Values:  [ 210.87173484 -289.59534477  958.32891569 -180.6       ]
New Q values:  [ 210.87173484 -289.59534477  626.66676713 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  14.77428561 -180.6         793.11733619   -6.75709867]
------
Step:2, Action:East
State  273
Old Q Values:  [  53.24162309    0.         -205.76435892  493.56945831]
New Q values:  [ 53.24162309   0.         -63.86984116 493.56945831]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   43.45300804 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:3, Action:North
State  288
Old Q Values:  [   43.45300804 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [  189.72443502 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: 9  Episode Reward:  27
xxxxx
x .gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    556.47743936 -2651.70614553   356.8553797 ]
------
Step:4, Action:South
State  208
Old Q Values:  [  -49.0843063    556.47743936 -2651.70614553   356.8553797 ]
New Q values:  [  -49.0843063    278.90830625 -2651.70614553   356.8553797 ]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  189.72443502 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:5, Action:North
State  288
Old Q Values:  [  189.72443502 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [  182.34638792 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: -1  Episode Reward:  25
xxxxx
xg..x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    278.90830625 -2651.70614553   356.8553797 ]
------
Step:6, Action:West
State  208
Old Q Values:  [  -49.0843063    278.90830625 -2651.70614553   356.8553797 ]
New Q values:  [  -49.0843063    278.90830625 -2651.70614553   337.17520773]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.36026471e+02 6.30110186e+02 0.00000000e+00]
------
Step:7, Action:East
State  194
Old Q Values:  [-0.6         2.56078987 35.49066552  0.        ]
New Q values:  [-0.6         2.56078987 23.32088889  0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           32.41540895]
------
Step:8, Action:West
State  208
Old Q Values:  [  -49.0843063    278.90830625 -2651.70614553   337.17520773]
New Q values:  [  -49.0843063    278.90830625 -2651.70614553   323.30313894]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.36026471e+02 6.30110186e+02 0.00000000e+00]
------
Step:9, Action:East
State  194
Old Q Values:  [-0.6         2.56078987 23.32088889  0.        ]
New Q values:  [-0.6         2.56078987 18.45297824  0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           32.41540895]
------
Step:10, Action:West
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6           32.41540895]
New Q values:  [    2.06134572 -1695.56049235  -180.6           17.90205705]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x.a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987 18.45297824  0.        ]
------
Step:11, Action:East
State  195
Old Q Values:  [  38.85388605 1557.00451246  150.85657065    0.        ]
New Q values:  [  38.85388605 1557.00451246   65.11324538    0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x. ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           17.90205705]
------
Step:12, Action:West
State  208
Old Q Values:  [  -49.0843063    278.90830625 -2651.70614553   323.30313894]
New Q values:  [  -49.0843063    278.90830625 -2651.70614553   188.89984065]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x.agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   200.59528358 -9485.76780054     0.        ]
------
Step:13, Action:South
State  195
Old Q Values:  [  38.85388605 1557.00451246   65.11324538    0.        ]
New Q values:  [ 38.85388605 770.27264248  65.11324538   0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x.  x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 493.56945831]
------
Step:14, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          590.53126335   723.87622844]
New Q values:  [-1463.93735889  -180.6          590.53126335   476.95052152]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 210.87173484 -289.59534477  626.66676713 -180.6       ]
------
Step:15, Action:North
State  261
Old Q Values:  [ 210.87173484 -289.59534477  626.66676713 -180.6       ]
New Q values:  [ 376.72656649 -289.59534477  626.66676713 -180.6       ]
Reward: 9  Episode Reward:  35
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  288.10467651 956.59290852   0.        ]
------
Step:16, Action:East
State  183
Old Q Values:  [  6.1762476  288.10467651 956.59290852   0.        ]
New Q values:  [  6.1762476  288.10467651 387.57305688   0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987 18.45297824  0.        ]
------
Step:17, Action:East
State  195
Old Q Values:  [ 38.85388605 770.27264248  65.11324538   0.        ]
New Q values:  [ 38.85388605 770.27264248  30.81591527   0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    2.06134572 -1695.56049235  -180.6           17.90205705]
------
Step:18, Action:West
State  208
Old Q Values:  [  -49.0843063    278.90830625 -2651.70614553   188.89984065]
New Q values:  [  -49.0843063    278.90830625 -2651.70614553   135.13852133]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   200.59528358 -9485.76780054     0.        ]
------
Step:19, Action:South
State  195
Old Q Values:  [ 38.85388605 770.27264248  30.81591527   0.        ]
New Q values:  [ 38.85388605 455.57989448  30.81591527   0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 493.56945831]
------
Step:20, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 493.56945831]
New Q values:  [ 53.24162309   0.         -63.86984116 384.82781346]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 376.72656649 -289.59534477  626.66676713 -180.6       ]
------
Step:21, Action:East
State  261
Old Q Values:  [ 376.72656649 -289.59534477  626.66676713 -180.6       ]
New Q values:  [ 376.72656649 -289.59534477  365.51505089 -180.6       ]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 384.82781346]
------
Step:22, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 384.82781346]
New Q values:  [ 53.24162309   0.         -63.86984116 266.34909533]
Reward: -1  Episode Reward:  28
xxxxx
x .gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 376.72656649 -289.59534477  365.51505089 -180.6       ]
------
Step:23, Action:North
State  261
Old Q Values:  [ 376.72656649 -289.59534477  365.51505089 -180.6       ]
New Q values:  [ 211.07324159 -289.59534477  365.51505089 -180.6       ]
Reward: -1  Episode Reward:  27
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  203.27538331   60.4869543  -180.6       ]
------
Step:24, Action:South
State  181
Old Q Values:  [   6.98192268  203.27538331   60.4869543  -180.6       ]
New Q values:  [   6.98192268  190.36466859   60.4869543  -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 211.07324159 -289.59534477  365.51505089 -180.6       ]
------
Step:25, Action:East
State  260
Old Q Values:  [ -737.92033146 -6457.4598       753.39505177 -6307.02      ]
New Q values:  [ -737.92033146 -6457.4598       477.91739971 -6307.02      ]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          590.53126335   476.95052152]
------
Step:26, Action:East
State  272
Old Q Values:  [-1463.93735889  -180.6          590.53126335   476.95052152]
New Q values:  [-1463.93735889  -180.6          290.31642171   476.95052152]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  182.34638792 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:27, Action:North
State  288
Old Q Values:  [  182.34638792 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [  156.01104704 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    278.90830625 -2651.70614553   135.13852133]
------
Step:28, Action:South
State  210
Old Q Values:  [    2.06134572 -1695.56049235  -180.6           17.90205705]
New Q values:  [   2.06134572 -632.02088283 -180.6          17.90205705]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  156.01104704 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:29, Action:North
State  288
Old Q Values:  [  156.01104704 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [  145.47691069 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    278.90830625 -2651.70614553   135.13852133]
------
Step:30, Action:South
State  210
Old Q Values:  [   2.06134572 -632.02088283 -180.6          17.90205705]
New Q values:  [   2.06134572 -209.76527992 -180.6          17.90205705]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  145.47691069 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:31, Action:North
State  288
Old Q Values:  [  145.47691069 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [  141.26325615 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    278.90830625 -2651.70614553   135.13852133]
------
Step:32, Action:South
State  208
Old Q Values:  [  -49.0843063    278.90830625 -2651.70614553   135.13852133]
New Q values:  [  -49.0843063    153.34229935 -2651.70614553   135.13852133]
Reward: -1  Episode Reward:  18
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  141.26325615 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:33, Action:North
State  288
Old Q Values:  [  141.26325615 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [  101.90799227 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063    153.34229935 -2651.70614553   135.13852133]
------
Step:34, Action:South
State  208
Old Q Values:  [  -49.0843063    153.34229935 -2651.70614553   135.13852133]
New Q values:  [  -49.0843063     91.30931742 -2651.70614553   135.13852133]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  101.90799227 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:35, Action:North
State  288
Old Q Values:  [  101.90799227 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [   80.70475331 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063     91.30931742 -2651.70614553   135.13852133]
------
Step:36, Action:West
State  208
Old Q Values:  [  -49.0843063     91.30931742 -2651.70614553   135.13852133]
New Q values:  [  -49.0843063     91.30931742 -2651.70614553   242.48846438]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.36026471e+02 6.30110186e+02 0.00000000e+00]
------
Step:37, Action:East
State  196
Old Q Values:  [-2469.90645144   650.23146405     0.             0.        ]
New Q values:  [-2469.90645144   650.23146405    72.14653931     0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063     91.30931742 -2651.70614553   242.48846438]
------
Step:38, Action:West
State  208
Old Q Values:  [  -49.0843063     91.30931742 -2651.70614553   242.48846438]
New Q values:  [  -49.0843063     91.30931742 -2651.70614553 -5714.5715584 ]
Reward: -10001  Episode Reward:  -9988
xxxxx
x ..x
x g x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   200.59528358 -9485.76780054     0.        ]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.36026471e+02 6.30110186e+02 0.00000000e+00]
New Q values:  [3.89777037e-01 2.82895745e+02 6.30110186e+02 0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          290.31642171   476.95052152]
------
Step:2, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          290.31642171   476.95052152]
New Q values:  [-1463.93735889  -180.6          290.31642171   305.83472387]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 211.07324159 -289.59534477  365.51505089 -180.6       ]
------
Step:3, Action:East
State  257
Old Q Values:  [6662.29455261 -180.6         164.38978692 6620.49276028]
New Q values:  [6662.29455261 -180.6         145.06064337 6620.49276028]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x. gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 266.34909533]
------
Step:4, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 266.34909533]
New Q values:  [ 53.24162309   0.         -63.86984116 215.5941534 ]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 211.07324159 -289.59534477  365.51505089 -180.6       ]
------
Step:5, Action:East
State  257
Old Q Values:  [6662.29455261 -180.6         145.06064337 6620.49276028]
New Q values:  [6662.29455261 -180.6         149.17467451 6620.49276028]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          290.31642171   305.83472387]
------
Step:6, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6          290.31642171   305.83472387]
New Q values:  [-1463.93735889  -180.6          290.31642171   265.10910946]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -737.92033146 -6457.4598       477.91739971 -6307.02      ]
------
Step:7, Action:East
State  260
Old Q Values:  [ -737.92033146 -6457.4598       477.91739971 -6307.02      ]
New Q values:  [ -737.92033146 -6457.4598       277.6618864  -6307.02      ]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -180.6          290.31642171   265.10910946]
------
Step:8, Action:East
State  272
Old Q Values:  [-1463.93735889  -180.6          290.31642171   265.10910946]
New Q values:  [-1463.93735889  -180.6          145.73799468   265.10910946]
Reward: 9  Episode Reward:  22
xxxxx
x.. x
x. .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   80.70475331 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:9, Action:North
State  288
Old Q Values:  [   80.70475331 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [   43.05251844 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: 9  Episode Reward:  31
xxxxx
x.. x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   2.06134572 -209.76527992 -180.6          17.90205705]
------
Step:10, Action:West
State  208
Old Q Values:  [  -49.0843063     91.30931742 -2651.70614553 -5714.5715584 ]
New Q values:  [  -49.0843063     91.30931742 -2651.70614553 -8097.39556751]
Reward: -10001  Episode Reward:  -9970
xxxxx
x.. x
x.g x
x   x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038  -244.53978092   340.02047876     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [-3431.06190038  -244.53978092   340.02047876     0.        ]
New Q values:  [-3431.06190038  -244.53978092   336.47763072     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   650.23146405    72.14653931     0.        ]
------
Step:2, Action:South
State  196
Old Q Values:  [-2469.90645144   650.23146405    72.14653931     0.        ]
New Q values:  [-2469.90645144   503.42778648    72.14653931     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  14.77428561 -180.6         793.11733619   -6.75709867]
------
Step:3, Action:East
State  272
Old Q Values:  [-1463.93735889  -180.6          145.73799468   265.10910946]
New Q values:  [-1463.93735889  -180.6           76.6109534    265.10910946]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   43.05251844 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:4, Action:North
State  288
Old Q Values:  [   43.05251844 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [   50.0138026  -8656.02923281 -6173.56321028 -5536.03330823]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063     91.30931742 -2651.70614553 -8097.39556751]
------
Step:5, Action:South
State  210
Old Q Values:  [   2.06134572 -209.76527992 -180.6          17.90205705]
New Q values:  [   2.06134572  -69.50197119 -180.6          17.90205705]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   50.0138026  -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:6, Action:North
State  288
Old Q Values:  [   50.0138026  -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [   24.77613816 -8656.02923281 -6173.56321028 -5536.03330823]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   2.06134572  -69.50197119 -180.6          17.90205705]
------
Step:7, Action:West
State  210
Old Q Values:  [   2.06134572  -69.50197119 -180.6          17.90205705]
New Q values:  [   2.06134572  -69.50197119 -180.6          12.09671629]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987 18.45297824  0.        ]
------
Step:8, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.82895745e+02 6.30110186e+02 0.00000000e+00]
New Q values:  [  0.38977704 282.89574486 278.83686969   0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063     91.30931742 -2651.70614553 -8097.39556751]
------
Step:9, Action:South
State  208
Old Q Values:  [  -49.0843063     91.30931742 -2651.70614553 -8097.39556751]
New Q values:  [  -49.0843063     43.35656841 -2651.70614553 -8097.39556751]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   24.77613816 -8656.02923281 -6173.56321028 -5536.03330823]
------
Step:10, Action:West
State  288
Old Q Values:  [   24.77613816 -8656.02923281 -6173.56321028 -5536.03330823]
New Q values:  [   24.77613816 -8656.02923281 -6173.56321028 -2150.33507727]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 215.5941534 ]
------
Step:11, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 215.5941534 ]
New Q values:  [ 53.24162309   0.         -63.86984116 201.29217663]
Reward: 9  Episode Reward:  39
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 211.07324159 -289.59534477  365.51505089 -180.6       ]
------
Step:12, Action:East
State  261
Old Q Values:  [ 211.07324159 -289.59534477  365.51505089 -180.6       ]
New Q values:  [ 211.07324159 -289.59534477  205.99367335 -180.6       ]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 201.29217663]
------
Step:13, Action:West
State  272
Old Q Values:  [-1463.93735889  -180.6           76.6109534    265.10910946]
New Q values:  [-1463.93735889  -180.6           76.6109534    168.76561626]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 211.07324159 -289.59534477  205.99367335 -180.6       ]
------
Step:14, Action:North
State  260
Old Q Values:  [ -737.92033146 -6457.4598       277.6618864  -6307.02      ]
New Q values:  [  104.36074887 -6457.4598       277.6618864  -6307.02      ]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[   0.            0.         1333.76293817    0.        ]
------
Step:15, Action:East
State  182
Old Q Values:  [   0.            0.         1333.76293817    0.        ]
New Q values:  [  0.           0.         538.44106874   0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-0.6         2.56078987 18.45297824  0.        ]
------
Step:16, Action:East
State  192
Old Q Values:  [  0.38977704 282.89574486 278.83686969   0.        ]
New Q values:  [  0.38977704 282.89574486 123.9417184    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063     43.35656841 -2651.70614553 -8097.39556751]
------
Step:17, Action:South
State  208
Old Q Values:  [  -49.0843063     43.35656841 -2651.70614553 -8097.39556751]
New Q values:  [  -49.0843063     24.17546881 -2651.70614553 -8097.39556751]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   24.77613816 -8656.02923281 -6173.56321028 -2150.33507727]
------
Step:18, Action:North
State  288
Old Q Values:  [   24.77613816 -8656.02923281 -6173.56321028 -2150.33507727]
New Q values:  [   16.56309591 -8656.02923281 -6173.56321028 -2150.33507727]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063     24.17546881 -2651.70614553 -8097.39556751]
------
Step:19, Action:South
State  210
Old Q Values:  [   2.06134572  -69.50197119 -180.6          12.09671629]
New Q values:  [   2.06134572  -23.4318597  -180.6          12.09671629]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   16.56309591 -8656.02923281 -6173.56321028 -2150.33507727]
------
Step:20, Action:North
State  288
Old Q Values:  [   16.56309591 -8656.02923281 -6173.56321028 -2150.33507727]
New Q values:  [   13.27787901 -8656.02923281 -6173.56321028 -2150.33507727]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063     24.17546881 -2651.70614553 -8097.39556751]
------
Step:21, Action:South
State  208
Old Q Values:  [  -49.0843063     24.17546881 -2651.70614553 -8097.39556751]
New Q values:  [  -49.0843063     13.05355123 -2651.70614553 -8097.39556751]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   13.27787901 -8656.02923281 -6173.56321028 -2150.33507727]
------
Step:22, Action:North
State  288
Old Q Values:  [   13.27787901 -8656.02923281 -6173.56321028 -2150.33507727]
New Q values:  [ 8.62721697e+00 -8.65602923e+03 -6.17356321e+03 -2.15033508e+03]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063     13.05355123 -2651.70614553 -8097.39556751]
------
Step:23, Action:South
State  208
Old Q Values:  [  -49.0843063     13.05355123 -2651.70614553 -8097.39556751]
New Q values:  [-4.90843063e+01  7.20958558e+00 -2.65170615e+03 -8.09739557e+03]
Reward: -1  Episode Reward:  27
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8.62721697e+00 -8.65602923e+03 -6.17356321e+03 -2.15033508e+03]
------
Step:24, Action:North
State  288
Old Q Values:  [ 8.62721697e+00 -8.65602923e+03 -6.17356321e+03 -2.15033508e+03]
New Q values:  [ 5.01376246e+00 -8.65602923e+03 -6.17356321e+03 -2.15033508e+03]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4.90843063e+01  7.20958558e+00 -2.65170615e+03 -8.09739557e+03]
------
Step:25, Action:South
State  208
Old Q Values:  [-4.90843063e+01  7.20958558e+00 -2.65170615e+03 -8.09739557e+03]
New Q values:  [-4.90843063e+01  3.78796297e+00 -2.65170615e+03 -8.09739557e+03]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5.01376246e+00 -8.65602923e+03 -6.17356321e+03 -2.15033508e+03]
------
Step:26, Action:North
State  288
Old Q Values:  [ 5.01376246e+00 -8.65602923e+03 -6.17356321e+03 -2.15033508e+03]
New Q values:  [-5997.45810612 -8656.02923281 -6173.56321028 -2150.33507727]
Reward: -10001  Episode Reward:  -9976
xxxxx
x ..x
x  gx
x   x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          119.62723386  -179.38454759 -8350.36593577]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6        -731.60350289 -180.6          35.71493192]
New Q values:  [-180.6        -239.26767529 -180.6          35.71493192]
Reward: 9  Episode Reward:  9
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.06267698e+02  1.59912420e+02 -6.17035694e+03  6.08663514e+00]
------
Step:2, Action:South
State  208
Old Q Values:  [-4.90843063e+01  3.78796297e+00 -2.65170615e+03 -8.09739557e+03]
New Q values:  [  -49.0843063   -638.18533799 -2651.70614553 -8097.39556751]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5997.45810612 -8656.02923281 -6173.56321028 -2150.33507727]
------
Step:3, Action:West
State  288
Old Q Values:  [-5997.45810612 -8656.02923281 -6173.56321028 -2150.33507727]
New Q values:  [-5997.45810612 -8656.02923281 -6173.56321028  -804.10434603]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x.. x
x.a x
xxxxx
Step:4, Action:South
State  272
Old Q Values:  [-1463.93735889  -180.6           76.6109534    168.76561626]
New Q values:  [-1463.93735889  -202.21031512    76.6109534    168.76561626]
Reward: -301  Episode Reward:  -274
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -202.21031512    76.6109534    168.76561626]
------
Step:5, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 201.29217663]
New Q values:  [  53.24162309    0.          -63.86984116 2084.60523643]
Reward: 9  Episode Reward:  -265
xxxxx
x. gx
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[6662.29455261 -180.6         149.17467451 6620.49276028]
------
Step:6, Action:North
State  257
Old Q Values:  [6662.29455261 -180.6         149.17467451 6620.49276028]
New Q values:  [5617.45153544 -180.6         149.17467451 6620.49276028]
Reward: 9  Episode Reward:  -256
xxxxx
x.g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         8289.641515   9823.77904799    0.        ]
------
Step:7, Action:East
State  176
Old Q Values:  [    0.             0.         24958.66884214     0.        ]
New Q values:  [    0.             0.         10073.73626031     0.        ]
Reward: 9  Episode Reward:  -247
xxxxx
xg  x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  0.38977704 282.89574486 123.9417184    0.        ]
------
Step:8, Action:South
State  192
Old Q Values:  [  0.38977704 282.89574486 123.9417184    0.        ]
New Q values:  [  0.38977704 163.18798282 123.9417184    0.        ]
Reward: -1  Episode Reward:  -248
xxxxx
x.g x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -202.21031512    76.6109534    168.76561626]
------
Step:9, Action:West
State  272
Old Q Values:  [-1463.93735889  -202.21031512    76.6109534    168.76561626]
New Q values:  [-1463.93735889  -202.21031512    76.6109534     88.93895667]
Reward: -1  Episode Reward:  -249
xxxxx
xg  x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.            73.44236722 -6036.76313782]
------
Step:10, Action:East
State  257
Old Q Values:  [5617.45153544 -180.6         149.17467451 6620.49276028]
New Q values:  [5617.45153544 -180.6          85.75155681 6620.49276028]
Reward: -1  Episode Reward:  -250
xxxxx
x.g x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -202.21031512    76.6109534     88.93895667]
------
Step:11, Action:West
State  272
Old Q Values:  [-1463.93735889  -202.21031512    76.6109534     88.93895667]
New Q values:  [-1463.93735889  -202.21031512    76.6109534     57.00829284]
Reward: -1  Episode Reward:  -251
xxxxx
xg  x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.            73.44236722 -6036.76313782]
------
Step:12, Action:East
State  256
Old Q Values:  [    0.             0.            73.44236722 -6036.76313782]
New Q values:  [    0.             0.            51.76023291 -6036.76313782]
Reward: -1  Episode Reward:  -252
xxxxx
x.  x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -202.21031512    76.6109534     57.00829284]
------
Step:13, Action:East
State  272
Old Q Values:  [-1463.93735889  -202.21031512    76.6109534     57.00829284]
New Q values:  [-1463.93735889  -202.21031512  -211.18692245    57.00829284]
Reward: -1  Episode Reward:  -253
xxxxx
x.  x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5997.45810612 -8656.02923281 -6173.56321028  -804.10434603]
------
Step:14, Action:West
State  288
Old Q Values:  [-5997.45810612 -8656.02923281 -6173.56321028  -804.10434603]
New Q values:  [-5997.45810612 -8656.02923281 -6173.56321028 -6305.13925056]
Reward: -10001  Episode Reward:  -10254
xxxxx
x.  x
x   x
x g x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5997.45810612 -8656.02923281 -6173.56321028 -6305.13925056]
------
Step:1, Action:East
State  288
Old Q Values:  [-5997.45810612 -8656.02923281 -6173.56321028 -6305.13925056]
New Q values:  [-5997.45810612 -8656.02923281 -4449.26271595 -6305.13925056]
Reward: -301  Episode Reward:  -301
xxxxx
x..gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5997.45810612 -8656.02923281 -4449.26271595 -6305.13925056]
------
Step:2, Action:East
State  288
Old Q Values:  [-5997.45810612 -8656.02923281 -4449.26271595 -6305.13925056]
New Q values:  [-5997.45810612 -8656.02923281 -3295.08390117 -6305.13925056]
Reward: -301  Episode Reward:  -602
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5997.45810612 -8656.02923281 -3295.08390117 -6305.13925056]
------
Step:3, Action:East
State  288
Old Q Values:  [-5997.45810612 -8656.02923281 -3295.08390117 -6305.13925056]
New Q values:  [-5997.45810612 -8656.02923281 -2487.15873082 -6305.13925056]
Reward: -301  Episode Reward:  -903
xxxxx
x..gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5997.45810612 -8656.02923281 -2487.15873082 -6305.13925056]
------
Step:4, Action:East
State  288
Old Q Values:  [-5997.45810612 -8656.02923281 -2487.15873082 -6305.13925056]
New Q values:  [-5997.45810612 -8656.02923281 -1921.61111157 -6305.13925056]
Reward: -301  Episode Reward:  -1204
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5997.45810612 -8656.02923281 -1921.61111157 -6305.13925056]
------
Step:5, Action:East
State  288
Old Q Values:  [-5997.45810612 -8656.02923281 -1921.61111157 -6305.13925056]
New Q values:  [-5997.45810612 -8656.02923281 -7525.7277781  -6305.13925056]
Reward: -10301  Episode Reward:  -11505
xxxxx
x...x
x.. x
x..gx
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  190.36466859   60.4869543  -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [   6.98192268  190.36466859   60.4869543  -180.6       ]
New Q values:  [   6.98192268  144.86783991   60.4869543  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 211.07324159 -289.59534477  205.99367335 -180.6       ]
------
Step:2, Action:North
State  261
Old Q Values:  [ 211.07324159 -289.59534477  205.99367335 -180.6       ]
New Q values:  [ 127.28964861 -289.59534477  205.99367335 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  144.86783991   60.4869543  -180.6       ]
------
Step:3, Action:South
State  181
Old Q Values:  [   6.98192268  144.86783991   60.4869543  -180.6       ]
New Q values:  [   6.98192268  119.14523797   60.4869543  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 127.28964861 -289.59534477  205.99367335 -180.6       ]
------
Step:4, Action:East
State  261
Old Q Values:  [ 127.28964861 -289.59534477  205.99367335 -180.6       ]
New Q values:  [ 127.28964861 -289.59534477  713.17904027 -180.6       ]
Reward: 9  Episode Reward:  16
xxxxx
x..gx
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.          -63.86984116 2084.60523643]
------
Step:5, Action:West
State  272
Old Q Values:  [-1463.93735889  -202.21031512  -211.18692245    57.00829284]
New Q values:  [-1463.93735889  -202.21031512  -211.18692245   236.15702922]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 127.28964861 -289.59534477  713.17904027 -180.6       ]
------
Step:6, Action:East
State  261
Old Q Values:  [ 127.28964861 -289.59534477  713.17904027 -180.6       ]
New Q values:  [ 127.28964861 -289.59534477  355.51872487 -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -202.21031512  -211.18692245   236.15702922]
------
Step:7, Action:West
State  272
Old Q Values:  [-1463.93735889  -202.21031512  -211.18692245   236.15702922]
New Q values:  [-1463.93735889  -202.21031512  -211.18692245   200.51842915]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 127.28964861 -289.59534477  355.51872487 -180.6       ]
------
Step:8, Action:North
State  261
Old Q Values:  [ 127.28964861 -289.59534477  355.51872487 -180.6       ]
New Q values:  [ 166.58777651 -289.59534477  355.51872487 -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  288.10467651 387.57305688   0.        ]
------
Step:9, Action:East
State  177
Old Q Values:  [   0.         8289.641515   9823.77904799    0.        ]
New Q values:  [   0.         8289.641515   3995.09020427    0.        ]
Reward: 9  Episode Reward:  21
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   200.59528358 -9485.76780054     0.        ]
------
Step:10, Action:South
State  192
Old Q Values:  [  0.38977704 163.18798282 123.9417184    0.        ]
New Q values:  [  0.38977704 124.83072187 123.9417184    0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -202.21031512  -211.18692245   200.51842915]
------
Step:11, Action:West
State  272
Old Q Values:  [-1463.93735889  -202.21031512  -211.18692245   200.51842915]
New Q values:  [-1463.93735889  -202.21031512  -211.18692245   162.90593758]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  104.36074887 -6457.4598       277.6618864  -6307.02      ]
------
Step:12, Action:East
State  260
Old Q Values:  [  104.36074887 -6457.4598       277.6618864  -6307.02      ]
New Q values:  [  104.36074887 -6457.4598       159.33653583 -6307.02      ]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889  -202.21031512  -211.18692245   162.90593758]
------
Step:13, Action:South
State  272
Old Q Values:  [-1463.93735889  -202.21031512  -211.18692245   162.90593758]
New Q values:  [-1463.93735889 -6212.61234477  -211.18692245   162.90593758]
Reward: -10301  Episode Reward:  -10283
xxxxx
x.. x
x  .x
x g.x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.          3.43614185 98.39771311  0.        ]
------
Step:1, Action:East
State  107
Old Q Values:  [-252.35169558   14.05929374   46.26664403 -252.78192178]
New Q values:  [-252.35169558   14.05929374   34.23084843 -252.78192178]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
x.g x
xxxxx
Step:2, Action:North
State  123
Old Q Values:  [ -284.31459256 -6000.6          136.2565813     10.97401097]
New Q values:  [ -253.44886264 -6000.6          136.2565813     10.97401097]
Reward: -301  Episode Reward:  -292
xxxxx
x a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6          136.2565813     10.97401097]
------
Step:3, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.44139694e+01  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  2.98800673e+01  4.04786473e+00]
Reward: 9  Episode Reward:  -283
xxxxx
x  ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          35.71493192]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          35.71493192]
New Q values:  [-180.6        -239.26767529 -180.6          54.56294716]
Reward: -1  Episode Reward:  -284
xxxxx
x a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6          136.2565813     10.97401097]
------
Step:5, Action:East
State  123
Old Q Values:  [ -253.44886264 -6000.6          136.2565813     10.97401097]
New Q values:  [ -253.44886264 -6000.6           70.27151667    10.97401097]
Reward: -1  Episode Reward:  -285
xxxxx
x  ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          54.56294716]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          54.56294716]
New Q values:  [-180.6        -239.26767529 -180.6          42.30663386]
Reward: -1  Episode Reward:  -286
xxxxx
x a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           70.27151667    10.97401097]
------
Step:7, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  2.98800673e+01  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  2.40440171e+01  4.04786473e+00]
Reward: -1  Episode Reward:  -287
xxxxx
x  ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          42.30663386]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          42.30663386]
New Q values:  [-180.6        -239.26767529 -180.6          23.53585867]
Reward: -1  Episode Reward:  -288
xxxxx
x a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  2.40440171e+01  4.04786473e+00]
------
Step:9, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  2.40440171e+01  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.60783644e+01  4.04786473e+00]
Reward: -1  Episode Reward:  -289
xxxxx
x  ax
xg..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          23.53585867]
------
Step:10, Action:West
State  136
Old Q Values:  [-6180.6          119.62723386  -179.38454759 -8350.36593577]
New Q values:  [-6180.6          119.62723386  -179.38454759 -3312.21656119]
Reward: -1  Episode Reward:  -290
xxxxx
xga x
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686          95.09937704   -966.76649461]
------
Step:11, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686          95.09937704   -966.76649461]
New Q values:  [-10156.11771313  -5995.686          73.32792098   -966.76649461]
Reward: -1  Episode Reward:  -291
xxxxx
x gax
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          119.62723386  -179.38454759 -3312.21656119]
------
Step:12, Action:South
State  136
Old Q Values:  [-6180.6          119.62723386  -179.38454759 -3312.21656119]
New Q values:  [-6180.6           38.52560166  -179.38454759 -3312.21656119]
Reward: 9  Episode Reward:  -282
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -49.0843063   -638.18533799 -2651.70614553 -8097.39556751]
------
Step:13, Action:South
State  208
Old Q Values:  [  -49.0843063   -638.18533799 -2651.70614553 -8097.39556751]
New Q values:  [  -49.0843063  -2055.11156703 -2651.70614553 -8097.39556751]
Reward: -1  Episode Reward:  -283
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5997.45810612 -8656.02923281 -7525.7277781  -6305.13925056]
------
Step:14, Action:North
State  288
Old Q Values:  [-5997.45810612 -8656.02923281 -7525.7277781  -6305.13925056]
New Q values:  [-2351.60951658 -8656.02923281 -7525.7277781  -6305.13925056]
Reward: -1  Episode Reward:  -284
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.06267698e+02  1.59912420e+02 -6.17035694e+03  6.08663514e+00]
------
Step:15, Action:South
State  216
Old Q Values:  [ 1.06267698e+02  1.59912420e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 1.06267698e+02 -6.42117887e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  -285
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2351.60951658 -8656.02923281 -7525.7277781  -6305.13925056]
------
Step:16, Action:North
State  288
Old Q Values:  [-2351.60951658 -8656.02923281 -7525.7277781  -6305.13925056]
New Q values:  [ -909.36349718 -8656.02923281 -7525.7277781  -6305.13925056]
Reward: -1  Episode Reward:  -286
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.06267698e+02 -6.42117887e+02 -6.17035694e+03  6.08663514e+00]
------
Step:17, Action:North
State  208
Old Q Values:  [  -49.0843063  -2055.11156703 -2651.70614553 -8097.39556751]
New Q values:  [   -8.67604202 -2055.11156703 -2651.70614553 -8097.39556751]
Reward: -1  Episode Reward:  -287
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           38.52560166  -179.38454759 -3312.21656119]
------
Step:18, Action:South
State  136
Old Q Values:  [-6180.6           38.52560166  -179.38454759 -3312.21656119]
New Q values:  [-6180.6           12.20742806  -179.38454759 -3312.21656119]
Reward: -1  Episode Reward:  -288
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   -8.67604202 -2055.11156703 -2651.70614553 -8097.39556751]
------
Step:19, Action:North
State  216
Old Q Values:  [ 1.06267698e+02 -6.42117887e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 4.89678369e+01 -6.42117887e+02 -6.17035694e+03  6.08663514e+00]
Reward: -1  Episode Reward:  -289
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          23.53585867]
------
Step:20, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          23.53585867]
New Q values:  [-180.6        -239.26767529 -180.6          13.6378528 ]
Reward: -1  Episode Reward:  -290
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.60783644e+01  4.04786473e+00]
------
Step:21, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.60783644e+01  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  9.92270162e+00  4.04786473e+00]
Reward: -1  Episode Reward:  -291
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          13.6378528 ]
------
Step:22, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          13.6378528 ]
New Q values:  [-180.6        -239.26767529 -180.6           7.83195161]
Reward: -1  Episode Reward:  -292
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  9.92270162e+00  4.04786473e+00]
------
Step:23, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  9.92270162e+00  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  5.71866613e+00  4.04786473e+00]
Reward: -1  Episode Reward:  -293
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6           7.83195161]
------
Step:24, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6           7.83195161]
New Q values:  [-180.6        -239.26767529 -180.6           4.24838048]
Reward: -1  Episode Reward:  -294
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  5.71866613e+00  4.04786473e+00]
------
Step:25, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  5.71866613e+00  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  2.96198060e+00  4.04786473e+00]
Reward: -1  Episode Reward:  -295
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6           4.24838048]
------
Step:26, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6           4.24838048]
New Q values:  [-180.6        -239.26767529 -180.6           2.31371161]
Reward: -1  Episode Reward:  -296
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  2.96198060e+00  4.04786473e+00]
------
Step:27, Action:West
State  123
Old Q Values:  [ -253.44886264 -6000.6           70.27151667    10.97401097]
New Q values:  [ -253.44886264 -6000.6           70.27151667    14.05885892]
Reward: -1  Episode Reward:  -297
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.05929374   34.23084843 -252.78192178]
------
Step:28, Action:East
State  107
Old Q Values:  [-252.35169558   14.05929374   34.23084843 -252.78192178]
New Q values:  [-252.35169558   14.05929374   14.30669879 -252.78192178]
Reward: -1  Episode Reward:  -298
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  2.96198060e+00  4.04786473e+00]
------
Step:29, Action:West
State  123
Old Q Values:  [ -253.44886264 -6000.6           70.27151667    14.05885892]
New Q values:  [ -253.44886264 -6000.6           70.27151667     9.3155532 ]
Reward: -1  Episode Reward:  -299
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.05929374   14.30669879 -252.78192178]
------
Step:30, Action:East
State  107
Old Q Values:  [-252.35169558   14.05929374   14.30669879 -252.78192178]
New Q values:  [-252.35169558   14.05929374    6.33703894 -252.78192178]
Reward: -1  Episode Reward:  -300
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  2.96198060e+00  4.04786473e+00]
------
Step:31, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  2.96198060e+00  4.04786473e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  2.96198060e+00  5.23693402e+00]
Reward: -1  Episode Reward:  -301
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.05929374    6.33703894 -252.78192178]
------
Step:32, Action:South
State  107
Old Q Values:  [-252.35169558   14.05929374    6.33703894 -252.78192178]
New Q values:  [-252.35169558   11.0237175     6.33703894 -252.78192178]
Reward: 9  Episode Reward:  -292
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
------
Step:33, Action:South
State  181
Old Q Values:  [   6.98192268  119.14523797   60.4869543  -180.6       ]
New Q values:  [   6.98192268  159.71371265   60.4869543  -180.6       ]
Reward: 9  Episode Reward:  -283
xxxxx
x   x
x .gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 166.58777651 -289.59534477  355.51872487 -180.6       ]
------
Step:34, Action:East
State  257
Old Q Values:  [5617.45153544 -180.6          85.75155681 6620.49276028]
New Q values:  [5617.45153544 -180.6          88.572404   6620.49276028]
Reward: 9  Episode Reward:  -274
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477  -211.18692245   162.90593758]
------
Step:35, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477  -211.18692245   162.90593758]
New Q values:  [-1463.93735889 -6212.61234477  -211.18692245   171.21799249]
Reward: -1  Episode Reward:  -275
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 166.58777651 -289.59534477  355.51872487 -180.6       ]
------
Step:36, Action:North
State  261
Old Q Values:  [ 166.58777651 -289.59534477  355.51872487 -180.6       ]
New Q values:  [ 182.30702767 -289.59534477  355.51872487 -180.6       ]
Reward: -1  Episode Reward:  -276
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  288.10467651 387.57305688   0.        ]
------
Step:37, Action:East
State  179
Old Q Values:  [ 0.00000000e+00 -6.00000000e-01  6.00062263e+04  0.00000000e+00]
New Q values:  [ 0.00000000e+00 -6.00000000e-01  8.40134264e+04  0.00000000e+00]
Reward: 100009  Episode Reward:  99733
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -909.36349718 -8656.02923281 -7525.7277781  -6305.13925056]
------
Step:1, Action:North
State  288
Old Q Values:  [ -909.36349718 -8656.02923281 -7525.7277781  -6305.13925056]
New Q values:  [-6360.94821148 -8656.02923281 -7525.7277781  -6305.13925056]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038  -244.53978092   336.47763072     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [-3431.06190038  -244.53978092   336.47763072     0.        ]
New Q values:  [-3431.06190038  -244.53978092   291.01938823     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   503.42778648    72.14653931     0.        ]
------
Step:2, Action:South
State  196
Old Q Values:  [-2469.90645144   503.42778648    72.14653931     0.        ]
New Q values:  [-2469.90645144   444.70631545    72.14653931     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  14.77428561 -180.6         793.11733619   -6.75709867]
------
Step:3, Action:East
State  272
Old Q Values:  [-1463.93735889 -6212.61234477  -211.18692245   171.21799249]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415   171.21799249]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6360.94821148 -8656.02923281 -7525.7277781  -6305.13925056]
------
Step:4, Action:West
State  288
Old Q Values:  [-6360.94821148 -8656.02923281 -7525.7277781  -6305.13925056]
New Q values:  [-6360.94821148 -8656.02923281 -7525.7277781  -2471.29030248]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415   171.21799249]
------
Step:5, Action:West
State  276
Old Q Values:  [  14.77428561 -180.6         793.11733619   -6.75709867]
New Q values:  [  14.77428561 -180.6         793.11733619  109.35277799]
Reward: 9  Episode Reward:  35
xxxxx
x g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 182.30702767 -289.59534477  355.51872487 -180.6       ]
------
Step:6, Action:East
State  261
Old Q Values:  [ 182.30702767 -289.59534477  355.51872487 -180.6       ]
New Q values:  [ 182.30702767 -289.59534477  766.98906088 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.          -63.86984116 2084.60523643]
------
Step:7, Action:West
State  273
Old Q Values:  [  53.24162309    0.          -63.86984116 2084.60523643]
New Q values:  [  53.24162309    0.          -63.86984116 1063.33881284]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 182.30702767 -289.59534477  766.98906088 -180.6       ]
------
Step:8, Action:East
State  261
Old Q Values:  [ 182.30702767 -289.59534477  766.98906088 -180.6       ]
New Q values:  [ 182.30702767 -289.59534477  625.1972682  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.          -63.86984116 1063.33881284]
------
Step:9, Action:West
State  276
Old Q Values:  [  14.77428561 -180.6         793.11733619  109.35277799]
New Q values:  [  14.77428561 -180.6         793.11733619  230.70029166]
Reward: -1  Episode Reward:  31
xxxxx
x g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 182.30702767 -289.59534477  625.1972682  -180.6       ]
------
Step:10, Action:East
State  261
Old Q Values:  [ 182.30702767 -289.59534477  625.1972682  -180.6       ]
New Q values:  [ 182.30702767 -289.59534477  487.41410814 -180.6       ]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  14.77428561 -180.6         793.11733619  230.70029166]
------
Step:11, Action:East
State  276
Old Q Values:  [  14.77428561 -180.6         793.11733619  230.70029166]
New Q values:  [  14.77428561 -180.6        -424.74015627  230.70029166]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6360.94821148 -8656.02923281 -7525.7277781  -2471.29030248]
------
Step:12, Action:West
State  288
Old Q Values:  [-6360.94821148 -8656.02923281 -7525.7277781  -2471.29030248]
New Q values:  [-6360.94821148 -8656.02923281 -7525.7277781   -919.90603349]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  14.77428561 -180.6        -424.74015627  230.70029166]
------
Step:13, Action:North
State  276
Old Q Values:  [  14.77428561 -180.6        -424.74015627  230.70029166]
New Q values:  [   5.30971424 -180.6        -424.74015627  230.70029166]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -5.9946000e+03  0.0000000e+00  0.0000000e+00]
------
Step:14, Action:East
State  195
Old Q Values:  [ 38.85388605 455.57989448  30.81591527   0.        ]
New Q values:  [ 38.85388605 455.57989448  21.35538099   0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   2.06134572  -23.4318597  -180.6          12.09671629]
------
Step:15, Action:West
State  208
Old Q Values:  [   -8.67604202 -2055.11156703 -2651.70614553 -8097.39556751]
New Q values:  [   -8.67604202 -2055.11156703 -2651.70614553 -3179.37964193]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   200.59528358 -9485.76780054     0.        ]
------
Step:16, Action:South
State  192
Old Q Values:  [  0.38977704 124.83072187 123.9417184    0.        ]
New Q values:  [  0.38977704 100.6976865  123.9417184    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415   171.21799249]
------
Step:17, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415   171.21799249]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415   214.11142944]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 182.30702767 -289.59534477  487.41410814 -180.6       ]
------
Step:18, Action:North
State  261
Old Q Values:  [ 182.30702767 -289.59534477  487.41410814 -180.6       ]
New Q values:  [ 120.23692486 -289.59534477  487.41410814 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  159.71371265   60.4869543  -180.6       ]
------
Step:19, Action:South
State  183
Old Q Values:  [  6.1762476  288.10467651 387.57305688   0.        ]
New Q values:  [  6.1762476  260.86610305 387.57305688   0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 120.23692486 -289.59534477  487.41410814 -180.6       ]
------
Step:20, Action:North
State  261
Old Q Values:  [ 120.23692486 -289.59534477  487.41410814 -180.6       ]
New Q values:  [ 163.76668701 -289.59534477  487.41410814 -180.6       ]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  260.86610305 387.57305688   0.        ]
------
Step:21, Action:East
State  181
Old Q Values:  [   6.98192268  159.71371265   60.4869543  -180.6       ]
New Q values:  [   6.98192268  159.71371265   83.7733668  -180.6       ]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   200.59528358 -9485.76780054     0.        ]
------
Step:22, Action:South
State  192
Old Q Values:  [  0.38977704 100.6976865  123.9417184    0.        ]
New Q values:  [  0.38977704 103.91250343 123.9417184    0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415   214.11142944]
------
Step:23, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415   214.11142944]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415   132.84553253]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  104.36074887 -6457.4598       159.33653583 -6307.02      ]
------
Step:24, Action:East
State  260
Old Q Values:  [  104.36074887 -6457.4598       159.33653583 -6307.02      ]
New Q values:  [  104.36074887 -6457.4598       102.98827409 -6307.02      ]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415   132.84553253]
------
Step:25, Action:West
State  276
Old Q Values:  [   5.30971424 -180.6        -424.74015627  230.70029166]
New Q values:  [   5.30971424 -180.6        -424.74015627  237.9043491 ]
Reward: -1  Episode Reward:  25
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 163.76668701 -289.59534477  487.41410814 -180.6       ]
------
Step:26, Action:East
State  261
Old Q Values:  [ 163.76668701 -289.59534477  487.41410814 -180.6       ]
New Q values:  [ 163.76668701 -289.59534477  234.21930301 -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415   132.84553253]
------
Step:27, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415   132.84553253]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415   122.80400391]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 163.76668701 -289.59534477  234.21930301 -180.6       ]
------
Step:28, Action:North
State  261
Old Q Values:  [ 163.76668701 -289.59534477  234.21930301 -180.6       ]
New Q values:  [ 181.17859187 -289.59534477  234.21930301 -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  260.86610305 387.57305688   0.        ]
------
Step:29, Action:East
State  183
Old Q Values:  [  6.1762476  260.86610305 387.57305688   0.        ]
New Q values:  [  6.1762476  260.86610305 291.1031911    0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 38.85388605 455.57989448  21.35538099   0.        ]
------
Step:30, Action:South
State  193
Old Q Values:  [-5922.26708831   200.59528358 -9485.76780054     0.        ]
New Q values:  [-5922.26708831   398.63975728 -9485.76780054     0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  53.24162309    0.          -63.86984116 1063.33881284]
------
Step:31, Action:West
State  273
Old Q Values:  [  53.24162309    0.          -63.86984116 1063.33881284]
New Q values:  [ 53.24162309   0.         -63.86984116 495.00131604]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 181.17859187 -289.59534477  234.21930301 -180.6       ]
------
Step:32, Action:East
State  261
Old Q Values:  [ 181.17859187 -289.59534477  234.21930301 -180.6       ]
New Q values:  [  181.17859187  -289.59534477 -5870.07107762  -180.6       ]
Reward: -10001  Episode Reward:  -9982
xxxxx
x ..x
x   x
x g x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6360.94821148 -8656.02923281 -7525.7277781   -919.90603349]
------
Step:1, Action:West
State  288
Old Q Values:  [-6360.94821148 -8656.02923281 -7525.7277781   -919.90603349]
New Q values:  [-6360.94821148 -8656.02923281 -7525.7277781   -325.72121222]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415   122.80400391]
------
Step:2, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 495.00131604]
New Q values:  [ 53.24162309   0.         -63.86984116 257.75410398]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  181.17859187  -289.59534477 -5870.07107762  -180.6       ]
------
Step:3, Action:North
State  261
Old Q Values:  [  181.17859187  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  125.78555054  -289.59534477 -5870.07107762  -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  159.71371265   83.7733668  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   6.98192268  159.71371265   83.7733668  -180.6       ]
New Q values:  [   6.98192268  101.02115022   83.7733668  -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  125.78555054  -289.59534477 -5870.07107762  -180.6       ]
------
Step:5, Action:North
State  261
Old Q Values:  [  125.78555054  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  137.04517755  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  260.86610305 291.1031911    0.        ]
------
Step:6, Action:East
State  183
Old Q Values:  [  6.1762476  260.86610305 291.1031911    0.        ]
New Q values:  [   6.1762476   260.86610305 1764.60507411    0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  22.48535485 1274.14490661 5475.87932557    0.        ]
------
Step:7, Action:East
State  193
Old Q Values:  [-5922.26708831   398.63975728 -9485.76780054     0.        ]
New Q values:  [-5922.26708831   398.63975728 -9791.50993282     0.        ]
Reward: -9991  Episode Reward:  -9957
xxxxx
x ..x
x  gx
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6           2.31371161]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6           2.31371161]
New Q values:  [-180.6        -239.26767529 -180.6          27.40693964]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           70.27151667     9.3155532 ]
------
Step:2, Action:East
State  123
Old Q Values:  [ -253.44886264 -6000.6           70.27151667     9.3155532 ]
New Q values:  [ -253.44886264 -6000.6           35.73068856     9.3155532 ]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          27.40693964]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          27.40693964]
New Q values:  [-180.6        -239.26767529 -180.6          21.08198243]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           35.73068856     9.3155532 ]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  2.96198060e+00  5.23693402e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  6.90938697e+00  5.23693402e+00]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          21.08198243]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          21.08198243]
New Q values:  [-180.6        -239.26767529 -180.6          18.55199954]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           35.73068856     9.3155532 ]
------
Step:6, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  6.90938697e+00  5.23693402e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.72935465e+00  5.23693402e+00]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          18.55199954]
------
Step:7, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          18.55199954]
New Q values:  [-180.6        -239.26767529 -180.6          17.54000638]
Reward: -1  Episode Reward:  3
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           35.73068856     9.3155532 ]
------
Step:8, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.72935465e+00  5.23693402e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.75374377e+00  5.23693402e+00]
Reward: -1  Episode Reward:  2
xxxxx
x. ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -239.26767529 -180.6          17.54000638]
------
Step:9, Action:West
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6          17.54000638]
New Q values:  [-180.6        -239.26767529 -180.6           8.74212569]
Reward: -1  Episode Reward:  1
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.75374377e+00  5.23693402e+00]
------
Step:10, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686          73.32792098   -966.76649461]
New Q values:  [-10156.11771313  -5995.686          32.39339681   -966.76649461]
Reward: -1  Episode Reward:  0
xxxxx
x.gax
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           12.20742806  -179.38454759 -3312.21656119]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6        -239.26767529 -180.6           8.74212569]
New Q values:  [-180.6         -92.90988272 -180.6           8.74212569]
Reward: 9  Episode Reward:  9
xxxxx
x.  x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   -8.67604202 -2055.11156703 -2651.70614553 -3179.37964193]
------
Step:12, Action:North
State  208
Old Q Values:  [   -8.67604202 -2055.11156703 -2651.70614553 -3179.37964193]
New Q values:  [-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -3.17937964e+03]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -92.90988272 -180.6           8.74212569]
------
Step:13, Action:West
State  136
Old Q Values:  [-6180.6           12.20742806  -179.38454759 -3312.21656119]
New Q values:  [-6180.6           12.20742806  -179.38454759 -1315.76860544]
Reward: -1  Episode Reward:  7
xxxxx
xga x
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686          32.39339681   -966.76649461]
------
Step:14, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.75374377e+00  5.23693402e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  5.12413522e+00  5.23693402e+00]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -92.90988272 -180.6           8.74212569]
------
Step:15, Action:West
State  136
Old Q Values:  [-6180.6           12.20742806  -179.38454759 -1315.76860544]
New Q values:  [-6180.6           12.20742806  -179.38454759  -517.18942313]
Reward: -1  Episode Reward:  5
xxxxx
xga x
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686          32.39339681   -966.76649461]
------
Step:16, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  5.12413522e+00  5.23693402e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.07229179e+00  5.23693402e+00]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -92.90988272 -180.6           8.74212569]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6         -92.90988272 -180.6           8.74212569]
New Q values:  [-180.6         -92.90988272 -180.6           4.46793048]
Reward: -1  Episode Reward:  3
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.07229179e+00  5.23693402e+00]
------
Step:18, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.07229179e+00  5.23693402e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.07229179e+00  1.08018889e+01]
Reward: 9  Episode Reward:  12
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.0237175     6.33703894 -252.78192178]
------
Step:19, Action:South
State  110
Old Q Values:  [-1.80600000e+02 -6.21338664e+03  2.76177191e+00  0.00000000e+00]
New Q values:  [ -180.6        -2318.42233338     2.76177191     0.        ]
Reward: 9  Episode Reward:  21
xxxxx
x   x
xa. x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[  0.           0.         538.44106874   0.        ]
------
Step:20, Action:East
State  190
Old Q Values:  [ 1.04129094  0.         -0.84        0.        ]
New Q values:  [1.04129094 0.         5.064      0.        ]
Reward: 9  Episode Reward:  30
xxxxx
x   x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[0. 0. 0. 0.]
------
Step:21, Action:North
State  200
Old Q Values:  [ 2.47730124  0.         84.95798214 -0.84      ]
New Q values:  [ 3.63148715  0.         84.95798214 -0.84      ]
Reward: -1  Episode Reward:  29
xxxxx
x a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.07229179e+00  1.08018889e+01]
------
Step:22, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686          32.39339681   -966.76649461]
New Q values:  [-10156.11771313  -5995.686          32.39339681  -6387.30659784]
Reward: -10001  Episode Reward:  -9972
xxxxx
xg  x
x   x
x. .x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6360.94821148 -8656.02923281 -7525.7277781   -325.72121222]
------
Step:1, Action:West
State  288
Old Q Values:  [-6360.94821148 -8656.02923281 -7525.7277781   -325.72121222]
New Q values:  [-6360.94821148 -8656.02923281 -7525.7277781    -47.5622537 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 257.75410398]
------
Step:2, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 257.75410398]
New Q values:  [ 53.24162309   0.         -63.86984116 149.61519485]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  137.04517755  -289.59534477 -5870.07107762  -180.6       ]
------
Step:3, Action:North
State  261
Old Q Values:  [  137.04517755  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  589.59959325  -289.59534477 -5870.07107762  -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa .x
x g x
xxxxx
Step:4, Action:South
State  182
Old Q Values:  [  0.           0.         538.44106874   0.        ]
New Q values:  [    0.         -5969.29177534   538.44106874     0.        ]
Reward: -10001  Episode Reward:  -9974
xxxxx
x...x
x  .x
xg  x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -180.6            8.49412954 -2781.31337986  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -180.6            8.49412954 -2781.31337986  -180.6       ]
New Q values:  [ -180.6           39.10399688 -2781.31337986  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.98192268  101.02115022   83.7733668  -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   6.98192268  101.02115022   83.7733668  -180.6       ]
New Q values:  [   6.98192268  222.68833806   83.7733668  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  589.59959325  -289.59534477 -5870.07107762  -180.6       ]
------
Step:3, Action:North
State  261
Old Q Values:  [  589.59959325  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  764.62135953  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x  .x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[   6.1762476   260.86610305 1764.60507411    0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [   6.1762476   260.86610305 1764.60507411    0.        ]
New Q values:  [  6.1762476  260.86610305 711.24202964   0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -5.9946000e+03  0.0000000e+00  0.0000000e+00]
------
Step:5, Action:East
State  199
Old Q Values:  [  22.48535485 1274.14490661 5475.87932557    0.        ]
New Q values:  [  22.48535485 1274.14490661 2199.38074512    0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x  .x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   2.06134572  -23.4318597  -180.6          12.09671629]
------
Step:6, Action:West
State  210
Old Q Values:  [   2.06134572  -23.4318597  -180.6          12.09671629]
New Q values:  [   2.06134572  -23.4318597  -180.6           4.23868652]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -5.9946000e+03  0.0000000e+00  0.0000000e+00]
------
Step:7, Action:East
State  199
Old Q Values:  [  22.48535485 1274.14490661 2199.38074512    0.        ]
New Q values:  [  22.48535485 1274.14490661  880.423904      0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   2.06134572  -23.4318597  -180.6           4.23868652]
------
Step:8, Action:West
State  216
Old Q Values:  [ 4.89678369e+01 -6.42117887e+02 -6.17035694e+03  6.08663514e+00]
New Q values:  [ 4.89678369e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[0.13244338 0.25674    0.         0.        ]
------
Step:9, Action:South
State  199
Old Q Values:  [  22.48535485 1274.14490661  880.423904      0.        ]
New Q values:  [  22.48535485 1424.04718165  880.423904      0.        ]
Reward: 9  Episode Reward:  41
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   5.41827       0.         3029.96406335    0.        ]
------
Step:10, Action:North
State  277
Old Q Values:  [   5.41827       0.         3029.96406335    0.        ]
New Q values:  [1.64433000e+00 0.00000000e+00 3.02996406e+03 0.00000000e+00]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[0.13244338 0.25674    0.         0.        ]
------
Step:11, Action:South
State  196
Old Q Values:  [-2469.90645144   444.70631545    72.14653931     0.        ]
New Q values:  [-2469.90645144   248.65383091    72.14653931     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   5.30971424 -180.6        -424.74015627  237.9043491 ]
------
Step:12, Action:West
State  276
Old Q Values:  [   5.30971424 -180.6        -424.74015627  237.9043491 ]
New Q values:  [   5.30971424 -180.6        -424.74015627  323.9481475 ]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  764.62135953  -289.59534477 -5870.07107762  -180.6       ]
------
Step:13, Action:North
State  261
Old Q Values:  [  764.62135953  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  518.62115271  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  260.86610305 711.24202964   0.        ]
------
Step:14, Action:East
State  181
Old Q Values:  [   6.98192268  222.68833806   83.7733668  -180.6       ]
New Q values:  [   6.98192268  222.68833806   32.98636872 -180.6       ]
Reward: -1  Episode Reward:  36
xxxxx
x  .x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[0.13244338 0.25674    0.         0.        ]
------
Step:15, Action:South
State  196
Old Q Values:  [-2469.90645144   248.65383091    72.14653931     0.        ]
New Q values:  [-2469.90645144   196.04597662    72.14653931     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   5.30971424 -180.6        -424.74015627  323.9481475 ]
------
Step:16, Action:West
State  276
Old Q Values:  [   5.30971424 -180.6        -424.74015627  323.9481475 ]
New Q values:  [   5.30971424 -180.6        -424.74015627  284.56560481]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  518.62115271  -289.59534477 -5870.07107762  -180.6       ]
------
Step:17, Action:North
State  260
Old Q Values:  [  104.36074887 -6457.4598       102.98827409 -6307.02      ]
New Q values:  [  128.45011602 -6457.4598       102.98827409 -6307.02      ]
Reward: -1  Episode Reward:  33
xxxxx
xg .x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038  -244.53978092   291.01938823     0.        ]
------
Step:18, Action:East
State  181
Old Q Values:  [   6.98192268  222.68833806   32.98636872 -180.6       ]
New Q values:  [   6.98192268  222.68833806   71.40834047 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x a x
x  .x
xxxxx
Step:19, Action:West
State  205
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.         0.         0.         0.76958789]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   4.56529296    2.17469001 -951.34636588 -180.6       ]
------
Step:20, Action:North
State  181
Old Q Values:  [   6.98192268  222.68833806   71.40834047 -180.6       ]
New Q values:  [   2.19276907  222.68833806   71.40834047 -180.6       ]
Reward: -1  Episode Reward:  30
xxxxx
xag.x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[0. 0. 0. 0.]
------
Step:21, Action:North
State  109
Old Q Values:  [ -180.6           39.10399688 -2781.31337986  -180.6       ]
New Q values:  [ -241.10880094    39.10399688 -2781.31337986  -180.6       ]
Reward: -301  Episode Reward:  -271
xxxxx
xa gx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    39.10399688 -2781.31337986  -180.6       ]
------
Step:22, Action:South
State  101
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [ 0.         66.20650142  0.          0.        ]
Reward: -1  Episode Reward:  -272
xxxxx
x g.x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.19276907  222.68833806   71.40834047 -180.6       ]
------
Step:23, Action:South
State  181
Old Q Values:  [   2.19276907  222.68833806   71.40834047 -180.6       ]
New Q values:  [   2.19276907  244.06168104   71.40834047 -180.6       ]
Reward: -1  Episode Reward:  -273
xxxxx
x  .x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  518.62115271  -289.59534477 -5870.07107762  -180.6       ]
------
Step:24, Action:North
State  261
Old Q Values:  [  518.62115271  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  420.22106998  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  -274
xxxxx
x  .x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  260.86610305 711.24202964   0.        ]
------
Step:25, Action:East
State  183
Old Q Values:  [  6.1762476  260.86610305 711.24202964   0.        ]
New Q values:  [  6.1762476  260.86610305 711.11096635   0.        ]
Reward: -1  Episode Reward:  -275
xxxxx
x  .x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  22.48535485 1424.04718165  880.423904      0.        ]
------
Step:26, Action:South
State  198
Old Q Values:  [-2.7887208e-01 -5.9946000e+03  0.0000000e+00  0.0000000e+00]
New Q values:  [-2.78872080e-01 -8.31307032e+03  0.00000000e+00  0.00000000e+00]
Reward: -10001  Episode Reward:  -10276
xxxxx
x  .x
x   x
x g.x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6360.94821148 -8656.02923281 -7525.7277781    -47.5622537 ]
------
Step:1, Action:West
State  288
Old Q Values:  [-6360.94821148 -8656.02923281 -7525.7277781    -47.5622537 ]
New Q values:  [-6360.94821148 -8656.02923281 -7525.7277781     23.2162997 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415   122.80400391]
------
Step:2, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415   122.80400391]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415    93.05663637]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x. .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  128.45011602 -6457.4598       102.98827409 -6307.02      ]
------
Step:3, Action:North
State  260
Old Q Values:  [  128.45011602 -6457.4598       102.98827409 -6307.02      ]
New Q values:  [-5855.91413712 -6457.4598       102.98827409 -6307.02      ]
Reward: -9991  Episode Reward:  -9973
xxxxx
x...x
xg .x
x   x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  0.38977704 103.91250343 123.9417184    0.        ]
------
Step:1, Action:East
State  200
Old Q Values:  [ 3.63148715  0.         84.95798214 -0.84      ]
New Q values:  [ 3.63148715  0.         54.07354392 -0.84      ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.89678369e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
------
Step:2, Action:North
State  216
Old Q Values:  [ 4.89678369e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
New Q values:  [ 2.86493632e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           12.20742806  -179.38454759  -517.18942313]
------
Step:3, Action:South
State  138
Old Q Values:  [-180.6         -92.90988272 -180.6           4.46793048]
New Q values:  [-180.6         -29.16914414 -180.6           4.46793048]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.86493632e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
------
Step:4, Action:North
State  210
Old Q Values:  [   2.06134572  -23.4318597  -180.6           4.23868652]
New Q values:  [   1.56491743  -23.4318597  -180.6           4.23868652]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           4.46793048]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6           4.46793048]
New Q values:  [-180.6         -29.16914414 -180.6          10.42773885]
Reward: 9  Episode Reward:  25
xxxxx
x a x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.07229179e+00  1.08018889e+01]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.07229179e+00  1.08018889e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.07229179e+00  7.02787079e+00]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.0237175     6.33703894 -252.78192178]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558   11.0237175     6.33703894 -252.78192178]
New Q values:  [-252.35169558   13.17751122    6.33703894 -252.78192178]
Reward: 9  Episode Reward:  33
xxxxx
x   x
xa  x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.         11.22674742  0.        ]
------
Step:8, Action:East
State  190
Old Q Values:  [1.04129094 0.         5.064      0.        ]
New Q values:  [1.04129094 0.         5.09673373 0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    12.23711243     0.        ]
------
Step:9, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458    12.23711243     0.        ]
New Q values:  [    0.         -5884.35407458   181.84834406     0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  0.         591.84499697   0.           0.        ]
------
Step:10, Action:South
State  218
Old Q Values:  [  0.         591.84499697   0.           0.        ]
New Q values:  [  0.        249.1028887   0.          0.       ]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6360.94821148 -8656.02923281 -7525.7277781     23.2162997 ]
------
Step:11, Action:West
State  288
Old Q Values:  [-6360.94821148 -8656.02923281 -7525.7277781     23.2162997 ]
New Q values:  [-6360.94821148 -8656.02923281 -7525.7277781  -5957.39648921]
Reward: -9991  Episode Reward:  -9951
xxxxx
x   x
x   x
x.g x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           35.73068856     9.3155532 ]
------
Step:1, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.07229179e+00  7.02787079e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.01572384e+01  7.02787079e+00]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6          10.42773885]
------
Step:2, Action:West
State  136
Old Q Values:  [-6180.6           12.20742806  -179.38454759  -517.18942313]
New Q values:  [-6180.6           12.20742806  -179.38454759 -6197.75775021]
Reward: -10001  Episode Reward:  -9992
xxxxx
x.g x
x.. x
x...x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -3.17937964e+03]
------
Step:1, Action:North
State  216
Old Q Values:  [ 2.86493632e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
New Q values:  [ 1.99880669e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
Reward: 9  Episode Reward:  9
xxxxx
x .ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6          10.42773885]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6          10.42773885]
New Q values:  [-180.6         -29.16914414 -180.6          12.61826705]
Reward: 9  Episode Reward:  18
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.01572384e+01  7.02787079e+00]
------
Step:3, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.01572384e+01  7.02787079e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.24837546e+00  7.02787079e+00]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6          12.61826705]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6          12.61826705]
New Q values:  [-180.6         -29.16914414 -180.6           6.62181946]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.24837546e+00  7.02787079e+00]
------
Step:5, Action:East
State  123
Old Q Values:  [ -253.44886264 -6000.6           35.73068856     9.3155532 ]
New Q values:  [ -253.44886264 -6000.6           15.67882126     9.3155532 ]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           6.62181946]
------
Step:6, Action:West
State  136
Old Q Values:  [-6180.6           12.20742806  -179.38454759 -6197.75775021]
New Q values:  [-6180.6           12.20742806  -179.38454759 -2478.23658912]
Reward: -1  Episode Reward:  14
xxxxx
x agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.88652971e+03  4.88836988e+00]
------
Step:7, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686          32.39339681  -6387.30659784]
New Q values:  [-10156.11771313  -5995.686          32.39339681  -2543.79144007]
Reward: -1  Episode Reward:  13
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    39.10399688 -2781.31337986  -180.6       ]
------
Step:8, Action:South
State  109
Old Q Values:  [ -241.10880094    39.10399688 -2781.31337986  -180.6       ]
New Q values:  [ -241.10880094    22.41118664 -2781.31337986  -180.6       ]
Reward: 9  Episode Reward:  22
xxxxx
x  gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   4.56529296    2.17469001 -951.34636588 -180.6       ]
------
Step:9, Action:North
State  189
Old Q Values:  [   4.56529296    2.17469001 -951.34636588 -180.6       ]
New Q values:  [   7.94947318    2.17469001 -951.34636588 -180.6       ]
Reward: -1  Episode Reward:  21
xxxxx
xag x
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    22.41118664 -2781.31337986  -180.6       ]
------
Step:10, Action:South
State  109
Old Q Values:  [ -241.10880094    22.41118664 -2781.31337986  -180.6       ]
New Q values:  [ -241.10880094    10.74931661 -2781.31337986  -180.6       ]
Reward: -1  Episode Reward:  20
xxxxx
x  gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   7.94947318    2.17469001 -951.34636588 -180.6       ]
------
Step:11, Action:North
State  189
Old Q Values:  [   7.94947318    2.17469001 -951.34636588 -180.6       ]
New Q values:  [  32.0991032     2.17469001 -951.34636588 -180.6       ]
Reward: -1  Episode Reward:  19
xxxxx
xa  x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.          3.43614185 98.39771311  0.        ]
------
Step:12, Action:East
State  111
Old Q Values:  [ 0.          3.43614185 98.39771311  0.        ]
New Q values:  [ 0.          3.43614185 43.46273162  0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           15.67882126     9.3155532 ]
------
Step:13, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.24837546e+00  7.02787079e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.28589602e+00  7.02787079e+00]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           6.62181946]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6           6.62181946]
New Q values:  [-180.6         -29.16914414 -180.6           6.75237416]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           15.67882126     9.3155532 ]
------
Step:15, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.28589602e+00  7.02787079e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.14007066e+00  7.02787079e+00]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           6.75237416]
------
Step:16, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6           6.75237416]
New Q values:  [-180.6         -29.16914414 -180.6           4.2093109 ]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.14007066e+00  7.02787079e+00]
------
Step:17, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.14007066e+00  7.02787079e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.14007066e+00  1.52499678e+01]
Reward: -1  Episode Reward:  13
xxxxx
xa  x
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.          3.43614185 43.46273162  0.        ]
------
Step:18, Action:East
State  111
Old Q Values:  [ 0.          3.43614185 43.46273162  0.        ]
New Q values:  [ 0.          3.43614185 21.48873903  0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           15.67882126     9.3155532 ]
------
Step:19, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.14007066e+00  1.52499678e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.91882153e+00  1.52499678e+01]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           4.2093109 ]
------
Step:20, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6           4.2093109 ]
New Q values:  [-180.6         -29.16914414 -180.6           5.6587147 ]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.91882153e+00  1.52499678e+01]
------
Step:21, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.91882153e+00  1.52499678e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.91882153e+00  6.32851869e+00]
Reward: -1  Episode Reward:  9
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -2318.42233338     2.76177191     0.        ]
------
Step:22, Action:East
State  110
Old Q Values:  [ -180.6        -2318.42233338     2.76177191     0.        ]
New Q values:  [ -180.6        -2318.42233338     2.40326437     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x a x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.91882153e+00  6.32851869e+00]
------
Step:23, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.91882153e+00  6.32851869e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.91882153e+00  8.37802919e+00]
Reward: -1  Episode Reward:  7
xxxxx
xa  x
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.          3.43614185 21.48873903  0.        ]
------
Step:24, Action:East
State  111
Old Q Values:  [ 0.          3.43614185 21.48873903  0.        ]
New Q values:  [ 0.          3.43614185 10.50890437  0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.91882153e+00  8.37802919e+00]
------
Step:25, Action:West
State  123
Old Q Values:  [ -253.44886264 -6000.6           15.67882126     9.3155532 ]
New Q values:  [ -253.44886264 -6000.6           15.67882126     6.27889259]
Reward: -1  Episode Reward:  5
xxxxx
xa  x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[ 0.          3.43614185 10.50890437  0.        ]
------
Step:26, Action:East
State  109
Old Q Values:  [ -241.10880094    10.74931661 -2781.31337986  -180.6       ]
New Q values:  [ -241.10880094    10.74931661 -1111.65884098  -180.6       ]
Reward: -1  Episode Reward:  4
xxxxx
x agx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.88652971e+03  4.88836988e+00]
------
Step:27, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686          32.39339681  -2543.79144007]
New Q values:  [-10156.11771313  -5995.686          32.39339681  -1014.89178105]
Reward: -1  Episode Reward:  3
xxxxx
xag x
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    10.74931661 -1111.65884098  -180.6       ]
------
Step:28, Action:South
State  108
Old Q Values:  [-6.1806000e+03  8.2694784e+00  3.9370827e-01  0.0000000e+00]
New Q values:  [-6.18060000e+03  2.70779136e+00  3.93708270e-01  0.00000000e+00]
Reward: -1  Episode Reward:  2
xxxxx
xg  x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-1343.6616728   -755.35550854     0.             0.        ]
------
Step:29, Action:East
State  189
Old Q Values:  [  32.0991032     2.17469001 -951.34636588 -180.6       ]
New Q values:  [  32.0991032     2.17469001 -358.91648318 -180.6       ]
Reward: 9  Episode Reward:  11
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 3.63148715  0.         54.07354392 -0.84      ]
------
Step:30, Action:East
State  201
Old Q Values:  [10.42162111  0.          0.          0.024     ]
New Q values:  [10.42162111  0.          5.39642008  0.024     ]
Reward: -1  Episode Reward:  10
xxxxx
x  gx
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.99880669e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
------
Step:31, Action:West
State  216
Old Q Values:  [ 1.99880669e+01 -6.42117887e+02 -6.17035694e+03  1.91167606e+00]
New Q values:  [   19.98806692  -642.11788715 -6170.35693855    16.3867336 ]
Reward: -1  Episode Reward:  9
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 3.63148715  0.         54.07354392 -0.84      ]
------
Step:32, Action:East
State  200
Old Q Values:  [ 3.63148715  0.         54.07354392 -0.84      ]
New Q values:  [ 3.63148715  0.         27.02583764 -0.84      ]
Reward: -1  Episode Reward:  8
xxxxx
xg  x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   19.98806692  -642.11788715 -6170.35693855    16.3867336 ]
------
Step:33, Action:North
State  216
Old Q Values:  [   19.98806692  -642.11788715 -6170.35693855    16.3867336 ]
New Q values:  [    9.09284118  -642.11788715 -6170.35693855    16.3867336 ]
Reward: -1  Episode Reward:  7
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           5.6587147 ]
------
Step:34, Action:West
State  136
Old Q Values:  [-6180.6           12.20742806  -179.38454759 -2478.23658912]
New Q values:  [-6180.6           12.20742806  -179.38454759  -982.17661661]
Reward: -1  Episode Reward:  6
xxxxx
xga x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686          32.39339681  -1014.89178105]
------
Step:35, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.91882153e+00  8.37802919e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.86514302e+00  8.37802919e+00]
Reward: -1  Episode Reward:  5
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           5.6587147 ]
------
Step:36, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6           5.6587147 ]
New Q values:  [-180.6         -29.16914414 -180.6           4.17689464]
Reward: -1  Episode Reward:  4
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.86514302e+00  8.37802919e+00]
------
Step:37, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.86514302e+00  8.37802919e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.86514302e+00  6.70446504e+00]
Reward: -1  Episode Reward:  3
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   13.17751122    6.33703894 -252.78192178]
------
Step:38, Action:South
State  107
Old Q Values:  [-252.35169558   13.17751122    6.33703894 -252.78192178]
New Q values:  [-252.35169558    8.03902871    6.33703894 -252.78192178]
Reward: -1  Episode Reward:  2
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.         11.22674742  0.        ]
------
Step:39, Action:East
State  187
Old Q Values:  [-0.11058345  0.         11.22674742  0.        ]
New Q values:  [-0.11058345  0.         58.44520219  0.        ]
Reward: -1  Episode Reward:  1
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458   181.84834406     0.        ]
------
Step:40, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458   181.84834406     0.        ]
New Q values:  [    0.         -5884.35407458   146.87020423     0.        ]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  0.        249.1028887   0.          0.       ]
------
Step:41, Action:South
State  218
Old Q Values:  [  0.        249.1028887   0.          0.       ]
New Q values:  [    0.         -1682.17779128     0.             0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-6360.94821148 -8656.02923281 -7525.7277781  -5957.39648921]
------
Step:42, Action:North
State  288
Old Q Values:  [-6360.94821148 -8656.02923281 -7525.7277781  -5957.39648921]
New Q values:  [-2544.97928459 -8656.02923281 -7525.7277781  -5957.39648921]
Reward: -1  Episode Reward:  8
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[    0.         -1682.17779128     0.             0.        ]
------
Step:43, Action:North
State  218
Old Q Values:  [    0.         -1682.17779128     0.             0.        ]
New Q values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  0.00000000e+00]
Reward: -1  Episode Reward:  7
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           4.17689464]
------
Step:44, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6           4.17689464]
New Q values:  [-180.6         -29.16914414 -180.6           3.08209737]
Reward: -1  Episode Reward:  6
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.86514302e+00  6.70446504e+00]
------
Step:45, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.86514302e+00  6.70446504e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.86514302e+00  4.49349463e+00]
Reward: -1  Episode Reward:  5
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    8.03902871    6.33703894 -252.78192178]
------
Step:46, Action:South
State  107
Old Q Values:  [-252.35169558    8.03902871    6.33703894 -252.78192178]
New Q values:  [-252.35169558    2.61561149    6.33703894 -252.78192178]
Reward: -1  Episode Reward:  4
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
------
Step:47, Action:South
State  189
Old Q Values:  [  32.0991032     2.17469001 -358.91648318 -180.6       ]
New Q values:  [  32.0991032   132.336197   -358.91648318 -180.6       ]
Reward: 9  Episode Reward:  13
xxxxx
x   x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  420.22106998  -289.59534477 -5870.07107762  -180.6       ]
------
Step:48, Action:North
State  261
Old Q Values:  [  420.22106998  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  207.18928709  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  32.0991032   132.336197   -358.91648318 -180.6       ]
------
Step:49, Action:South
State  189
Old Q Values:  [  32.0991032   132.336197   -358.91648318 -180.6       ]
New Q values:  [  32.0991032   114.49126492 -358.91648318 -180.6       ]
Reward: -1  Episode Reward:  11
xxxxx
x   x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  207.18928709  -289.59534477 -5870.07107762  -180.6       ]
------
Step:50, Action:North
State  261
Old Q Values:  [  207.18928709  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  116.62309431  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  32.0991032   114.49126492 -358.91648318 -180.6       ]
------
Step:51, Action:South
State  189
Old Q Values:  [  32.0991032   114.49126492 -358.91648318 -180.6       ]
New Q values:  [  32.0991032    80.18343426 -358.91648318 -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
x g x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  116.62309431  -289.59534477 -5870.07107762  -180.6       ]
------
Step:52, Action:North
State  261
Old Q Values:  [  116.62309431  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [   70.104268    -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  32.0991032    80.18343426 -358.91648318 -180.6       ]
------
Step:53, Action:South
State  189
Old Q Values:  [  32.0991032    80.18343426 -358.91648318 -180.6       ]
New Q values:  [  32.0991032    52.50465411 -358.91648318 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x g x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   70.104268    -289.59534477 -5870.07107762  -180.6       ]
------
Step:54, Action:North
State  260
Old Q Values:  [-5855.91413712 -6457.4598       102.98827409 -6307.02      ]
New Q values:  [-2342.96565485 -6457.4598       102.98827409 -6307.02      ]
Reward: -1  Episode Reward:  6
xxxxx
xg  x
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-1343.6616728   -755.35550854     0.             0.        ]
------
Step:55, Action:East
State  189
Old Q Values:  [  32.0991032    52.50465411 -358.91648318 -180.6       ]
New Q values:  [  32.0991032    52.50465411 -136.05884198 -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
x g x
x a x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 3.63148715  0.         27.02583764 -0.84      ]
------
Step:56, Action:East
State  200
Old Q Values:  [ 3.63148715  0.         27.02583764 -0.84      ]
New Q values:  [ 3.63148715  0.         15.12635514 -0.84      ]
Reward: -1  Episode Reward:  4
xxxxx
xg  x
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    9.09284118  -642.11788715 -6170.35693855    16.3867336 ]
------
Step:57, Action:West
State  216
Old Q Values:  [    9.09284118  -642.11788715 -6170.35693855    16.3867336 ]
New Q values:  [    9.09284118  -642.11788715 -6170.35693855    10.49259998]
Reward: -1  Episode Reward:  3
xxxxx
x   x
xga x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 3.63148715  0.         15.12635514 -0.84      ]
------
Step:58, Action:East
State  200
Old Q Values:  [ 3.63148715  0.         15.12635514 -0.84      ]
New Q values:  [ 3.63148715  0.          8.59832205 -0.84      ]
Reward: -1  Episode Reward:  2
xxxxx
xg  x
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    9.09284118  -642.11788715 -6170.35693855    10.49259998]
------
Step:59, Action:West
State  216
Old Q Values:  [    9.09284118  -642.11788715 -6170.35693855    10.49259998]
New Q values:  [    9.09284118  -642.11788715 -6170.35693855     6.17653661]
Reward: -1  Episode Reward:  1
xxxxx
x   x
xga x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 3.63148715  0.          8.59832205 -0.84      ]
------
Step:60, Action:East
State  200
Old Q Values:  [ 3.63148715  0.          8.59832205 -0.84      ]
New Q values:  [ 3.63148715  0.          5.56718117 -0.84      ]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    9.09284118  -642.11788715 -6170.35693855     6.17653661]
------
Step:61, Action:North
State  216
Old Q Values:  [    9.09284118  -642.11788715 -6170.35693855     6.17653661]
New Q values:  [ 3.96176568e+00 -6.42117887e+02 -6.17035694e+03  6.17653661e+00]
Reward: -1  Episode Reward:  -1
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           3.08209737]
------
Step:62, Action:West
State  136
Old Q Values:  [-6180.6           12.20742806  -179.38454759  -982.17661661]
New Q values:  [-6180.6           12.20742806  -179.38454759  -383.7526276 ]
Reward: -1  Episode Reward:  -2
xxxxx
xga x
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686          32.39339681  -1014.89178105]
------
Step:63, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.86514302e+00  4.49349463e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.07068642e+00  4.49349463e+00]
Reward: -1  Episode Reward:  -3
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           3.08209737]
------
Step:64, Action:West
State  136
Old Q Values:  [-6180.6           12.20742806  -179.38454759  -383.7526276 ]
New Q values:  [-6180.6           12.20742806  -179.38454759  -144.383032  ]
Reward: -1  Episode Reward:  -4
xxxxx
xga x
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686          32.39339681  -1014.89178105]
------
Step:65, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686          32.39339681  -1014.89178105]
New Q values:  [-10156.11771313  -5995.686          16.01958714  -1014.89178105]
Reward: -1  Episode Reward:  -5
xxxxx
x gax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           12.20742806  -179.38454759  -144.383032  ]
------
Step:66, Action:South
State  136
Old Q Values:  [-6180.6           12.20742806  -179.38454759  -144.383032  ]
New Q values:  [-6.18060000e+03  6.13593220e+00 -1.79384548e+02 -1.44383032e+02]
Reward: -1  Episode Reward:  -6
xxxxx
x  gx
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.96176568e+00 -6.42117887e+02 -6.17035694e+03  6.17653661e+00]
------
Step:67, Action:West
State  216
Old Q Values:  [ 3.96176568e+00 -6.42117887e+02 -6.17035694e+03  6.17653661e+00]
New Q values:  [ 3.96176568e+00 -6.42117887e+02 -6.17035694e+03  4.99710098e+00]
Reward: -1  Episode Reward:  -7
xxxxx
x   x
x agx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[10.42162111  0.          5.39642008  0.024     ]
------
Step:68, Action:North
State  200
Old Q Values:  [ 3.63148715  0.          5.56718117 -0.84      ]
New Q values:  [ 2.20064325  0.          5.56718117 -0.84      ]
Reward: -1  Episode Reward:  -8
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  1.07068642e+00  4.49349463e+00]
------
Step:69, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.07068642e+00  4.49349463e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  1.07068642e+00  1.91837716e+00]
Reward: -1  Episode Reward:  -9
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -2318.42233338     2.40326437     0.        ]
------
Step:70, Action:East
State  108
Old Q Values:  [-6.18060000e+03  2.70779136e+00  3.93708270e-01  0.00000000e+00]
New Q values:  [-6.18060000e+03  2.70779136e+00  4.36335945e+00  0.00000000e+00]
Reward: -1  Episode Reward:  -10
xxxxx
xga x
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686          16.01958714  -1014.89178105]
------
Step:71, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  1.07068642e+00  1.91837716e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.52903778e-01  1.91837716e+00]
Reward: -1  Episode Reward:  -11
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         -29.16914414 -180.6           3.08209737]
------
Step:72, Action:West
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6           3.08209737]
New Q values:  [-180.6         -29.16914414 -180.6           1.2083521 ]
Reward: -1  Episode Reward:  -12
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.52903778e-01  1.91837716e+00]
------
Step:73, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.52903778e-01  1.91837716e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.52903778e-01  8.88330176e-01]
Reward: -1  Episode Reward:  -13
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -2318.42233338     2.40326437     0.        ]
------
Step:74, Action:East
State  110
Old Q Values:  [ -180.6        -2318.42233338     2.40326437     0.        ]
New Q values:  [-1.80600000e+02 -2.31842233e+03  6.27804801e-01  0.00000000e+00]
Reward: -1  Episode Reward:  -14
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.52903778e-01  8.88330176e-01]
------
Step:75, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.52903778e-01  8.88330176e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.52903778e-01 -5.63264892e-02]
Reward: -1  Episode Reward:  -15
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -2.31842233e+03  6.27804801e-01  0.00000000e+00]
------
Step:76, Action:East
State  110
Old Q Values:  [-1.80600000e+02 -2.31842233e+03  6.27804801e-01  0.00000000e+00]
New Q values:  [-1.80600000e+02 -2.31842233e+03 -1.23006946e-01  0.00000000e+00]
Reward: -1  Episode Reward:  -16
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.52903778e-01 -5.63264892e-02]
------
Step:77, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -5.96189144e+03  1.24604403e+00  0.00000000e+00]
New Q values:  [-1.80600000e+02 -5.96189144e+03  3.89358179e-01  0.00000000e+00]
Reward: -1  Episode Reward:  -17
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    1.63646856 -180.00807518    1.41054896]
------
Step:78, Action:South
State  138
Old Q Values:  [-180.6         -29.16914414 -180.6           1.2083521 ]
New Q values:  [-180.6         -10.76852736 -180.6           1.2083521 ]
Reward: -1  Episode Reward:  -18
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.96176568e+00 -6.42117887e+02 -6.17035694e+03  4.99710098e+00]
------
Step:79, Action:North
State  216
Old Q Values:  [ 3.96176568e+00 -6.42117887e+02 -6.17035694e+03  4.99710098e+00]
New Q values:  [ 2.82548593e+00 -6.42117887e+02 -6.17035694e+03  4.99710098e+00]
Reward: -1  Episode Reward:  -19
xxxxx
x gax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  6.13593220e+00 -1.79384548e+02 -1.44383032e+02]
------
Step:80, Action:South
State  136
Old Q Values:  [-6.18060000e+03  6.13593220e+00 -1.79384548e+02 -1.44383032e+02]
New Q values:  [-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -1.44383032e+02]
Reward: -1  Episode Reward:  -20
xxxxx
x  gx
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.82548593e+00 -6.42117887e+02 -6.17035694e+03  4.99710098e+00]
------
Step:81, Action:West
State  216
Old Q Values:  [ 2.82548593e+00 -6.42117887e+02 -6.17035694e+03  4.99710098e+00]
New Q values:  [ 2.82548593e+00 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
Reward: -1  Episode Reward:  -21
xxxxx
x   x
x agx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[10.42162111  0.          5.39642008  0.024     ]
------
Step:82, Action:North
State  200
Old Q Values:  [ 2.20064325  0.          5.56718117 -0.84      ]
New Q values:  [ 0.50612843  0.          5.56718117 -0.84      ]
Reward: -1  Episode Reward:  -22
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.52903778e-01 -5.63264892e-02]
------
Step:83, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686          16.01958714  -1014.89178105]
New Q values:  [-1.01561177e+04 -5.99568600e+03  6.81388581e+00 -1.01489178e+03]
Reward: -1  Episode Reward:  -23
xxxxx
x gax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -1.44383032e+02]
------
Step:84, Action:South
State  138
Old Q Values:  [-180.6         -10.76852736 -180.6           1.2083521 ]
New Q values:  [-180.6          -3.54981293 -180.6           1.2083521 ]
Reward: -1  Episode Reward:  -24
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.82548593e+00 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
------
Step:85, Action:North
State  210
Old Q Values:  [   1.56491743  -23.4318597  -180.6           4.23868652]
New Q values:  [   0.51690754  -23.4318597  -180.6           4.23868652]
Reward: -1  Episode Reward:  -25
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    1.63646856 -180.00807518    1.41054896]
------
Step:86, Action:South
State  138
Old Q Values:  [-180.6          -3.54981293 -180.6           1.2083521 ]
New Q values:  [-180.6          -1.82400465 -180.6           1.2083521 ]
Reward: -1  Episode Reward:  -26
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  0.00000000e+00]
------
Step:87, Action:North
State  216
Old Q Values:  [ 2.82548593e+00 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
New Q values:  [ 8.92700002e-01 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
Reward: -1  Episode Reward:  -27
xxxxx
x  ax
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -1.82400465 -180.6           1.2083521 ]
------
Step:88, Action:West
State  138
Old Q Values:  [-180.6          -1.82400465 -180.6           1.2083521 ]
New Q values:  [-1.80600000e+02 -1.82400465e+00 -1.80600000e+02  1.09211972e-01]
Reward: -1  Episode Reward:  -28
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.52903778e-01 -5.63264892e-02]
------
Step:89, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -5.96189144e+03  3.89358179e-01  0.00000000e+00]
New Q values:  [-1.80600000e+02 -5.96189144e+03  4.66838381e-02  0.00000000e+00]
Reward: -1  Episode Reward:  -29
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    1.63646856 -180.00807518    1.41054896]
------
Step:90, Action:South
State  138
Old Q Values:  [-1.80600000e+02 -1.82400465e+00 -1.80600000e+02  1.09211972e-01]
New Q values:  [-1.80600000e+02 -1.13368134e+00 -1.80600000e+02  1.09211972e-01]
Reward: -1  Episode Reward:  -30
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  0.00000000e+00]
------
Step:91, Action:North
State  216
Old Q Values:  [ 8.92700002e-01 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
New Q values:  [-2.10156408e-01 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
Reward: -1  Episode Reward:  -31
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -1.13368134e+00 -1.80600000e+02  1.09211972e-01]
------
Step:92, Action:West
State  138
Old Q Values:  [-1.80600000e+02 -1.13368134e+00 -1.80600000e+02  1.09211972e-01]
New Q values:  [-180.6          -1.13368134 -180.6          -0.33044408]
Reward: -1  Episode Reward:  -32
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.52903778e-01 -5.63264892e-02]
------
Step:93, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.52903778e-01 -5.63264892e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -3.97971712e-01 -5.63264892e-02]
Reward: -1  Episode Reward:  -33
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -1.13368134 -180.6          -0.33044408]
------
Step:94, Action:West
State  138
Old Q Values:  [-180.6          -1.13368134 -180.6          -0.33044408]
New Q values:  [-180.6          -1.13368134 -180.6          -0.74907558]
Reward: -1  Episode Reward:  -34
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -3.97971712e-01 -5.63264892e-02]
------
Step:95, Action:West
State  114
Old Q Values:  [-1.80600000e+02 -5.96189144e+03  4.66838381e-02  0.00000000e+00]
New Q values:  [-1.80600000e+02 -5.96189144e+03  4.66838381e-02 -6.00000000e-01]
Reward: -1  Episode Reward:  -35
xxxxx
xa  x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[ 0.    -0.936  0.     0.   ]
------
Step:96, Action:North
State  111
Old Q Values:  [ 0.          3.43614185 10.50890437  0.        ]
New Q values:  [-177.44732869    3.43614185   10.50890437    0.        ]
Reward: -301  Episode Reward:  -336
xxxxx
xa  x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869    3.43614185   10.50890437    0.        ]
------
Step:97, Action:East
State  111
Old Q Values:  [-177.44732869    3.43614185   10.50890437    0.        ]
New Q values:  [-177.44732869    3.43614185    8.30720813    0.        ]
Reward: -1  Episode Reward:  -337
xxxxx
x a x
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           15.67882126     6.27889259]
------
Step:98, Action:East
State  121
Old Q Values:  [ 0.00000000e+00  0.00000000e+00 -9.88652971e+03  4.88836988e+00]
New Q values:  [ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
Reward: -10001  Episode Reward:  -10338
xxxxx
x  gx
x   x
x . x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -2.31842233e+03 -1.23006946e-01  0.00000000e+00]
------
Step:1, Action:West
State  111
Old Q Values:  [-177.44732869    3.43614185    8.30720813    0.        ]
New Q values:  [-177.44732869    3.43614185    8.30720813 -178.10783756]
Reward: -301  Episode Reward:  -301
xxxxx
xa..x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869    3.43614185    8.30720813 -178.10783756]
------
Step:2, Action:East
State  111
Old Q Values:  [-177.44732869    3.43614185    8.30720813 -178.10783756]
New Q values:  [-177.44732869    3.43614185   13.42652963 -178.10783756]
Reward: 9  Episode Reward:  -292
xxxxx
x a.x
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -6000.6           15.67882126     6.27889259]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -6000.6           15.67882126     6.27889259]
New Q values:  [ -253.44886264 -6000.6           11.44680583     6.27889259]
Reward: 9  Episode Reward:  -283
xxxxx
x  ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -1.13368134 -180.6          -0.74907558]
------
Step:4, Action:West
State  136
Old Q Values:  [-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -1.44383032e+02]
New Q values:  [-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -5.68867018e+01]
Reward: -1  Episode Reward:  -284
xxxxx
x agx
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -6000.6           11.44680583     6.27889259]
New Q values:  [-2.53448863e+02 -6.00060000e+03  1.14468058e+01  5.93951593e+00]
Reward: -1  Episode Reward:  -285
xxxxx
xa  x
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869    3.43614185   13.42652963 -178.10783756]
------
Step:6, Action:East
State  111
Old Q Values:  [-177.44732869    3.43614185   13.42652963 -178.10783756]
New Q values:  [-177.44732869    3.43614185    4.7537139  -178.10783756]
Reward: -1  Episode Reward:  -286
xxxxx
x a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -3.97971712e-01 -5.63264892e-02]
------
Step:7, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -3.97971712e-01 -5.63264892e-02]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -3.97971712e-01 -6.22530596e-01]
Reward: -1  Episode Reward:  -287
xxxxx
xa  x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -2.31842233e+03 -1.23006946e-01  0.00000000e+00]
------
Step:8, Action:West
State  111
Old Q Values:  [-177.44732869    3.43614185    4.7537139  -178.10783756]
New Q values:  [-177.44732869    3.43614185    4.7537139  -250.41702085]
Reward: -301  Episode Reward:  -588
xxxxx
xa  x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869    3.43614185    4.7537139  -250.41702085]
------
Step:9, Action:East
State  109
Old Q Values:  [ -241.10880094    10.74931661 -1111.65884098  -180.6       ]
New Q values:  [ -241.10880094    10.74931661 -6443.21937065  -180.6       ]
Reward: -10001  Episode Reward:  -10589
xxxxx
x g x
x...x
x ..x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 149.61519485]
------
Step:1, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 149.61519485]
New Q values:  [ 53.24162309   0.         -63.86984116  86.27735834]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   70.104268    -289.59534477 -5870.07107762  -180.6       ]
------
Step:2, Action:North
State  261
Old Q Values:  [   70.104268    -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  106.66021151  -289.59534477 -5870.07107762  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.19276907  244.06168104   71.40834047 -180.6       ]
------
Step:3, Action:South
State  181
Old Q Values:  [   2.19276907  244.06168104   71.40834047 -180.6       ]
New Q values:  [   2.19276907  129.02273587   71.40834047 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  106.66021151  -289.59534477 -5870.07107762  -180.6       ]
------
Step:4, Action:North
State  261
Old Q Values:  [  106.66021151  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  255.39737451  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  260.86610305 711.11096635   0.        ]
------
Step:5, Action:East
State  177
Old Q Values:  [   0.         8289.641515   3995.09020427    0.        ]
New Q values:  [   0.         8289.641515   1723.02800889    0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   398.63975728 -9791.50993282     0.        ]
------
Step:6, Action:South
State  193
Old Q Values:  [-5922.26708831   398.63975728 -9791.50993282     0.        ]
New Q values:  [-5922.26708831   184.73911042 -9791.50993282     0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116  86.27735834]
------
Step:7, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116  86.27735834]
New Q values:  [ 53.24162309   0.         -63.86984116 110.53015569]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  255.39737451  -289.59534477 -5870.07107762  -180.6       ]
------
Step:8, Action:North
State  261
Old Q Values:  [  255.39737451  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  140.26577057  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.19276907  129.02273587   71.40834047 -180.6       ]
------
Step:9, Action:South
State  177
Old Q Values:  [   0.         8289.641515   1723.02800889    0.        ]
New Q values:  [   0.         3357.33633717 1723.02800889    0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  140.26577057  -289.59534477 -5870.07107762  -180.6       ]
------
Step:10, Action:North
State  261
Old Q Values:  [  140.26577057  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [   94.21312899  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  20
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.19276907  129.02273587   71.40834047 -180.6       ]
------
Step:11, Action:South
State  177
Old Q Values:  [   0.         3357.33633717 1723.02800889    0.        ]
New Q values:  [   0.         1370.59847356 1723.02800889    0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   94.21312899  -289.59534477 -5870.07107762  -180.6       ]
------
Step:12, Action:North
State  261
Old Q Values:  [   94.21312899  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [   75.79207236  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  18
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.19276907  129.02273587   71.40834047 -180.6       ]
------
Step:13, Action:South
State  181
Old Q Values:  [   2.19276907  129.02273587   71.40834047 -180.6       ]
New Q values:  [   2.19276907   73.74671605   71.40834047 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   75.79207236  -289.59534477 -5870.07107762  -180.6       ]
------
Step:14, Action:North
State  261
Old Q Values:  [   75.79207236  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  546.62523161  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         1370.59847356 1723.02800889    0.        ]
------
Step:15, Action:East
State  181
Old Q Values:  [   2.19276907   73.74671605   71.40834047 -180.6       ]
New Q values:  [ 2.19276907e+00  7.37467161e+01 -5.93485415e+03 -1.80600000e+02]
Reward: -10001  Episode Reward:  -9985
xxxxx
x.. x
x g.x
x  .x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  260.86610305 711.11096635   0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [  6.1762476  260.86610305 711.11096635   0.        ]
New Q values:  [  6.1762476  260.86610305 426.51835489   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 38.85388605 455.57989448  21.35538099   0.        ]
------
Step:2, Action:South
State  194
Old Q Values:  [-0.6         2.56078987 18.45297824  0.        ]
New Q values:  [-6.00000000e-01 -5.97165869e+03  1.84529782e+01  0.00000000e+00]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x  .x
x.g.x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    10.74931661 -6443.21937065  -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869    3.43614185    4.7537139  -250.41702085]
New Q values:  [-177.44732869   28.89847156    4.7537139  -250.41702085]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  7.37467161e+01 -5.93485415e+03 -1.80600000e+02]
------
Step:2, Action:South
State  183
Old Q Values:  [  6.1762476  260.86610305 426.51835489   0.        ]
New Q values:  [  6.1762476  273.7340107  426.51835489   0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  546.62523161  -289.59534477 -5870.07107762  -180.6       ]
------
Step:3, Action:North
State  261
Old Q Values:  [  546.62523161  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  346.00559911  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  273.7340107  426.51835489   0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [  6.1762476  273.7340107  426.51835489   0.        ]
New Q values:  [  6.1762476 273.7340107 312.6813103   0.       ]
Reward: 9  Episode Reward:  26
xxxxx
x . x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 38.85388605 455.57989448  21.35538099   0.        ]
------
Step:5, Action:South
State  193
Old Q Values:  [-5922.26708831   184.73911042 -9791.50993282     0.        ]
New Q values:  [-5922.26708831   112.45469087 -9791.50993282     0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x . x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 110.53015569]
------
Step:6, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 110.53015569]
New Q values:  [ 53.24162309   0.         -63.86984116 147.41374201]
Reward: -1  Episode Reward:  34
xxxxx
x . x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  346.00559911  -289.59534477 -5870.07107762  -180.6       ]
------
Step:7, Action:North
State  261
Old Q Values:  [  346.00559911  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  231.60663273  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x . x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476 273.7340107 312.6813103   0.       ]
------
Step:8, Action:East
State  181
Old Q Values:  [ 2.19276907e+00  7.37467161e+01 -5.93485415e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  7.37467161e+01 -8.33735914e+03 -1.80600000e+02]
Reward: -10001  Episode Reward:  -9968
xxxxx
x . x
x g.x
x  .x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -2.31842233e+03 -1.23006946e-01  0.00000000e+00]
------
Step:1, Action:West
State  111
Old Q Values:  [-177.44732869   28.89847156    4.7537139  -250.41702085]
New Q values:  [-177.44732869   28.89847156    4.7537139  -272.09726687]
Reward: -301  Episode Reward:  -301
xxxxx
xa..x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   28.89847156    4.7537139  -272.09726687]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869   28.89847156    4.7537139  -272.09726687]
New Q values:  [-177.44732869   39.08340344    4.7537139  -272.09726687]
Reward: 9  Episode Reward:  -292
xxxxx
x ..x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  7.37467161e+01 -8.33735914e+03 -1.80600000e+02]
------
Step:3, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  7.37467161e+01 -8.33735914e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  9.83806762e+01 -8.33735914e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -293
xxxxx
x g.x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  231.60663273  -289.59534477 -5870.07107762  -180.6       ]
------
Step:4, Action:North
State  261
Old Q Values:  [  231.60663273  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  121.55685597  -289.59534477 -5870.07107762  -180.6       ]
Reward: -1  Episode Reward:  -294
xxxxx
x ..x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  9.83806762e+01 -8.33735914e+03 -1.80600000e+02]
------
Step:5, Action:South
State  183
Old Q Values:  [  6.1762476 273.7340107 312.6813103   0.       ]
New Q values:  [  6.1762476  145.36066107 312.6813103    0.        ]
Reward: -1  Episode Reward:  -295
xxxxx
x ..x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  121.55685597  -289.59534477 -5870.07107762  -180.6       ]
------
Step:6, Action:North
State  260
Old Q Values:  [-2342.96565485 -6457.4598       102.98827409 -6307.02      ]
New Q values:  [ -776.25394132 -6457.4598       102.98827409 -6307.02      ]
Reward: -1  Episode Reward:  -296
xxxxx
x ..x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   538.44106874     0.        ]
------
Step:7, Action:East
State  180
Old Q Values:  [-3431.06190038  -244.53978092   291.01938823     0.        ]
New Q values:  [-3431.06190038  -244.53978092   180.62154828     0.        ]
Reward: 9  Episode Reward:  -287
xxxxx
x ..x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   196.04597662    72.14653931     0.        ]
------
Step:8, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -8.31307032e+03  0.00000000e+00  0.00000000e+00]
New Q values:  [-2.78872080e-01 -3.23445845e+03  0.00000000e+00  0.00000000e+00]
Reward: 9  Episode Reward:  -278
xxxxx
x ..x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   5.30971424 -180.6        -424.74015627  284.56560481]
------
Step:9, Action:North
State  276
Old Q Values:  [   5.30971424 -180.6        -424.74015627  284.56560481]
New Q values:  [   1.5238857  -180.6        -424.74015627  284.56560481]
Reward: -1  Episode Reward:  -279
xxxxx
x ..x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -3.23445845e+03  0.00000000e+00  0.00000000e+00]
------
Step:10, Action:East
State  195
Old Q Values:  [ 38.85388605 455.57989448  21.35538099   0.        ]
New Q values:  [ 38.85388605 455.57989448  15.21375835   0.        ]
Reward: 9  Episode Reward:  -270
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   0.51690754  -23.4318597  -180.6           4.23868652]
------
Step:11, Action:West
State  210
Old Q Values:  [   0.51690754  -23.4318597  -180.6           4.23868652]
New Q values:  [   0.51690754  -23.4318597  -180.6           6.63136808]
Reward: -1  Episode Reward:  -271
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -5.97165869e+03  1.84529782e+01  0.00000000e+00]
------
Step:12, Action:East
State  195
Old Q Values:  [ 38.85388605 455.57989448  15.21375835   0.        ]
New Q values:  [ 38.85388605 455.57989448   7.47491377   0.        ]
Reward: -1  Episode Reward:  -272
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   0.51690754  -23.4318597  -180.6           6.63136808]
------
Step:13, Action:West
State  208
Old Q Values:  [-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -3.17937964e+03]
New Q values:  [-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -1.23861545e+03]
Reward: -1  Episode Reward:  -273
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   112.45469087 -9791.50993282     0.        ]
------
Step:14, Action:South
State  192
Old Q Values:  [  0.38977704 103.91250343 123.9417184    0.        ]
New Q values:  [  0.38977704 126.33468282 123.9417184    0.        ]
Reward: -1  Episode Reward:  -274
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.5238857  -180.6        -424.74015627  284.56560481]
------
Step:15, Action:West
State  276
Old Q Values:  [   1.5238857  -180.6        -424.74015627  284.56560481]
New Q values:  [   1.5238857  -180.6        -424.74015627  149.69329871]
Reward: -1  Episode Reward:  -275
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  121.55685597  -289.59534477 -5870.07107762  -180.6       ]
------
Step:16, Action:North
State  260
Old Q Values:  [ -776.25394132 -6457.4598       102.98827409 -6307.02      ]
New Q values:  [ -256.91511204 -6457.4598       102.98827409 -6307.02      ]
Reward: -1  Episode Reward:  -276
xxxxx
xg..x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038  -244.53978092   180.62154828     0.        ]
------
Step:17, Action:East
State  181
Old Q Values:  [ 2.19276907e+00  9.83806762e+01 -8.33735914e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  9.83806762e+01 -3.27672986e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -277
xxxxx
x g.x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   196.04597662    72.14653931     0.        ]
------
Step:18, Action:South
State  193
Old Q Values:  [-5922.26708831   112.45469087 -9791.50993282     0.        ]
New Q values:  [-5922.26708831    88.60599895 -9791.50993282     0.        ]
Reward: -1  Episode Reward:  -278
xxxxx
x .gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 147.41374201]
------
Step:19, Action:West
State  276
Old Q Values:  [   1.5238857  -180.6        -424.74015627  149.69329871]
New Q values:  [   1.5238857  -180.6        -424.74015627   95.74437628]
Reward: -1  Episode Reward:  -279
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  121.55685597  -289.59534477 -5870.07107762  -180.6       ]
------
Step:20, Action:North
State  260
Old Q Values:  [ -256.91511204 -6457.4598       102.98827409 -6307.02      ]
New Q values:  [  -49.17958033 -6457.4598       102.98827409 -6307.02      ]
Reward: -1  Episode Reward:  -280
xxxxx
xg..x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038  -244.53978092   180.62154828     0.        ]
------
Step:21, Action:East
State  180
Old Q Values:  [-3431.06190038  -244.53978092   180.62154828     0.        ]
New Q values:  [-3431.06190038  -244.53978092   109.54902416     0.        ]
Reward: -1  Episode Reward:  -281
xxxxx
x ..x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  0.38977704 126.33468282 123.9417184    0.        ]
------
Step:22, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -5.97165869e+03  1.84529782e+01  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  1.84529782e+01  0.00000000e+00]
Reward: -1  Episode Reward:  -282
xxxxx
x ..x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.5238857  -180.6        -424.74015627   95.74437628]
------
Step:23, Action:North
State  276
Old Q Values:  [   1.5238857  -180.6        -424.74015627   95.74437628]
New Q values:  [   5.54544775 -180.6        -424.74015627   95.74437628]
Reward: -1  Episode Reward:  -283
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  1.84529782e+01  0.00000000e+00]
------
Step:24, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  1.84529782e+01  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  8.77060172e+00  0.00000000e+00]
Reward: -1  Episode Reward:  -284
xxxxx
x ..x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   0.51690754  -23.4318597  -180.6           6.63136808]
------
Step:25, Action:West
State  208
Old Q Values:  [-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -1.23861545e+03]
New Q values:  [-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -4.58145775e+02]
Reward: -1  Episode Reward:  -285
xxxxx
x ..x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  0.38977704 126.33468282 123.9417184    0.        ]
------
Step:26, Action:South
State  192
Old Q Values:  [  0.38977704 126.33468282 123.9417184    0.        ]
New Q values:  [  0.38977704  78.65718601 123.9417184    0.        ]
Reward: -1  Episode Reward:  -286
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   5.54544775 -180.6        -424.74015627   95.74437628]
------
Step:27, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627   95.74437628]
New Q values:  [   5.54544775 -180.6        -424.74015627   68.59423274]
Reward: -1  Episode Reward:  -287
xxxxx
x ..x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  -49.17958033 -6457.4598       102.98827409 -6307.02      ]
------
Step:28, Action:East
State  260
Old Q Values:  [  -49.17958033 -6457.4598       102.98827409 -6307.02      ]
New Q values:  [  -49.17958033 -6457.4598        61.17357946 -6307.02      ]
Reward: -1  Episode Reward:  -288
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   5.54544775 -180.6        -424.74015627   68.59423274]
------
Step:29, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627   68.59423274]
New Q values:  [   5.54544775 -180.6        -424.74015627   45.18976693]
Reward: -1  Episode Reward:  -289
xxxxx
x ..x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  -49.17958033 -6457.4598        61.17357946 -6307.02      ]
------
Step:30, Action:East
State  261
Old Q Values:  [  121.55685597  -289.59534477 -5870.07107762  -180.6       ]
New Q values:  [  121.55685597  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -290
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   5.54544775 -180.6        -424.74015627   45.18976693]
------
Step:31, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627   45.18976693]
New Q values:  [   5.54544775 -180.6        -424.74015627   53.94296356]
Reward: -1  Episode Reward:  -291
xxxxx
x ..x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  121.55685597  -289.59534477 -2335.07150097  -180.6       ]
------
Step:32, Action:North
State  260
Old Q Values:  [  -49.17958033 -6457.4598        61.17357946 -6307.02      ]
New Q values:  [  141.26048849 -6457.4598        61.17357946 -6307.02      ]
Reward: -1  Episode Reward:  -292
xxxxx
x ..x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   538.44106874     0.        ]
------
Step:33, Action:East
State  183
Old Q Values:  [  6.1762476  145.36066107 312.6813103    0.        ]
New Q values:  [  6.1762476  145.36066107 127.10370464   0.        ]
Reward: -1  Episode Reward:  -293
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  8.77060172e+00  0.00000000e+00]
------
Step:34, Action:East
State  195
Old Q Values:  [ 38.85388605 455.57989448   7.47491377   0.        ]
New Q values:  [ 38.85388605 455.57989448   4.37937593   0.        ]
Reward: -1  Episode Reward:  -294
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   0.51690754  -23.4318597  -180.6           6.63136808]
------
Step:35, Action:West
State  208
Old Q Values:  [-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -4.58145775e+02]
New Q values:  [-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -1.57276510e+02]
Reward: -1  Episode Reward:  -295
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    88.60599895 -9791.50993282     0.        ]
------
Step:36, Action:South
State  195
Old Q Values:  [ 38.85388605 455.57989448   4.37937593   0.        ]
New Q values:  [ 38.85388605 225.8560804    4.37937593   0.        ]
Reward: -1  Episode Reward:  -296
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116 147.41374201]
------
Step:37, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627   53.94296356]
New Q values:  [   5.54544775 -180.6        -424.74015627   57.44424221]
Reward: -1  Episode Reward:  -297
xxxxx
x ..x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  121.55685597  -289.59534477 -2335.07150097  -180.6       ]
------
Step:38, Action:North
State  260
Old Q Values:  [  141.26048849 -6457.4598        61.17357946 -6307.02      ]
New Q values:  [  217.43651602 -6457.4598        61.17357946 -6307.02      ]
Reward: -1  Episode Reward:  -298
xxxxx
x ..x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   538.44106874     0.        ]
------
Step:39, Action:East
State  180
Old Q Values:  [-3431.06190038  -244.53978092   109.54902416     0.        ]
New Q values:  [-3431.06190038  -244.53978092    80.40212518     0.        ]
Reward: -1  Episode Reward:  -299
xxxxx
x ..x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[  0.38977704  78.65718601 123.9417184    0.        ]
------
Step:40, Action:East
State  192
Old Q Values:  [  0.38977704  78.65718601 123.9417184    0.        ]
New Q values:  [ 0.38977704 78.65718601 48.54235363  0.        ]
Reward: -1  Episode Reward:  -300
xxxxx
xg..x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -1.57276510e+02]
------
Step:41, Action:North
State  208
Old Q Values:  [-1.44777910e+00 -2.05511157e+03 -2.65170615e+03 -1.57276510e+02]
New Q values:  [    5.31182893 -2055.11156703 -2651.70614553  -157.2765103 ]
Reward: 9  Episode Reward:  -291
xxxxx
x .ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    1.63646856 -180.00807518    1.41054896]
------
Step:42, Action:South
State  130
Old Q Values:  [-274.86862066    1.63646856 -180.00807518    1.41054896]
New Q values:  [-274.86862066    2.04399785 -180.00807518    1.41054896]
Reward: -1  Episode Reward:  -292
xxxxx
x . x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   0.51690754  -23.4318597  -180.6           6.63136808]
------
Step:43, Action:West
State  208
Old Q Values:  [    5.31182893 -2055.11156703 -2651.70614553  -157.2765103 ]
New Q values:  [    5.31182893 -2055.11156703 -2651.70614553   -39.91344832]
Reward: -1  Episode Reward:  -293
xxxxx
x . x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 0.38977704 78.65718601 48.54235363  0.        ]
------
Step:44, Action:South
State  192
Old Q Values:  [ 0.38977704 78.65718601 48.54235363  0.        ]
New Q values:  [ 0.38977704 58.77986531 48.54235363  0.        ]
Reward: -1  Episode Reward:  -294
xxxxx
x . x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    93.05663637]
------
Step:45, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116 147.41374201]
New Q values:  [ 53.24162309   0.         -63.86984116  94.83255359]
Reward: -1  Episode Reward:  -295
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  121.55685597  -289.59534477 -2335.07150097  -180.6       ]
------
Step:46, Action:North
State  261
Old Q Values:  [  121.55685597  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   77.53694526  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -296
xxxxx
x . x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  9.83806762e+01 -3.27672986e+03 -1.80600000e+02]
------
Step:47, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  9.83806762e+01 -3.27672986e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  6.20133541e+01 -3.27672986e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -297
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   77.53694526  -289.59534477 -2335.07150097  -180.6       ]
------
Step:48, Action:North
State  261
Old Q Values:  [   77.53694526  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   49.01878433  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -298
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  6.20133541e+01 -3.27672986e+03 -1.80600000e+02]
------
Step:49, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  6.20133541e+01 -3.27672986e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  3.89109769e+01 -3.27672986e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -299
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   49.01878433  -289.59534477 -2335.07150097  -180.6       ]
------
Step:50, Action:North
State  261
Old Q Values:  [   49.01878433  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   62.61571205  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -300
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476  145.36066107 127.10370464   0.        ]
------
Step:51, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  3.89109769e+01 -3.27672986e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  3.37491044e+01 -3.27672986e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -301
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   62.61571205  -289.59534477 -2335.07150097  -180.6       ]
------
Step:52, Action:North
State  261
Old Q Values:  [   62.61571205  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   34.57101614  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -302
xxxxx
x . x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  3.37491044e+01 -3.27672986e+03 -1.80600000e+02]
------
Step:53, Action:South
State  183
Old Q Values:  [  6.1762476  145.36066107 127.10370464   0.        ]
New Q values:  [  6.1762476   67.91556927 127.10370464   0.        ]
Reward: -1  Episode Reward:  -303
xxxxx
x . x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   34.57101614  -289.59534477 -2335.07150097  -180.6       ]
------
Step:54, Action:North
State  261
Old Q Values:  [   34.57101614  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   51.35951785  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -304
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476   67.91556927 127.10370464   0.        ]
------
Step:55, Action:East
State  181
Old Q Values:  [ 2.19276907e+00  3.37491044e+01 -3.27672986e+03 -1.80600000e+02]
New Q values:  [    2.19276907    33.74910439 -1284.71014613  -180.6       ]
Reward: -1  Episode Reward:  -305
xxxxx
x . x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    88.60599895 -9791.50993282     0.        ]
------
Step:56, Action:South
State  192
Old Q Values:  [ 0.38977704 58.77986531 48.54235363  0.        ]
New Q values:  [ 0.38977704 50.82893704 48.54235363  0.        ]
Reward: -1  Episode Reward:  -306
xxxxx
x . x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    93.05663637]
------
Step:57, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116  94.83255359]
New Q values:  [ 53.24162309   0.         -63.86984116  52.74087679]
Reward: -1  Episode Reward:  -307
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   51.35951785  -289.59534477 -2335.07150097  -180.6       ]
------
Step:58, Action:North
State  261
Old Q Values:  [   51.35951785  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   58.07491853  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -308
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  6.1762476   67.91556927 127.10370464   0.        ]
------
Step:59, Action:East
State  183
Old Q Values:  [  6.1762476   67.91556927 127.10370464   0.        ]
New Q values:  [ 6.1762476  67.91556927 52.87266237  0.        ]
Reward: -1  Episode Reward:  -309
xxxxx
x . x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  8.77060172e+00  0.00000000e+00]
------
Step:60, Action:East
State  192
Old Q Values:  [ 0.38977704 50.82893704 48.54235363  0.        ]
New Q values:  [ 0.38977704 50.82893704 20.41049013  0.        ]
Reward: -1  Episode Reward:  -310
xxxxx
x . x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    5.31182893 -2055.11156703 -2651.70614553   -39.91344832]
------
Step:61, Action:North
State  216
Old Q Values:  [-2.10156408e-01 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
New Q values:  [ 3.21988389e-01 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
Reward: -1  Episode Reward:  -311
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -5.68867018e+01]
------
Step:62, Action:South
State  128
Old Q Values:  [ 6067.82062533  2850.949675   -8652.84       11155.58059144]
New Q values:  [ 6067.82062533  1141.37341868 -8652.84       11155.58059144]
Reward: -1  Episode Reward:  -312
xxxxx
x .gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    5.31182893 -2055.11156703 -2651.70614553   -39.91344832]
------
Step:63, Action:West
State  216
Old Q Values:  [ 3.21988389e-01 -6.42117887e+02 -6.17035694e+03  4.52532672e+00]
New Q values:  [ 3.21988389e-01 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
Reward: -1  Episode Reward:  -313
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[0. 0. 0. 0.]
------
Step:64, Action:East
State  192
Old Q Values:  [ 0.38977704 50.82893704 20.41049013  0.        ]
New Q values:  [ 0.38977704 50.82893704  9.15774473  0.        ]
Reward: -1  Episode Reward:  -314
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[    5.31182893 -2055.11156703 -2651.70614553   -39.91344832]
------
Step:65, Action:North
State  208
Old Q Values:  [    5.31182893 -2055.11156703 -2651.70614553   -39.91344832]
New Q values:  [ 2.13793092e+00 -2.05511157e+03 -2.65170615e+03 -3.99134483e+01]
Reward: -1  Episode Reward:  -315
xxxxx
x .ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    2.04399785 -180.00807518    1.41054896]
------
Step:66, Action:South
State  130
Old Q Values:  [-274.86862066    2.04399785 -180.00807518    1.41054896]
New Q values:  [-274.86862066    2.20700956 -180.00807518    1.41054896]
Reward: -1  Episode Reward:  -316
xxxxx
x . x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   0.51690754  -23.4318597  -180.6           6.63136808]
------
Step:67, Action:West
State  208
Old Q Values:  [ 2.13793092e+00 -2.05511157e+03 -2.65170615e+03 -3.99134483e+01]
New Q values:  [ 2.13793092e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
Reward: -1  Episode Reward:  -317
xxxxx
x . x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 0.38977704 50.82893704  9.15774473  0.        ]
------
Step:68, Action:South
State  192
Old Q Values:  [ 0.38977704 50.82893704  9.15774473  0.        ]
New Q values:  [ 0.38977704 47.64856573  9.15774473  0.        ]
Reward: -1  Episode Reward:  -318
xxxxx
x . x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    93.05663637]
------
Step:69, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627   57.44424221]
New Q values:  [   5.54544775 -180.6        -424.74015627   39.80017244]
Reward: -1  Episode Reward:  -319
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   58.07491853  -289.59534477 -2335.07150097  -180.6       ]
------
Step:70, Action:North
State  260
Old Q Values:  [  217.43651602 -6457.4598        61.17357946 -6307.02      ]
New Q values:  [  110.49524396 -6457.4598        61.17357946 -6307.02      ]
Reward: -1  Episode Reward:  -320
xxxxx
xg. x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038  -244.53978092    80.40212518     0.        ]
------
Step:71, Action:East
State  180
Old Q Values:  [-3431.06190038  -244.53978092    80.40212518     0.        ]
New Q values:  [-3431.06190038  -244.53978092    45.85541979     0.        ]
Reward: -1  Episode Reward:  -321
xxxxx
x . x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 0.38977704 47.64856573  9.15774473  0.        ]
------
Step:72, Action:South
State  192
Old Q Values:  [ 0.38977704 47.64856573  9.15774473  0.        ]
New Q values:  [ 0.38977704 46.3764172   9.15774473  0.        ]
Reward: -1  Episode Reward:  -322
xxxxx
x . x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    93.05663637]
------
Step:73, Action:West
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116  52.74087679]
New Q values:  [ 53.24162309   0.         -63.86984116  37.91882628]
Reward: -1  Episode Reward:  -323
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   58.07491853  -289.59534477 -2335.07150097  -180.6       ]
------
Step:74, Action:North
State  261
Old Q Values:  [   58.07491853  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   32.75469873  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -324
xxxxx
x . x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    2.19276907    33.74910439 -1284.71014613  -180.6       ]
------
Step:75, Action:South
State  181
Old Q Values:  [    2.19276907    33.74910439 -1284.71014613  -180.6       ]
New Q values:  [    2.19276907    22.72605137 -1284.71014613  -180.6       ]
Reward: -1  Episode Reward:  -325
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   32.75469873  -289.59534477 -2335.07150097  -180.6       ]
------
Step:76, Action:North
State  261
Old Q Values:  [   32.75469873  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   19.3196949   -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -326
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    2.19276907    22.72605137 -1284.71014613  -180.6       ]
------
Step:77, Action:South
State  189
Old Q Values:  [  32.0991032    52.50465411 -136.05884198 -180.6       ]
New Q values:  [  32.0991032    26.19777011 -136.05884198 -180.6       ]
Reward: -1  Episode Reward:  -327
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   19.3196949   -289.59534477 -2335.07150097  -180.6       ]
------
Step:78, Action:North
State  260
Old Q Values:  [  110.49524396 -6457.4598        61.17357946 -6307.02      ]
New Q values:  [   57.35472352 -6457.4598        61.17357946 -6307.02      ]
Reward: -1  Episode Reward:  -328
xxxxx
xg. x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038  -244.53978092    45.85541979     0.        ]
------
Step:79, Action:East
State  180
Old Q Values:  [-3431.06190038  -244.53978092    45.85541979     0.        ]
New Q values:  [-3431.06190038  -244.53978092    31.65509308     0.        ]
Reward: -1  Episode Reward:  -329
xxxxx
x . x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 0.38977704 46.3764172   9.15774473  0.        ]
------
Step:80, Action:South
State  192
Old Q Values:  [ 0.38977704 46.3764172   9.15774473  0.        ]
New Q values:  [ 0.38977704 45.86755779  9.15774473  0.        ]
Reward: -1  Episode Reward:  -330
xxxxx
x . x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    93.05663637]
------
Step:81, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415    93.05663637]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415    42.41856302]
Reward: -1  Episode Reward:  -331
xxxxx
x . x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   19.3196949   -289.59534477 -2335.07150097  -180.6       ]
------
Step:82, Action:North
State  260
Old Q Values:  [   57.35472352 -6457.4598        61.17357946 -6307.02      ]
New Q values:  [  183.87421003 -6457.4598        61.17357946 -6307.02      ]
Reward: -1  Episode Reward:  -332
xxxxx
x . x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   538.44106874     0.        ]
------
Step:83, Action:East
State  183
Old Q Values:  [ 6.1762476  67.91556927 52.87266237  0.        ]
New Q values:  [ 6.1762476  67.91556927 23.18024546  0.        ]
Reward: -1  Episode Reward:  -333
xxxxx
x . x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  8.77060172e+00  0.00000000e+00]
------
Step:84, Action:East
State  192
Old Q Values:  [ 0.38977704 45.86755779  9.15774473  0.        ]
New Q values:  [ 0.38977704 45.86755779  3.70447717  0.        ]
Reward: -1  Episode Reward:  -334
xxxxx
x . x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2.13793092e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
------
Step:85, Action:North
State  210
Old Q Values:  [   0.51690754  -23.4318597  -180.6           6.63136808]
New Q values:  [   0.26886588  -23.4318597  -180.6           6.63136808]
Reward: -1  Episode Reward:  -335
xxxxx
x .ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    2.20700956 -180.00807518    1.41054896]
------
Step:86, Action:South
State  130
Old Q Values:  [-274.86862066    2.20700956 -180.00807518    1.41054896]
New Q values:  [-274.86862066    0.9241831  -180.00807518    1.41054896]
Reward: -1  Episode Reward:  -336
xxxxx
x . x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2.13793092e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
------
Step:87, Action:North
State  208
Old Q Values:  [ 2.13793092e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
New Q values:  [ 6.78337057e-01 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
Reward: -1  Episode Reward:  -337
xxxxx
x .ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-274.86862066    0.9241831  -180.00807518    1.41054896]
------
Step:88, Action:West
State  136
Old Q Values:  [-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -5.68867018e+01]
New Q values:  [-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -1.70302373e+01]
Reward: 9  Episode Reward:  -328
xxxxx
x agx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         1.08147795]
------
Step:89, Action:West
State  127
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [ 0.          0.          0.         11.12502103]
Reward: -1  Episode Reward:  -329
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   39.08340344    4.7537139  -272.09726687]
------
Step:90, Action:South
State  99
Old Q Values:  [ 0.    -0.936  0.     0.   ]
New Q values:  [    0.         25203.05352785     0.             0.        ]
Reward: -1  Episode Reward:  -330
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[ 0.00000000e+00 -6.00000000e-01  8.40134264e+04  0.00000000e+00]
------
Step:91, Action:East
State  189
Old Q Values:  [  32.0991032    26.19777011 -136.05884198 -180.6       ]
New Q values:  [  32.0991032    26.19777011  -54.79266042 -180.6       ]
Reward: -1  Episode Reward:  -331
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         0.76958789]
------
Step:92, Action:West
State  204
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.         0.         0.         9.02973096]
Reward: -1  Episode Reward:  -332
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  32.0991032    26.19777011  -54.79266042 -180.6       ]
------
Step:93, Action:North
State  189
Old Q Values:  [  32.0991032    26.19777011  -54.79266042 -180.6       ]
New Q values:  [  23.96466231   26.19777011  -54.79266042 -180.6       ]
Reward: -1  Episode Reward:  -333
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   39.08340344    4.7537139  -272.09726687]
------
Step:94, Action:South
State  99
Old Q Values:  [    0.         25203.05352785     0.             0.        ]
New Q values:  [    0.         35284.64933898     0.             0.        ]
Reward: -1  Episode Reward:  -334
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[ 0.00000000e+00 -6.00000000e-01  8.40134264e+04  0.00000000e+00]
------
Step:95, Action:East
State  189
Old Q Values:  [  23.96466231   26.19777011  -54.79266042 -180.6       ]
New Q values:  [  23.96466231   26.19777011  -22.2861878  -180.6       ]
Reward: -1  Episode Reward:  -335
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         0.76958789]
------
Step:96, Action:West
State  205
Old Q Values:  [0.         0.         0.         0.76958789]
New Q values:  [0.         0.         0.         7.56716619]
Reward: -1  Episode Reward:  -336
xxxxx
x  gx
xa  x
x  .x
xxxxx
Step:97, Action:West
State  189
Old Q Values:  [  23.96466231   26.19777011  -22.2861878  -180.6       ]
New Q values:  [  23.96466231   26.19777011  -22.2861878  -244.98066897]
Reward: -301  Episode Reward:  -637
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  23.96466231   26.19777011  -22.2861878  -244.98066897]
------
Step:98, Action:South
State  188
Old Q Values:  [-1343.6616728   -755.35550854     0.             0.        ]
New Q values:  [-1343.6616728   -247.57994041     0.             0.        ]
Reward: -1  Episode Reward:  -638
xxxxx
xg  x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  183.87421003 -6457.4598        61.17357946 -6307.02      ]
------
Step:99, Action:North
State  260
Old Q Values:  [  183.87421003 -6457.4598        61.17357946 -6307.02      ]
New Q values:  [-5927.05031599 -6457.4598        61.17357946 -6307.02      ]
Reward: -10001  Episode Reward:  -10639
xxxxx
x   x
xg  x
x  .x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    88.60599895 -9791.50993282     0.        ]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831    88.60599895 -9791.50993282     0.        ]
New Q values:  [-5922.26708831    56.81488651 -9791.50993282     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 53.24162309   0.         -63.86984116  37.91882628]
------
Step:2, Action:North
State  273
Old Q Values:  [ 53.24162309   0.         -63.86984116  37.91882628]
New Q values:  [ 37.74111519   0.         -63.86984116  37.91882628]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x.a.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    56.81488651 -9791.50993282     0.        ]
------
Step:3, Action:South
State  192
Old Q Values:  [ 0.38977704 45.86755779  3.70447717  0.        ]
New Q values:  [ 0.38977704 30.47259202  3.70447717  0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    42.41856302]
------
Step:4, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415    42.41856302]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415    40.71949904]
Reward: 9  Episode Reward:  16
xxxxx
xg. x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5927.05031599 -6457.4598        61.17357946 -6307.02      ]
------
Step:5, Action:East
State  260
Old Q Values:  [-5927.05031599 -6457.4598        61.17357946 -6307.02      ]
New Q values:  [-5927.05031599 -6457.4598        36.0852815  -6307.02      ]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    40.71949904]
------
Step:6, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415    40.71949904]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415    26.51338407]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5927.05031599 -6457.4598        36.0852815  -6307.02      ]
------
Step:7, Action:East
State  257
Old Q Values:  [5617.45153544 -180.6          88.572404   6620.49276028]
New Q values:  [5617.45153544 -180.6          42.78297682 6620.49276028]
Reward: -1  Episode Reward:  13
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    26.51338407]
------
Step:8, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415    26.51338407]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415    15.8012621 ]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   19.3196949   -289.59534477 -2335.07150097  -180.6       ]
------
Step:9, Action:North
State  261
Old Q Values:  [   19.3196949   -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [  530.03628063  -289.59534477 -2335.07150097  -180.6       ]
Reward: 9  Episode Reward:  21
xxxxx
x.g x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         1370.59847356 1723.02800889    0.        ]
------
Step:10, Action:East
State  181
Old Q Values:  [    2.19276907    22.72605137 -1284.71014613  -180.6       ]
New Q values:  [ 2.19276907e+00  2.27260514e+01 -6.50534228e+03 -1.80600000e+02]
Reward: -10001  Episode Reward:  -9980
xxxxx
x.. x
x g.x
x  .x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2544.97928459 -8656.02923281 -7525.7277781  -5957.39648921]
------
Step:1, Action:North
State  288
Old Q Values:  [-2544.97928459 -8656.02923281 -7525.7277781  -5957.39648921]
New Q values:  [-1012.38821272 -8656.02923281 -7525.7277781  -5957.39648921]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6.78337057e-01 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [ 6.78337057e-01 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
New Q values:  [ 5.44661215e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -1.13368134 -180.6          -0.74907558]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6          -1.13368134 -180.6          -0.74907558]
New Q values:  [-180.6          -1.13368134 -180.6          -1.01902174]
Reward: -1  Episode Reward:  17
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -3.97971712e-01 -6.22530596e-01]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -3.97971712e-01 -6.22530596e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -1.06489521e+00 -6.22530596e-01]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -1.13368134 -180.6          -1.01902174]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6          -1.13368134 -180.6          -1.01902174]
New Q values:  [-180.6          -1.13368134 -180.6          -1.19436788]
Reward: -1  Episode Reward:  15
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -1.06489521e+00 -6.22530596e-01]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -1.06489521e+00 -6.22530596e-01]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -1.06489521e+00  7.05209944e+00]
Reward: 9  Episode Reward:  24
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    6.33703894 -252.78192178]
------
Step:7, Action:East
State  107
Old Q Values:  [-252.35169558    2.61561149    6.33703894 -252.78192178]
New Q values:  [-252.35169558    2.61561149    5.36885732 -252.78192178]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  1.14468058e+01  5.93951593e+00]
------
Step:8, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -1.06489521e+00  7.05209944e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  7.05209944e+00]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -1.13368134 -180.6          -1.19436788]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6          -1.13368134 -180.6          -1.19436788]
New Q values:  [-180.6          -0.69043333 -180.6          -1.19436788]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.21988389e-01 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
------
Step:10, Action:North
State  208
Old Q Values:  [ 5.44661215e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
New Q values:  [ 1.37151486e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
Reward: -1  Episode Reward:  20
xxxxx
x  ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.69043333 -180.6          -1.19436788]
------
Step:11, Action:West
State  138
Old Q Values:  [-180.6          -0.69043333 -180.6          -1.19436788]
New Q values:  [-180.6          -0.69043333 -180.6           1.03788268]
Reward: -1  Episode Reward:  19
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  7.05209944e+00]
------
Step:12, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  7.05209944e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  2.22083978e+00]
Reward: -1  Episode Reward:  18
xxxxx
xa  x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -2.31842233e+03 -1.23006946e-01  0.00000000e+00]
------
Step:13, Action:West
State  104
Old Q Values:  [-8652.84           0.         -2395.35372705 -6180.6       ]
New Q values:  [-8652.84           0.         -2395.35372705 -8652.84      ]
Reward: -10301  Episode Reward:  -10283
xxxxx
xg  x
x.. x
x.. x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 37.74111519   0.         -63.86984116  37.91882628]
------
Step:1, Action:West
State  273
Old Q Values:  [ 37.74111519   0.         -63.86984116  37.91882628]
New Q values:  [ 37.74111519   0.         -63.86984116 179.5784147 ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  530.03628063  -289.59534477 -2335.07150097  -180.6       ]
------
Step:2, Action:North
State  261
Old Q Values:  [  530.03628063  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [  224.23232766  -289.59534477 -2335.07150097  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.27260514e+01 -6.50534228e+03 -1.80600000e+02]
------
Step:3, Action:South
State  183
Old Q Values:  [ 6.1762476  67.91556927 23.18024546  0.        ]
New Q values:  [ 6.1762476  93.83592601 23.18024546  0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  224.23232766  -289.59534477 -2335.07150097  -180.6       ]
------
Step:4, Action:North
State  261
Old Q Values:  [  224.23232766  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   95.91074648  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.27260514e+01 -6.50534228e+03 -1.80600000e+02]
------
Step:5, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  2.27260514e+01 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  3.72636445e+01 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   95.91074648  -289.59534477 -2335.07150097  -180.6       ]
------
Step:6, Action:North
State  261
Old Q Values:  [   95.91074648  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   65.91507639  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476  93.83592601 23.18024546  0.        ]
------
Step:7, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  3.72636445e+01 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  3.40799807e+01 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   65.91507639  -289.59534477 -2335.07150097  -180.6       ]
------
Step:8, Action:North
State  261
Old Q Values:  [   65.91507639  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   35.99002477  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  3.40799807e+01 -6.50534228e+03 -1.80600000e+02]
------
Step:9, Action:South
State  180
Old Q Values:  [-3431.06190038  -244.53978092    31.65509308     0.        ]
New Q values:  [-3431.06190038   -87.59032792    31.65509308     0.        ]
Reward: -1  Episode Reward:  11
xxxxx
xg. x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5927.05031599 -6457.4598        36.0852815  -6307.02      ]
------
Step:10, Action:East
State  260
Old Q Values:  [-5927.05031599 -6457.4598        36.0852815  -6307.02      ]
New Q values:  [-5927.05031599 -6457.4598        18.57449123 -6307.02      ]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    15.8012621 ]
------
Step:11, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415    15.8012621 ]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415    11.29285221]
Reward: -1  Episode Reward:  9
xxxxx
xg. x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5927.05031599 -6457.4598        18.57449123 -6307.02      ]
------
Step:12, Action:East
State  260
Old Q Values:  [-5927.05031599 -6457.4598        18.57449123 -6307.02      ]
New Q values:  [-5927.05031599 -6457.4598        10.21765215 -6307.02      ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    11.29285221]
------
Step:13, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415    11.29285221]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415     6.98243653]
Reward: -1  Episode Reward:  7
xxxxx
xg. x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5927.05031599 -6457.4598        10.21765215 -6307.02      ]
------
Step:14, Action:East
State  260
Old Q Values:  [-5927.05031599 -6457.4598        10.21765215 -6307.02      ]
New Q values:  [-5.92705032e+03 -6.45745980e+03  5.58179182e+00 -6.30702000e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415     6.98243653]
------
Step:15, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415     6.98243653]
New Q values:  [-1.46393736e+03 -6.21261234e+03 -1.97061654e+03  3.86751216e+00]
Reward: -1  Episode Reward:  5
xxxxx
xg. x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5.92705032e+03 -6.45745980e+03  5.58179182e+00 -6.30702000e+03]
------
Step:16, Action:East
State  260
Old Q Values:  [-5.92705032e+03 -6.45745980e+03  5.58179182e+00 -6.30702000e+03]
New Q values:  [-5.92705032e+03 -6.45745980e+03  2.79297038e+00 -6.30702000e+03]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1.46393736e+03 -6.21261234e+03 -1.97061654e+03  3.86751216e+00]
------
Step:17, Action:West
State  272
Old Q Values:  [-1.46393736e+03 -6.21261234e+03 -1.97061654e+03  3.86751216e+00]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415    11.74401229]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   35.99002477  -289.59534477 -2335.07150097  -180.6       ]
------
Step:18, Action:North
State  261
Old Q Values:  [   35.99002477  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   24.02000412  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x.g x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  3.40799807e+01 -6.50534228e+03 -1.80600000e+02]
------
Step:19, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  3.40799807e+01 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  2.02379935e+01 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  1
xxxxx
x.. x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   24.02000412  -289.59534477 -2335.07150097  -180.6       ]
------
Step:20, Action:North
State  260
Old Q Values:  [-5.92705032e+03 -6.45745980e+03  2.79297038e+00 -6.30702000e+03]
New Q values:  [-8.36192360e+03 -6.45745980e+03  2.79297038e+00 -6.30702000e+03]
Reward: -10001  Episode Reward:  -10000
xxxxx
x.. x
xg..x
x  .x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1.37151486e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
------
Step:1, Action:North
State  216
Old Q Values:  [ 3.21988389e-01 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
New Q values:  [ 6.53484631e+00 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
Reward: 9  Episode Reward:  9
xxxxx
xg.ax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -1.70302373e+01]
------
Step:2, Action:South
State  136
Old Q Values:  [-6.18060000e+03  3.35350317e+00 -1.79384548e+02 -1.70302373e+01]
New Q values:  [-6.18060000e+03  2.70185516e+00 -1.79384548e+02 -1.70302373e+01]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6.53484631e+00 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
------
Step:3, Action:North
State  216
Old Q Values:  [ 6.53484631e+00 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
New Q values:  [ 2.82449507e+00 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
Reward: -1  Episode Reward:  7
xxxxx
xg.ax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  2.70185516e+00 -1.79384548e+02 -1.70302373e+01]
------
Step:4, Action:South
State  136
Old Q Values:  [-6.18060000e+03  2.70185516e+00 -1.79384548e+02 -1.70302373e+01]
New Q values:  [-6.18060000e+03  1.32809059e+00 -1.79384548e+02 -1.70302373e+01]
Reward: -1  Episode Reward:  6
xxxxx
x.g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.82449507e+00 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
------
Step:5, Action:North
State  216
Old Q Values:  [ 2.82449507e+00 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
New Q values:  [ 9.28225205e-01 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
Reward: -1  Episode Reward:  5
xxxxx
xg.ax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.32809059e+00 -1.79384548e+02 -1.70302373e+01]
------
Step:6, Action:South
State  138
Old Q Values:  [-180.6          -0.69043333 -180.6           1.03788268]
New Q values:  [-180.6          -0.46471887 -180.6           1.03788268]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1.37151486e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
------
Step:7, Action:North
State  210
Old Q Values:  [   0.26886588  -23.4318597  -180.6           6.63136808]
New Q values:  [  -0.18108884  -23.4318597  -180.6           6.63136808]
Reward: -1  Episode Reward:  3
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           1.03788268]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           1.03788268]
New Q values:  [-180.6          -0.46471887 -180.6           6.48140501]
Reward: 9  Episode Reward:  12
xxxxx
x.a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  2.22083978e+00]
------
Step:9, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  2.22083978e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  6.28833591e+00]
Reward: 9  Episode Reward:  21
xxxxx
xa  x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -2.31842233e+03 -1.23006946e-01  0.00000000e+00]
------
Step:10, Action:West
State  110
Old Q Values:  [-1.80600000e+02 -2.31842233e+03 -1.23006946e-01  0.00000000e+00]
New Q values:  [-1.80600000e+02 -2.31842233e+03 -1.23006946e-01 -1.80600000e+02]
Reward: -301  Episode Reward:  -280
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -2.31842233e+03 -1.23006946e-01 -1.80600000e+02]
------
Step:11, Action:East
State  111
Old Q Values:  [-177.44732869   39.08340344    4.7537139  -272.09726687]
New Q values:  [-177.44732869   39.08340344    3.18798634 -272.09726687]
Reward: -1  Episode Reward:  -281
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  6.28833591e+00]
------
Step:12, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  6.81388581e+00 -1.01489178e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  6.81388581e+00 -4.03331917e+02]
Reward: -1  Episode Reward:  -282
xxxxx
xag x
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    10.74931661 -6443.21937065  -180.6       ]
------
Step:13, Action:South
State  108
Old Q Values:  [-6.18060000e+03  2.70779136e+00  4.36335945e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  4.83116544e-01  4.36335945e+00  0.00000000e+00]
Reward: -1  Episode Reward:  -283
xxxxx
xg  x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-1343.6616728   -247.57994041     0.             0.        ]
------
Step:14, Action:East
State  188
Old Q Values:  [-1343.6616728   -247.57994041     0.             0.        ]
New Q values:  [-1343.6616728   -247.57994041     7.07015435     0.        ]
Reward: 9  Episode Reward:  -274
xxxxx
xg  x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 0.50612843  0.          5.56718117 -0.84      ]
------
Step:15, Action:East
State  200
Old Q Values:  [ 0.50612843  0.          5.56718117 -0.84      ]
New Q values:  [ 0.50612843  0.          1.98991168 -0.84      ]
Reward: -1  Episode Reward:  -275
xxxxx
x   x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.28225205e-01 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
------
Step:16, Action:West
State  218
Old Q Values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  0.00000000e+00]
New Q values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  4.34610613e+01]
Reward: -1  Episode Reward:  -276
xxxxx
x   x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458   146.87020423     0.        ]
------
Step:17, Action:East
State  200
Old Q Values:  [ 0.50612843  0.          1.98991168 -0.84      ]
New Q values:  [ 0.50612843  0.          0.55900388 -0.84      ]
Reward: -1  Episode Reward:  -277
xxxxx
x   x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.28225205e-01 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
------
Step:18, Action:West
State  218
Old Q Values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  4.34610613e+01]
New Q values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  6.08454858e+01]
Reward: -1  Episode Reward:  -278
xxxxx
x   x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458   146.87020423     0.        ]
------
Step:19, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458   146.87020423     0.        ]
New Q values:  [    0.         -5884.35407458    76.40172743     0.        ]
Reward: -1  Episode Reward:  -279
xxxxx
x   x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  6.08454858e+01]
------
Step:20, Action:West
State  218
Old Q Values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  6.08454858e+01]
New Q values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  2.40074960e+01]
Reward: -1  Episode Reward:  -280
xxxxx
x   x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[0.89767243 0.         0.         0.        ]
------
Step:21, Action:North
State  201
Old Q Values:  [10.42162111  0.          5.39642008  0.024     ]
New Q values:  [7.00269019 0.         5.39642008 0.024     ]
Reward: -1  Episode Reward:  -281
xxxxx
x a x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  1.14468058e+01  5.93951593e+00]
------
Step:22, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03 -1.36606249e+00  6.28833591e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  6.28833591e+00]
Reward: -1  Episode Reward:  -282
xxxxx
x  ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           6.48140501]
------
Step:23, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           6.48140501]
New Q values:  [-180.6          -0.46471887 -180.6           3.87906278]
Reward: -1  Episode Reward:  -283
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.97996507e-01  6.28833591e+00]
------
Step:24, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  6.81388581e+00 -4.03331917e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  6.81388581e+00 -6.16062376e+03]
Reward: -10001  Episode Reward:  -10284
xxxxx
xg  x
x   x
x...x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 37.74111519   0.         -63.86984116 179.5784147 ]
------
Step:1, Action:West
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415    11.74401229]
New Q values:  [-1463.93735889 -6212.61234477 -1970.61654415    17.30360615]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   24.02000412  -289.59534477 -2335.07150097  -180.6       ]
------
Step:2, Action:North
State  261
Old Q Values:  [   24.02000412  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   21.07939971  -289.59534477 -2335.07150097  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.02379935e+01 -6.50534228e+03 -1.80600000e+02]
------
Step:3, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  2.02379935e+01 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  1.38190173e+01 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   21.07939971  -289.59534477 -2335.07150097  -180.6       ]
------
Step:4, Action:North
State  261
Old Q Values:  [   21.07939971  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   11.97746508  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  1.38190173e+01 -6.50534228e+03 -1.80600000e+02]
------
Step:5, Action:South
State  180
Old Q Values:  [-3431.06190038   -87.59032792    31.65509308     0.        ]
New Q values:  [-3431.06190038   -34.79824006    31.65509308     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-8.36192360e+03 -6.45745980e+03  2.79297038e+00 -6.30702000e+03]
------
Step:6, Action:East
State  260
Old Q Values:  [-8.36192360e+03 -6.45745980e+03  2.79297038e+00 -6.30702000e+03]
New Q values:  [-8.3619236e+03 -6.4574598e+03  5.7082700e+00 -6.3070200e+03]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-1463.93735889 -6212.61234477 -1970.61654415    17.30360615]
------
Step:7, Action:North
State  272
Old Q Values:  [-1463.93735889 -6212.61234477 -1970.61654415    17.30360615]
New Q values:  [ -577.54376304 -6212.61234477 -1970.61654415    17.30360615]
Reward: 9  Episode Reward:  23
xxxxx
x.. x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  8.77060172e+00  0.00000000e+00]
------
Step:8, Action:East
State  195
Old Q Values:  [ 38.85388605 225.8560804    4.37937593   0.        ]
New Q values:  [ 38.85388605 225.8560804    9.1411608    0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x.. x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  -0.18108884  -23.4318597  -180.6           6.63136808]
------
Step:9, Action:West
State  208
Old Q Values:  [ 1.37151486e+00 -2.05511157e+03 -2.65170615e+03 -1.31669822e+00]
New Q values:  [ 1.37151486e+00 -2.05511157e+03 -2.65170615e+03  1.59177867e+01]
Reward: -1  Episode Reward:  31
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    56.81488651 -9791.50993282     0.        ]
------
Step:10, Action:South
State  193
Old Q Values:  [-5922.26708831    56.81488651 -9791.50993282     0.        ]
New Q values:  [-5922.26708831    75.99947901 -9791.50993282     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x..gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 37.74111519   0.         -63.86984116 179.5784147 ]
------
Step:11, Action:West
State  273
Old Q Values:  [ 37.74111519   0.         -63.86984116 179.5784147 ]
New Q values:  [ 37.74111519   0.         -63.86984116  74.8246054 ]
Reward: -1  Episode Reward:  29
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   11.97746508  -289.59534477 -2335.07150097  -180.6       ]
------
Step:12, Action:North
State  257
Old Q Values:  [5617.45153544 -180.6          42.78297682 6620.49276028]
New Q values:  [27450.40854202  -180.6           42.78297682  6620.49276028]
Reward: -1  Episode Reward:  28
xxxxx
x.. x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[ 0.00000000e+00 -6.00000000e-01  8.40134264e+04  0.00000000e+00]
------
Step:13, Action:East
State  179
Old Q Values:  [ 0.00000000e+00 -6.00000000e-01  8.40134264e+04  0.00000000e+00]
New Q values:  [ 0.00000000e+00 -6.00000000e-01  3.36074018e+04  0.00000000e+00]
Reward: -1  Episode Reward:  27
xxxxx
x.. x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  8.77060172e+00  0.00000000e+00]
------
Step:14, Action:East
State  192
Old Q Values:  [ 0.38977704 30.47259202  3.70447717  0.        ]
New Q values:  [ 0.38977704 30.47259202  5.65712687  0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1.37151486e+00 -2.05511157e+03 -2.65170615e+03  1.59177867e+01]
------
Step:15, Action:North
State  208
Old Q Values:  [ 1.37151486e+00 -2.05511157e+03 -2.65170615e+03  1.59177867e+01]
New Q values:  [ 3346.62278338 -2055.11156703 -2651.70614553    15.91778667]
Reward: -1  Episode Reward:  25
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6067.82062533  1141.37341868 -8652.84       11155.58059144]
------
Step:16, Action:North
State  130
Old Q Values:  [-274.86862066    0.9241831  -180.00807518    1.41054896]
New Q values:  [-290.12428358    0.9241831  -180.00807518    1.41054896]
Reward: -301  Episode Reward:  -276
xxxxx
x..ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-290.12428358    0.9241831  -180.00807518    1.41054896]
------
Step:17, Action:West
State  130
Old Q Values:  [-290.12428358    0.9241831  -180.00807518    1.41054896]
New Q values:  [-290.12428358    0.9241831  -180.00807518   23.90117876]
Reward: 9  Episode Reward:  -267
xxxxx
x.a x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          59.78986392    6.47656656    0.        ]
------
Step:18, Action:South
State  114
Old Q Values:  [-1.80600000e+02 -5.96189144e+03  4.66838381e-02 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03  4.66838381e-02 -6.00000000e-01]
Reward: -10001  Episode Reward:  -10268
xxxxx
x.  x
x g x
x  .x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 37.74111519   0.         -63.86984116  74.8246054 ]
------
Step:1, Action:West
State  273
Old Q Values:  [ 37.74111519   0.         -63.86984116  74.8246054 ]
New Q values:  [ 37.74111519   0.         -63.86984116  38.92308168]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   11.97746508  -289.59534477 -2335.07150097  -180.6       ]
------
Step:2, Action:North
State  261
Old Q Values:  [   11.97746508  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   14.33669123  -289.59534477 -2335.07150097  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  1.38190173e+01 -6.50534228e+03 -1.80600000e+02]
------
Step:3, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  1.38190173e+01 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  9.22861430e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   14.33669123  -289.59534477 -2335.07150097  -180.6       ]
------
Step:4, Action:North
State  261
Old Q Values:  [   14.33669123  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [    7.90326078  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  9.22861430e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:5, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  9.22861430e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  5.46242395e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    7.90326078  -289.59534477 -2335.07150097  -180.6       ]
------
Step:6, Action:North
State  261
Old Q Values:  [    7.90326078  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [    4.2000315   -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  5.46242395e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:7, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  5.46242395e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  2.84497903e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    4.2000315   -289.59534477 -2335.07150097  -180.6       ]
------
Step:8, Action:North
State  261
Old Q Values:  [    4.2000315   -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [ 1.93350631e+00 -2.89595345e+02 -2.33507150e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  12
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.84497903e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:9, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  2.84497903e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  1.11804350e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1.93350631e+00 -2.89595345e+02 -2.33507150e+03 -1.80600000e+02]
------
Step:10, Action:North
State  261
Old Q Values:  [ 1.93350631e+00 -2.89595345e+02 -2.33507150e+03 -1.80600000e+02]
New Q values:  [   28.32418033  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476  93.83592601 23.18024546  0.        ]
------
Step:11, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  1.11804350e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  8.34447150e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   28.32418033  -289.59534477 -2335.07150097  -180.6       ]
------
Step:12, Action:North
State  261
Old Q Values:  [   28.32418033  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   38.88044993  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476  93.83592601 23.18024546  0.        ]
------
Step:13, Action:South
State  183
Old Q Values:  [ 6.1762476  93.83592601 23.18024546  0.        ]
New Q values:  [ 6.1762476  48.59850538 23.18024546  0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   38.88044993  -289.59534477 -2335.07150097  -180.6       ]
------
Step:14, Action:North
State  261
Old Q Values:  [   38.88044993  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   29.53173159  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476  48.59850538 23.18024546  0.        ]
------
Step:15, Action:South
State  183
Old Q Values:  [ 6.1762476  48.59850538 23.18024546  0.        ]
New Q values:  [ 6.1762476  27.69892163 23.18024546  0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   29.53173159  -289.59534477 -2335.07150097  -180.6       ]
------
Step:16, Action:North
State  261
Old Q Values:  [   29.53173159  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   13.71603408  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  8.34447150e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:17, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  8.34447150e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  6.85259883e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  3
xxxxx
x.g x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   13.71603408  -289.59534477 -2335.07150097  -180.6       ]
------
Step:18, Action:North
State  261
Old Q Values:  [   13.71603408  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [    6.94219328  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  6.85259883e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:19, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  6.85259883e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  4.22369751e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  1
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    6.94219328  -289.59534477 -2335.07150097  -180.6       ]
------
Step:20, Action:North
State  261
Old Q Values:  [    6.94219328  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [   10.4865538   -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  0
xxxxx
x.. x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476  27.69892163 23.18024546  0.        ]
------
Step:21, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  4.22369751e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  4.23544515e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -1
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   10.4865538   -289.59534477 -2335.07150097  -180.6       ]
------
Step:22, Action:North
State  261
Old Q Values:  [   10.4865538   -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [    4.86525506  -289.59534477 -2335.07150097  -180.6       ]
Reward: -1  Episode Reward:  -2
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  4.23544515e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:23, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  4.23544515e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -3
xxxxx
x.g x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    4.86525506  -289.59534477 -2335.07150097  -180.6       ]
------
Step:24, Action:North
State  261
Old Q Values:  [    4.86525506  -289.59534477 -2335.07150097  -180.6       ]
New Q values:  [ 2.11222840e+00 -2.89595345e+02 -2.33507150e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -4
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:25, Action:South
State  180
Old Q Values:  [-3431.06190038   -34.79824006    31.65509308     0.        ]
New Q values:  [-3431.06190038   -12.80681502    31.65509308     0.        ]
Reward: -1  Episode Reward:  -5
xxxxx
x.. x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-8.3619236e+03 -6.4574598e+03  5.7082700e+00 -6.3070200e+03]
------
Step:26, Action:East
State  261
Old Q Values:  [ 2.11222840e+00 -2.89595345e+02 -2.33507150e+03 -1.80600000e+02]
New Q values:  [   2.1122284  -289.59534477 -929.43751854 -180.6       ]
Reward: -1  Episode Reward:  -6
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415    17.30360615]
------
Step:27, Action:West
State  272
Old Q Values:  [ -577.54376304 -6212.61234477 -1970.61654415    17.30360615]
New Q values:  [ -577.54376304 -6212.61234477 -1970.61654415     8.03392346]
Reward: -1  Episode Reward:  -7
xxxxx
x.. x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-8.3619236e+03 -6.4574598e+03  5.7082700e+00 -6.3070200e+03]
------
Step:28, Action:East
State  260
Old Q Values:  [-8.3619236e+03 -6.4574598e+03  5.7082700e+00 -6.3070200e+03]
New Q values:  [-8.36192360e+03 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
Reward: -1  Episode Reward:  -8
xxxxx
xg. x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415     8.03392346]
------
Step:29, Action:West
State  272
Old Q Values:  [ -577.54376304 -6212.61234477 -1970.61654415     8.03392346]
New Q values:  [-5.77543763e+02 -6.21261234e+03 -1.97061654e+03  3.24723790e+00]
Reward: -1  Episode Reward:  -9
xxxxx
x.g x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   2.1122284  -289.59534477 -929.43751854 -180.6       ]
------
Step:30, Action:North
State  261
Old Q Values:  [   2.1122284  -289.59534477 -929.43751854 -180.6       ]
New Q values:  [   1.01101773 -289.59534477 -929.43751854 -180.6       ]
Reward: -1  Episode Reward:  -10
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:31, Action:South
State  183
Old Q Values:  [ 6.1762476  27.69892163 23.18024546  0.        ]
New Q values:  [ 6.1762476  10.78287397 23.18024546  0.        ]
Reward: -1  Episode Reward:  -11
xxxxx
x.. x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   1.01101773 -289.59534477 -929.43751854 -180.6       ]
------
Step:32, Action:North
State  261
Old Q Values:  [   1.01101773 -289.59534477 -929.43751854 -180.6       ]
New Q values:  [   6.75848073 -289.59534477 -929.43751854 -180.6       ]
Reward: -1  Episode Reward:  -12
xxxxx
x.. x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476  10.78287397 23.18024546  0.        ]
------
Step:33, Action:East
State  177
Old Q Values:  [   0.         1370.59847356 1723.02800889    0.        ]
New Q values:  [   0.         1370.59847356  717.41104726    0.        ]
Reward: 9  Episode Reward:  -3
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    75.99947901 -9791.50993282     0.        ]
------
Step:34, Action:South
State  192
Old Q Values:  [ 0.38977704 30.47259202  5.65712687  0.        ]
New Q values:  [ 0.38977704 12.56320818  5.65712687  0.        ]
Reward: -1  Episode Reward:  -4
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-5.77543763e+02 -6.21261234e+03 -1.97061654e+03  3.24723790e+00]
------
Step:35, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627   39.80017244]
New Q values:  [   5.54544775 -180.6        -424.74015627   17.3476132 ]
Reward: -1  Episode Reward:  -5
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   6.75848073 -289.59534477 -929.43751854 -180.6       ]
------
Step:36, Action:North
State  260
Old Q Values:  [-8.36192360e+03 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
New Q values:  [-3.33587291e+03 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
Reward: -1  Episode Reward:  -6
xxxxx
xg. x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038   -12.80681502    31.65509308     0.        ]
------
Step:37, Action:East
State  180
Old Q Values:  [-3431.06190038   -12.80681502    31.65509308     0.        ]
New Q values:  [-3431.06190038   -12.80681502    15.83099968     0.        ]
Reward: -1  Episode Reward:  -7
xxxxx
x.. x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 0.38977704 12.56320818  5.65712687  0.        ]
------
Step:38, Action:South
State  192
Old Q Values:  [ 0.38977704 12.56320818  5.65712687  0.        ]
New Q values:  [0.38977704 9.62956723 5.65712687 0.        ]
Reward: -1  Episode Reward:  -8
xxxxx
xg. x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   5.54544775 -180.6        -424.74015627   17.3476132 ]
------
Step:39, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627   17.3476132 ]
New Q values:  [   5.54544775 -180.6        -424.74015627    8.3665895 ]
Reward: -1  Episode Reward:  -9
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   6.75848073 -289.59534477 -929.43751854 -180.6       ]
------
Step:40, Action:North
State  261
Old Q Values:  [   6.75848073 -289.59534477 -929.43751854 -180.6       ]
New Q values:  [   2.86951867 -289.59534477 -929.43751854 -180.6       ]
Reward: -1  Episode Reward:  -10
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:41, Action:South
State  177
Old Q Values:  [   0.         1370.59847356  717.41104726    0.        ]
New Q values:  [  0.         548.50024503 717.41104726   0.        ]
Reward: -1  Episode Reward:  -11
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   2.86951867 -289.59534477 -929.43751854 -180.6       ]
------
Step:42, Action:North
State  261
Old Q Values:  [   2.86951867 -289.59534477 -929.43751854 -180.6       ]
New Q values:  [   1.31393384 -289.59534477 -929.43751854 -180.6       ]
Reward: -1  Episode Reward:  -12
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:43, Action:South
State  177
Old Q Values:  [  0.         548.50024503 717.41104726   0.        ]
New Q values:  [  0.         219.19427816 717.41104726   0.        ]
Reward: -1  Episode Reward:  -13
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   1.31393384 -289.59534477 -929.43751854 -180.6       ]
------
Step:44, Action:North
State  261
Old Q Values:  [   1.31393384 -289.59534477 -929.43751854 -180.6       ]
New Q values:  [ 6.91699909e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
Reward: -1  Episode Reward:  -14
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:45, Action:South
State  177
Old Q Values:  [  0.         219.19427816 717.41104726   0.        ]
New Q values:  [  0.          87.28522124 717.41104726   0.        ]
Reward: -1  Episode Reward:  -15
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 6.91699909e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
------
Step:46, Action:North
State  261
Old Q Values:  [ 6.91699909e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
New Q values:  [ 4.42806337e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
Reward: -1  Episode Reward:  -16
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:47, Action:South
State  177
Old Q Values:  [  0.          87.28522124 717.41104726   0.        ]
New Q values:  [  0.          34.4469304  717.41104726   0.        ]
Reward: -1  Episode Reward:  -17
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4.42806337e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
------
Step:48, Action:North
State  261
Old Q Values:  [ 4.42806337e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
New Q values:  [ 3.43248908e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
Reward: -1  Episode Reward:  -18
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:49, Action:South
State  177
Old Q Values:  [  0.          34.4469304  717.41104726   0.        ]
New Q values:  [  0.          13.28174683 717.41104726   0.        ]
Reward: -1  Episode Reward:  -19
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3.43248908e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
------
Step:50, Action:North
State  261
Old Q Values:  [ 3.43248908e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
New Q values:  [ 3.03425937e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
Reward: -1  Episode Reward:  -20
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:51, Action:South
State  177
Old Q Values:  [  0.          13.28174683 717.41104726   0.        ]
New Q values:  [  0.           4.80372651 717.41104726   0.        ]
Reward: -1  Episode Reward:  -21
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3.03425937e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
------
Step:52, Action:North
State  261
Old Q Values:  [ 3.03425937e-01 -2.89595345e+02 -9.29437519e+02 -1.80600000e+02]
New Q values:  [   6.47544401 -289.59534477 -929.43751854 -180.6       ]
Reward: -1  Episode Reward:  -22
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476  10.78287397 23.18024546  0.        ]
------
Step:53, Action:East
State  183
Old Q Values:  [ 6.1762476  10.78287397 23.18024546  0.        ]
New Q values:  [ 6.1762476  10.78287397 11.3032787   0.        ]
Reward: -1  Episode Reward:  -23
xxxxx
x.. x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  8.77060172e+00  0.00000000e+00]
------
Step:54, Action:East
State  192
Old Q Values:  [0.38977704 9.62956723 5.65712687 0.        ]
New Q values:  [3.89777037e-01 9.62956723e+00 1.01164969e+03 0.00000000e+00]
Reward: 9  Episode Reward:  -14
xxxxx
x.. x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3346.62278338 -2055.11156703 -2651.70614553    15.91778667]
------
Step:55, Action:North
State  210
Old Q Values:  [  -0.18108884  -23.4318597  -180.6           6.63136808]
New Q values:  [   6.49791809  -23.4318597  -180.6           6.63136808]
Reward: -1  Episode Reward:  -15
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-290.12428358    0.9241831  -180.00807518   23.90117876]
------
Step:56, Action:West
State  130
Old Q Values:  [-290.12428358    0.9241831  -180.00807518   23.90117876]
New Q values:  [-290.12428358    0.9241831  -180.00807518   14.97447665]
Reward: 9  Episode Reward:  -6
xxxxx
x.a x
x   x
xg .x
xxxxx
Step:57, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03  4.66838381e-02 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03  3.91101653e+00 -6.00000000e-01]
Reward: -1  Episode Reward:  -7
xxxxx
x. ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-290.12428358    0.9241831  -180.00807518   14.97447665]
------
Step:58, Action:West
State  130
Old Q Values:  [-290.12428358    0.9241831  -180.00807518   14.97447665]
New Q values:  [-290.12428358    0.9241831  -180.00807518    6.56309562]
Reward: -1  Episode Reward:  -8
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03  3.91101653e+00 -6.00000000e-01]
------
Step:59, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03  3.91101653e+00 -6.00000000e-01]
New Q values:  [-1.8060000e+02 -8.3762148e+03  2.9333353e+00 -6.0000000e-01]
Reward: -1  Episode Reward:  -9
xxxxx
x. ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-290.12428358    0.9241831  -180.00807518    6.56309562]
------
Step:60, Action:West
State  130
Old Q Values:  [-290.12428358    0.9241831  -180.00807518    6.56309562]
New Q values:  [-290.12428358    0.9241831  -180.00807518    2.90523884]
Reward: -1  Episode Reward:  -10
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.8060000e+02 -8.3762148e+03  2.9333353e+00 -6.0000000e-01]
------
Step:61, Action:East
State  114
Old Q Values:  [-1.8060000e+02 -8.3762148e+03  2.9333353e+00 -6.0000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03  1.44490577e+00 -6.00000000e-01]
Reward: -1  Episode Reward:  -11
xxxxx
x. ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-290.12428358    0.9241831  -180.00807518    2.90523884]
------
Step:62, Action:West
State  130
Old Q Values:  [-290.12428358    0.9241831  -180.00807518    2.90523884]
New Q values:  [-290.12428358    0.9241831  -180.00807518    0.99556727]
Reward: -1  Episode Reward:  -12
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03  1.44490577e+00 -6.00000000e-01]
------
Step:63, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03  1.44490577e+00 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03  2.76632488e-01 -6.00000000e-01]
Reward: -1  Episode Reward:  -13
xxxxx
x. ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-290.12428358    0.9241831  -180.00807518    0.99556727]
------
Step:64, Action:West
State  130
Old Q Values:  [-290.12428358    0.9241831  -180.00807518    0.99556727]
New Q values:  [-2.90124284e+02  9.24183102e-01 -1.80008075e+02 -1.18783347e-01]
Reward: -1  Episode Reward:  -14
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03  2.76632488e-01 -6.00000000e-01]
------
Step:65, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03  2.76632488e-01 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03 -2.12092074e-01 -6.00000000e-01]
Reward: -1  Episode Reward:  -15
xxxxx
x. ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.90124284e+02  9.24183102e-01 -1.80008075e+02 -1.18783347e-01]
------
Step:66, Action:South
State  130
Old Q Values:  [-2.90124284e+02  9.24183102e-01 -1.80008075e+02 -1.18783347e-01]
New Q values:  [-2.90124284e+02  1.75908366e+00 -1.80008075e+02 -1.18783347e-01]
Reward: -1  Episode Reward:  -16
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   6.49791809  -23.4318597  -180.6           6.63136808]
------
Step:67, Action:West
State  208
Old Q Values:  [ 3346.62278338 -2055.11156703 -2651.70614553    15.91778667]
New Q values:  [ 3346.62278338 -2055.11156703 -2651.70614553    28.56695837]
Reward: -1  Episode Reward:  -17
xxxxx
x.  x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    75.99947901 -9791.50993282     0.        ]
------
Step:68, Action:South
State  193
Old Q Values:  [-5922.26708831    75.99947901 -9791.50993282     0.        ]
New Q values:  [-5922.26708831    41.47671611 -9791.50993282     0.        ]
Reward: -1  Episode Reward:  -18
xxxxx
x. gx
x   x
x a.x
xxxxx
Step:69, Action:South
State  273
Old Q Values:  [ 37.74111519   0.         -63.86984116  38.92308168]
New Q values:  [  37.74111519 -168.92307549  -63.86984116   38.92308168]
Reward: -301  Episode Reward:  -319
xxxxx
x.  x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  -63.86984116   38.92308168]
------
Step:70, Action:West
State  272
Old Q Values:  [-5.77543763e+02 -6.21261234e+03 -1.97061654e+03  3.24723790e+00]
New Q values:  [ -577.54376304 -6212.61234477 -1970.61654415  8235.82145777]
Reward: -1  Episode Reward:  -320
xxxxx
x.  x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[27450.40854202  -180.6           42.78297682  6620.49276028]
------
Step:71, Action:North
State  257
Old Q Values:  [27450.40854202  -180.6           42.78297682  6620.49276028]
New Q values:  [21061.7839421   -180.6           42.78297682  6620.49276028]
Reward: -1  Episode Reward:  -321
xxxxx
x.  x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[ 0.00000000e+00 -6.00000000e-01  3.36074018e+04  0.00000000e+00]
------
Step:72, Action:East
State  177
Old Q Values:  [  0.           4.80372651 717.41104726   0.        ]
New Q values:  [ 0.00000000e+00  4.80372651e+00 -5.41014068e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -10322
xxxxx
x.  x
x g x
x  .x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1012.38821272 -8656.02923281 -7525.7277781  -5957.39648921]
------
Step:1, Action:West
State  288
Old Q Values:  [-1012.38821272 -8656.02923281 -7525.7277781  -5957.39648921]
New Q values:  [-1012.38821272 -8656.02923281 -7525.7277781  -2365.88167118]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  -63.86984116   38.92308168]
------
Step:2, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  -63.86984116   38.92308168]
New Q values:  [  37.74111519 -168.92307549  -63.86984116   22.91186588]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   6.47544401 -289.59534477 -929.43751854 -180.6       ]
------
Step:3, Action:North
State  261
Old Q Values:  [   6.47544401 -289.59534477 -929.43751854 -180.6       ]
New Q values:  [   8.75630398 -289.59534477 -929.43751854 -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:4, Action:South
State  183
Old Q Values:  [ 6.1762476  10.78287397 11.3032787   0.        ]
New Q values:  [ 6.1762476   6.34004078 11.3032787   0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.75630398 -289.59534477 -929.43751854 -180.6       ]
------
Step:5, Action:North
State  260
Old Q Values:  [-3.33587291e+03 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
New Q values:  [-1.17341684e+03 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   538.44106874     0.        ]
------
Step:6, Action:East
State  180
Old Q Values:  [-3431.06190038   -12.80681502    15.83099968     0.        ]
New Q values:  [-3431.06190038   -12.80681502   315.2273056      0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 1.01164969e+03 0.00000000e+00]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  8.77060172e+00  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  4.89765111e+00  0.00000000e+00]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   6.49791809  -23.4318597  -180.6           6.63136808]
------
Step:8, Action:West
State  208
Old Q Values:  [ 3346.62278338 -2055.11156703 -2651.70614553    28.56695837]
New Q values:  [ 3346.62278338 -2055.11156703 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  32
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 1.01164969e+03 0.00000000e+00]
------
Step:9, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.62956723e+00 1.01164969e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 9.62956723e+00 1.40804671e+03 0.00000000e+00]
Reward: -1  Episode Reward:  31
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3346.62278338 -2055.11156703 -2651.70614553   314.32168908]
------
Step:10, Action:North
State  208
Old Q Values:  [ 3346.62278338 -2055.11156703 -2651.70614553   314.32168908]
New Q values:  [ 4690.72329078 -2055.11156703 -2651.70614553   314.32168908]
Reward: 9  Episode Reward:  40
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6067.82062533  1141.37341868 -8652.84       11155.58059144]
------
Step:11, Action:North
State  130
Old Q Values:  [-2.90124284e+02  1.75908366e+00 -1.80008075e+02 -1.18783347e-01]
New Q values:  [-2.96121988e+02  1.75908366e+00 -1.80008075e+02 -1.18783347e-01]
Reward: -301  Episode Reward:  -261
xxxxx
x..ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02  1.75908366e+00 -1.80008075e+02 -1.18783347e-01]
------
Step:12, Action:South
State  130
Old Q Values:  [-2.96121988e+02  1.75908366e+00 -1.80008075e+02 -1.18783347e-01]
New Q values:  [-2.96121988e+02  1.40732062e+03 -1.80008075e+02 -1.18783347e-01]
Reward: -1  Episode Reward:  -262
xxxxx
x.. x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4690.72329078 -2055.11156703 -2651.70614553   314.32168908]
------
Step:13, Action:North
State  208
Old Q Values:  [ 4690.72329078 -2055.11156703 -2651.70614553   314.32168908]
New Q values:  [ 2297.88550252 -2055.11156703 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  -263
xxxxx
x..ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02  1.40732062e+03 -1.80008075e+02 -1.18783347e-01]
------
Step:14, Action:South
State  130
Old Q Values:  [-2.96121988e+02  1.40732062e+03 -1.80008075e+02 -1.18783347e-01]
New Q values:  [-2.96121988e+02 -4.74830610e+03 -1.80008075e+02 -1.18783347e-01]
Reward: -10001  Episode Reward:  -10264
xxxxx
x.. x
x  gx
x   x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415  8235.82145777]
------
Step:1, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627    8.3665895 ]
New Q values:  [   5.54544775 -180.6        -424.74015627    9.97468131]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1.17341684e+03 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
------
Step:2, Action:East
State  261
Old Q Values:  [   8.75630398 -289.59534477 -929.43751854 -180.6       ]
New Q values:  [   8.75630398 -289.59534477 2098.37142991 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415  8235.82145777]
------
Step:3, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  -63.86984116   22.91186588]
New Q values:  [  37.74111519 -168.92307549  -63.86984116  638.07617533]
Reward: -1  Episode Reward:  7
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.75630398 -289.59534477 2098.37142991 -180.6       ]
------
Step:4, Action:East
State  261
Old Q Values:  [   8.75630398 -289.59534477 2098.37142991 -180.6       ]
New Q values:  [   8.75630398 -289.59534477 1030.17142456 -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  -63.86984116  638.07617533]
------
Step:5, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  -63.86984116  638.07617533]
New Q values:  [  37.74111519 -168.92307549  -63.86984116  563.6818975 ]
Reward: -1  Episode Reward:  5
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.75630398 -289.59534477 1030.17142456 -180.6       ]
------
Step:6, Action:East
State  261
Old Q Values:  [   8.75630398 -289.59534477 1030.17142456 -180.6       ]
New Q values:  [   8.75630398 -289.59534477 2882.21500716 -180.6       ]
Reward: -1  Episode Reward:  4
xxxxx
x.g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415  8235.82145777]
------
Step:7, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627    9.97468131]
New Q values:  [   5.54544775 -180.6        -424.74015627    4.61791804]
Reward: -1  Episode Reward:  3
xxxxx
xg .x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1.17341684e+03 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
------
Step:8, Action:East
State  261
Old Q Values:  [   8.75630398 -289.59534477 2882.21500716 -180.6       ]
New Q values:  [   8.75630398 -289.59534477 3623.03244019 -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x.g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415  8235.82145777]
------
Step:9, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  -63.86984116  563.6818975 ]
New Q values:  [  37.74111519 -168.92307549  -63.86984116 1311.78249106]
Reward: -1  Episode Reward:  1
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.75630398 -289.59534477 3623.03244019 -180.6       ]
------
Step:10, Action:East
State  261
Old Q Values:  [   8.75630398 -289.59534477 3623.03244019 -180.6       ]
New Q values:  [   8.75630398 -289.59534477 3919.35941341 -180.6       ]
Reward: -1  Episode Reward:  0
xxxxx
x.g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415  8235.82145777]
------
Step:11, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627    4.61791804]
New Q values:  [   5.54544775 -180.6        -424.74015627 1177.05499124]
Reward: -1  Episode Reward:  -1
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.75630398 -289.59534477 3919.35941341 -180.6       ]
------
Step:12, Action:East
State  261
Old Q Values:  [   8.75630398 -289.59534477 3919.35941341 -180.6       ]
New Q values:  [   8.75630398 -289.59534477 1960.67851268 -180.6       ]
Reward: -1  Episode Reward:  -2
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  -63.86984116 1311.78249106]
------
Step:13, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627 1177.05499124]
New Q values:  [   5.54544775 -180.6        -424.74015627 1058.4255503 ]
Reward: -1  Episode Reward:  -3
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.75630398 -289.59534477 1960.67851268 -180.6       ]
------
Step:14, Action:East
State  261
Old Q Values:  [   8.75630398 -289.59534477 1960.67851268 -180.6       ]
New Q values:  [   8.75630398 -289.59534477 1177.20615239 -180.6       ]
Reward: -1  Episode Reward:  -4
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  -63.86984116 1311.78249106]
------
Step:15, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627 1058.4255503 ]
New Q values:  [   5.54544775 -180.6        -424.74015627  775.93206584]
Reward: -1  Episode Reward:  -5
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.75630398 -289.59534477 1177.20615239 -180.6       ]
------
Step:16, Action:East
State  261
Old Q Values:  [   8.75630398 -289.59534477 1177.20615239 -180.6       ]
New Q values:  [    8.75630398  -289.59534477 -3058.97110171  -180.6       ]
Reward: -10001  Episode Reward:  -10006
xxxxx
x. .x
x...x
x g.x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1012.38821272 -8656.02923281 -7525.7277781  -2365.88167118]
------
Step:1, Action:North
State  288
Old Q Values:  [-1012.38821272 -8656.02923281 -7525.7277781  -2365.88167118]
New Q values:  [  289.81036567 -8656.02923281 -7525.7277781  -2365.88167118]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2297.88550252 -2055.11156703 -2651.70614553   314.32168908]
------
Step:2, Action:North
State  208
Old Q Values:  [ 2297.88550252 -2055.11156703 -2651.70614553   314.32168908]
New Q values:  [  925.71791984 -2055.11156703 -2651.70614553   314.32168908]
Reward: 9  Episode Reward:  18
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           3.87906278]
------
Step:3, Action:West
State  136
Old Q Values:  [-6.18060000e+03  1.32809059e+00 -1.79384548e+02 -1.70302373e+01]
New Q values:  [-6.18060000e+03  1.32809059e+00 -1.79384548e+02 -5.99936793e+03]
Reward: -9991  Episode Reward:  -9973
xxxxx
x g x
x.. x
x.. x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  289.81036567 -8656.02923281 -7525.7277781  -2365.88167118]
------
Step:1, Action:North
State  288
Old Q Values:  [  289.81036567 -8656.02923281 -7525.7277781  -2365.88167118]
New Q values:  [  399.03952222 -8656.02923281 -7525.7277781  -2365.88167118]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  925.71791984 -2055.11156703 -2651.70614553   314.32168908]
------
Step:2, Action:North
State  208
Old Q Values:  [  925.71791984 -2055.11156703 -2651.70614553   314.32168908]
New Q values:  [  376.85088677 -2055.11156703 -2651.70614553   314.32168908]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           3.87906278]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           3.87906278]
New Q values:  [-180.6          -0.46471887 -180.6          10.38566686]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  1.14468058e+01  5.93951593e+00]
------
Step:4, Action:East
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  1.14468058e+01  5.93951593e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  7.09442239e+00  5.93951593e+00]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6          10.38566686]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6          10.38566686]
New Q values:  [-180.6          -0.46471887 -180.6           5.44076752]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.97996507e-01  6.28833591e+00]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  6.28833591e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  8.81091052e+00]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  2.98525386e+00 -1.80600000e+02]
------
Step:7, Action:East
State  107
Old Q Values:  [-252.35169558    2.61561149    5.36885732 -252.78192178]
New Q values:  [-252.35169558    2.61561149    4.19081609 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.97996507e-01  8.81091052e+00]
------
Step:8, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  7.09442239e+00  5.93951593e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  7.09442239e+00  3.03305120e+00]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    4.19081609 -252.78192178]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558    2.61561149    4.19081609 -252.78192178]
New Q values:  [-252.35169558    2.61561149    3.71959959 -252.78192178]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.97996507e-01  8.81091052e+00]
------
Step:10, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  8.81091052e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  3.81994037e+00]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  2.98525386e+00 -1.80600000e+02]
------
Step:11, Action:East
State  106
Old Q Values:  [-1.80600000e+02 -6.00060000e+03  2.98525386e+00 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -6.00060000e+03  1.74008365e+00 -1.80600000e+02]
Reward: -1  Episode Reward:  29
xxxxx
x a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.97996507e-01  3.81994037e+00]
------
Step:12, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  6.81388581e+00 -6.16062376e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  6.81388581e+00 -8.46484950e+03]
Reward: -10001  Episode Reward:  -9972
xxxxx
xg  x
x.  x
x.. x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  376.85088677 -2055.11156703 -2651.70614553   314.32168908]
------
Step:1, Action:North
State  210
Old Q Values:  [   6.49791809  -23.4318597  -180.6           6.63136808]
New Q values:  [   9.63139749  -23.4318597  -180.6           6.63136808]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           5.44076752]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           5.44076752]
New Q values:  [-180.6          -0.46471887 -180.6           8.72228912]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.97996507e-01  3.81994037e+00]
------
Step:3, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  3.81994037e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  1.86529972e+01]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   39.08340344    3.18798634 -272.09726687]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869   39.08340344    3.18798634 -272.09726687]
New Q values:  [-177.44732869   18.42434499    3.18798634 -272.09726687]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476   6.34004078 11.3032787   0.        ]
------
Step:5, Action:East
State  190
Old Q Values:  [1.04129094 0.         5.09673373 0.        ]
New Q values:  [ 1.04129094  0.         30.35921172  0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    76.40172743     0.        ]
------
Step:6, Action:East
State  200
Old Q Values:  [ 0.50612843  0.          0.55900388 -0.84      ]
New Q values:  [ 0.50612843  0.         -0.01335924 -0.84      ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.28225205e-01 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
------
Step:7, Action:West
State  216
Old Q Values:  [ 9.28225205e-01 -6.42117887e+02 -6.17035694e+03  1.21013069e+00]
New Q values:  [ 9.28225205e-01 -6.42117887e+02 -6.17035694e+03 -5.99996411e+03]
Reward: -10001  Episode Reward:  -9967
xxxxx
x   x
x g x
x...x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  7.97996507e-01  1.86529972e+01]
------
Step:1, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  1.86529972e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  1.39770787e+01]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    3.71959959 -252.78192178]
------
Step:2, Action:East
State  107
Old Q Values:  [-252.35169558    2.61561149    3.71959959 -252.78192178]
New Q values:  [-252.35169558    2.61561149    3.01616655 -252.78192178]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  7.09442239e+00  3.03305120e+00]
------
Step:3, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  7.97996507e-01  1.39770787e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  8.33588534e+00  1.39770787e+01]
Reward: 9  Episode Reward:  17
xxxxx
x  ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           8.72228912]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           8.72228912]
New Q values:  [-180.6          -0.46471887 -180.6           7.08203927]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.33588534e+00  1.39770787e+01]
------
Step:5, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  8.33588534e+00  1.39770787e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  8.33588534e+00  5.89568147e+00]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    3.01616655 -252.78192178]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558    2.61561149    3.01616655 -252.78192178]
New Q values:  [-252.35169558    2.61561149    2.73479334 -252.78192178]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  7.09442239e+00  3.03305120e+00]
------
Step:7, Action:East
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  7.09442239e+00  3.03305120e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  4.36238074e+00  3.03305120e+00]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           7.08203927]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           7.08203927]
New Q values:  [-180.6          -0.46471887 -180.6           3.54152993]
Reward: -1  Episode Reward:  12
xxxxx
x a x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  4.36238074e+00  3.03305120e+00]
------
Step:9, Action:East
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  4.36238074e+00  3.03305120e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.20741127e+00  3.03305120e+00]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           3.54152993]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           3.54152993]
New Q values:  [-180.6          -0.46471887 -180.6           3.31737757]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.33588534e+00  5.89568147e+00]
------
Step:11, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  6.81388581e+00 -8.46484950e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.52398150e+00 -8.46484950e+03]
Reward: -1  Episode Reward:  9
xxxxx
x gax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.32809059e+00 -1.79384548e+02 -5.99936793e+03]
------
Step:12, Action:South
State  136
Old Q Values:  [-6.18060000e+03  1.32809059e+00 -1.79384548e+02 -5.99936793e+03]
New Q values:  [-6180.6            6.2097038   -179.38454759 -5999.3679292 ]
Reward: 9  Episode Reward:  18
xxxxx
x  gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.28225205e-01 -6.42117887e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:13, Action:South
State  216
Old Q Values:  [ 9.28225205e-01 -6.42117887e+02 -6.17035694e+03 -5.99996411e+03]
New Q values:  [ 9.28225205e-01 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  399.03952222 -8656.02923281 -7525.7277781  -2365.88167118]
------
Step:14, Action:West
State  288
Old Q Values:  [  399.03952222 -8656.02923281 -7525.7277781  -2365.88167118]
New Q values:  [  399.03952222 -8656.02923281 -7525.7277781   1529.79376886]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415  8235.82145777]
------
Step:15, Action:West
State  272
Old Q Values:  [ -577.54376304 -6212.61234477 -1970.61654415  8235.82145777]
New Q values:  [ -577.54376304 -6212.61234477 -1970.61654415  3315.25665298]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.            51.76023291 -6036.76313782]
------
Step:16, Action:East
State  257
Old Q Values:  [21061.7839421   -180.6           42.78297682  6620.49276028]
New Q values:  [21061.7839421   -180.6         1011.09018662  6620.49276028]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415  3315.25665298]
------
Step:17, Action:West
State  272
Old Q Values:  [ -577.54376304 -6212.61234477 -1970.61654415  3315.25665298]
New Q values:  [ -577.54376304 -6212.61234477 -1970.61654415  1341.03073107]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.            51.76023291 -6036.76313782]
------
Step:18, Action:East
State  256
Old Q Values:  [    0.             0.            51.76023291 -6036.76313782]
New Q values:  [    0.             0.           422.41331248 -6036.76313782]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -577.54376304 -6212.61234477 -1970.61654415  1341.03073107]
------
Step:19, Action:North
State  272
Old Q Values:  [ -577.54376304 -6212.61234477 -1970.61654415  1341.03073107]
New Q values:  [ -230.14820988 -6212.61234477 -1970.61654415  1341.03073107]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x.a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  4.89765111e+00  0.00000000e+00]
------
Step:20, Action:East
State  195
Old Q Values:  [ 38.85388605 225.8560804    9.1411608    0.        ]
New Q values:  [ 38.85388605 225.8560804    5.94588357   0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x. ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   9.63139749  -23.4318597  -180.6           6.63136808]
------
Step:21, Action:North
State  210
Old Q Values:  [   9.63139749  -23.4318597  -180.6           6.63136808]
New Q values:  [   4.24777227  -23.4318597  -180.6           6.63136808]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           3.31737757]
------
Step:22, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           3.31737757]
New Q values:  [-180.6          -0.46471887 -180.6           3.22771663]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.33588534e+00  5.89568147e+00]
------
Step:23, Action:East
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.20741127e+00  3.03305120e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  1.25127950e+00  3.03305120e+00]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           3.22771663]
------
Step:24, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           3.22771663]
New Q values:  [-180.6          -0.46471887 -180.6           3.19185225]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  8.33588534e+00  5.89568147e+00]
------
Step:25, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  8.33588534e+00  5.89568147e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  5.89568147e+00]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           3.19185225]
------
Step:26, Action:West
State  130
Old Q Values:  [-2.96121988e+02 -4.74830610e+03 -1.80008075e+02 -1.18783347e-01]
New Q values:  [-2.96121988e+02 -4.74830610e+03 -1.80008075e+02 -7.11140961e-01]
Reward: -1  Episode Reward:  34
xxxxx
x a x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -2.12092074e-01 -6.00000000e-01]
------
Step:27, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.52398150e+00 -8.46484950e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -8.46484950e+03]
Reward: -1  Episode Reward:  33
xxxxx
xg ax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6            6.2097038   -179.38454759 -5999.3679292 ]
------
Step:28, Action:South
State  136
Old Q Values:  [-6180.6            6.2097038   -179.38454759 -5999.3679292 ]
New Q values:  [-6180.6          114.93914755  -179.38454759 -5999.3679292 ]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  376.85088677 -2055.11156703 -2651.70614553   314.32168908]
------
Step:29, Action:North
State  208
Old Q Values:  [  376.85088677 -2055.11156703 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103 -2055.11156703 -2651.70614553   314.32168908]
Reward: -10001  Episode Reward:  -9969
xxxxx
x  gx
x.  x
x   x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:1, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -8.46484950e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -3.37731501e+03]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    10.74931661 -6443.21937065  -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-6.18060000e+03  4.83116544e-01  4.36335945e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  1.00161438e+02  4.36335945e+00  0.00000000e+00]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3431.06190038   -12.80681502   315.2273056      0.        ]
------
Step:3, Action:East
State  189
Old Q Values:  [  23.96466231   26.19777011  -22.2861878  -244.98066897]
New Q values:  [  23.96466231   26.19777011   -3.36263659 -244.98066897]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 0.50612843  0.         -0.01335924 -0.84      ]
------
Step:4, Action:South
State  197
Old Q Values:  [0.13244338 0.25674    0.         0.        ]
New Q values:  [1.32443385e-01 9.14491915e+02 0.00000000e+00 0.00000000e+00]
Reward: 9  Episode Reward:  36
xxxxx
x  gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.02996406e+03 0.00000000e+00]
------
Step:5, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  -63.86984116 1311.78249106]
New Q values:  [  37.74111519 -168.92307549  438.79019419 1311.78249106]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  399.03952222 -8656.02923281 -7525.7277781   1529.79376886]
------
Step:6, Action:West
State  288
Old Q Values:  [  399.03952222 -8656.02923281 -7525.7277781   1529.79376886]
New Q values:  [  399.03952222 -8656.02923281 -7525.7277781   1004.85225486]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  438.79019419 1311.78249106]
------
Step:7, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  438.79019419 1311.78249106]
New Q values:  [  37.74111519 -168.92307549  438.79019419 6848.64817905]
Reward: 9  Episode Reward:  53
xxxxx
x   x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21061.7839421   -180.6         1011.09018662  6620.49276028]
------
Step:8, Action:North
State  261
Old Q Values:  [    8.75630398  -289.59534477 -3058.97110171  -180.6       ]
New Q values:  [    6.2935052   -289.59534477 -3058.97110171  -180.6       ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476   6.34004078 11.3032787   0.        ]
------
Step:9, Action:East
State  177
Old Q Values:  [ 0.00000000e+00  4.80372651e+00 -5.41014068e+03  0.00000000e+00]
New Q values:  [    0.             4.80372651 -2152.21325531     0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    41.47671611 -9791.50993282     0.        ]
------
Step:10, Action:South
State  197
Old Q Values:  [1.32443385e-01 9.14491915e+02 0.00000000e+00 0.00000000e+00]
New Q values:  [1.32443385e-01 1.27418599e+03 0.00000000e+00 0.00000000e+00]
Reward: -1  Episode Reward:  50
xxxxx
x  gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.02996406e+03 0.00000000e+00]
------
Step:11, Action:East
State  276
Old Q Values:  [   5.54544775 -180.6        -424.74015627  775.93206584]
New Q values:  [   5.54544775 -180.6         130.95961395  775.93206584]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  399.03952222 -8656.02923281 -7525.7277781   1004.85225486]
------
Step:12, Action:West
State  288
Old Q Values:  [  399.03952222 -8656.02923281 -7525.7277781   1004.85225486]
New Q values:  [  399.03952222 -8656.02923281 -7525.7277781    634.1205217 ]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   5.54544775 -180.6         130.95961395  775.93206584]
------
Step:13, Action:West
State  276
Old Q Values:  [   5.54544775 -180.6         130.95961395  775.93206584]
New Q values:  [   5.54544775 -180.6         130.95961395  311.6608779 ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    6.2935052   -289.59534477 -3058.97110171  -180.6       ]
------
Step:14, Action:North
State  261
Old Q Values:  [    6.2935052   -289.59534477 -3058.97110171  -180.6       ]
New Q values:  [ 2.68352845e+00 -2.89595345e+02 -3.05897110e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
------
Step:15, Action:South
State  183
Old Q Values:  [ 6.1762476   6.34004078 11.3032787   0.        ]
New Q values:  [ 6.1762476   2.74107485 11.3032787   0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2.68352845e+00 -2.89595345e+02 -3.05897110e+03 -1.80600000e+02]
------
Step:16, Action:North
State  261
Old Q Values:  [ 2.68352845e+00 -2.89595345e+02 -3.05897110e+03 -1.80600000e+02]
New Q values:  [    3.86439499  -289.59534477 -3058.97110171  -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476   2.74107485 11.3032787   0.        ]
------
Step:17, Action:East
State  177
Old Q Values:  [    0.             4.80372651 -2152.21325531     0.        ]
New Q values:  [   0.            4.80372651 -849.04228729    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    41.47671611 -9791.50993282     0.        ]
------
Step:18, Action:South
State  199
Old Q Values:  [  22.48535485 1424.04718165  880.423904      0.        ]
New Q values:  [  22.48535485 1478.00809166  880.423904      0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.02996406e+03 0.00000000e+00]
------
Step:19, Action:North
State  276
Old Q Values:  [   5.54544775 -180.6         130.95961395  311.6608779 ]
New Q values:  [   1.6181791  -180.6         130.95961395  311.6608779 ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -3.23445845e+03  0.00000000e+00  0.00000000e+00]
------
Step:20, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.62956723e+00 1.40804671e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 9.62956723e+00 6.06629152e+04 0.00000000e+00]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
x gax
x   x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  399.03952222 -8656.02923281 -7525.7277781    634.1205217 ]
------
Step:1, Action:West
State  288
Old Q Values:  [  399.03952222 -8656.02923281 -7525.7277781    634.1205217 ]
New Q values:  [  399.03952222 -8656.02923281 -7525.7277781    661.357428  ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -230.14820988 -6212.61234477 -1970.61654415  1341.03073107]
------
Step:2, Action:West
State  272
Old Q Values:  [ -230.14820988 -6212.61234477 -1970.61654415  1341.03073107]
New Q values:  [ -230.14820988 -6212.61234477 -1970.61654415   542.97161092]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    3.86439499  -289.59534477 -3058.97110171  -180.6       ]
------
Step:3, Action:North
State  260
Old Q Values:  [-1.17341684e+03 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
New Q values:  [-3.02434417e+02 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   538.44106874     0.        ]
------
Step:4, Action:East
State  180
Old Q Values:  [-3431.06190038   -12.80681502   315.2273056      0.        ]
New Q values:  [-3.43106190e+03 -1.28068150e+01  1.83303655e+04  0.00000000e+00]
Reward: 9  Episode Reward:  36
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 6.06629152e+04 0.00000000e+00]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.62956723e+00 6.06629152e+04 0.00000000e+00]
New Q values:  [3.89777037e-01 9.62956723e+00 2.43588626e+04 0.00000000e+00]
Reward: -1  Episode Reward:  35
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103 -2055.11156703 -2651.70614553   314.32168908]
------
Step:6, Action:South
State  210
Old Q Values:  [   4.24777227  -23.4318597  -180.6           6.63136808]
New Q values:  [   4.24777227  188.43448452 -180.6           6.63136808]
Reward: -1  Episode Reward:  34
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  399.03952222 -8656.02923281 -7525.7277781    661.357428  ]
------
Step:7, Action:North
State  288
Old Q Values:  [  399.03952222 -8656.02923281 -7525.7277781    661.357428  ]
New Q values:  [  253.31231561 -8656.02923281 -7525.7277781    661.357428  ]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103 -2055.11156703 -2651.70614553   314.32168908]
------
Step:8, Action:South
State  208
Old Q Values:  [-5815.37790103 -2055.11156703 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103  -624.23739841 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  253.31231561 -8656.02923281 -7525.7277781    661.357428  ]
------
Step:9, Action:West
State  288
Old Q Values:  [  253.31231561 -8656.02923281 -7525.7277781    661.357428  ]
New Q values:  [  253.31231561 -8656.02923281 -7525.7277781   2318.53742492]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  438.79019419 6848.64817905]
------
Step:10, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  438.79019419 6848.64817905]
New Q values:  [  37.74111519 -168.92307549  438.79019419 2740.01859012]
Reward: -1  Episode Reward:  30
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    3.86439499  -289.59534477 -3058.97110171  -180.6       ]
------
Step:11, Action:North
State  257
Old Q Values:  [21061.7839421   -180.6         1011.09018662  6620.49276028]
New Q values:  [8425.55469479 -180.6        1011.09018662 6620.49276028]
Reward: -1  Episode Reward:  29
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.            4.80372651 -849.04228729    0.        ]
------
Step:12, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  2.55375458e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  1.58082033e+00 -6.50534228e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  28
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    3.86439499  -289.59534477 -3058.97110171  -180.6       ]
------
Step:13, Action:North
State  261
Old Q Values:  [    3.86439499  -289.59534477 -3058.97110171  -180.6       ]
New Q values:  [    4.33674161  -289.59534477 -3058.97110171  -180.6       ]
Reward: -1  Episode Reward:  27
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 6.1762476   2.74107485 11.3032787   0.        ]
------
Step:14, Action:East
State  181
Old Q Values:  [ 2.19276907e+00  1.58082033e+00 -6.50534228e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  1.58082033e+00 -2.59029390e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831    41.47671611 -9791.50993282     0.        ]
------
Step:15, Action:South
State  193
Old Q Values:  [-5922.26708831    41.47671611 -9791.50993282     0.        ]
New Q values:  [-5922.26708831   837.99626348 -9791.50993282     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  438.79019419 2740.01859012]
------
Step:16, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  438.79019419 2740.01859012]
New Q values:  [  37.74111519 -168.92307549  438.79019419 1096.70845853]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    4.33674161  -289.59534477 -3058.97110171  -180.6       ]
------
Step:17, Action:North
State  257
Old Q Values:  [8425.55469479 -180.6        1011.09018662 6620.49276028]
New Q values:  [3371.06299587 -180.6        1011.09018662 6620.49276028]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.            4.80372651 -849.04228729    0.        ]
------
Step:18, Action:South
State  177
Old Q Values:  [   0.            4.80372651 -849.04228729    0.        ]
New Q values:  [   0.         1987.46931869 -849.04228729    0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[3371.06299587 -180.6        1011.09018662 6620.49276028]
------
Step:19, Action:West
State  257
Old Q Values:  [3371.06299587 -180.6        1011.09018662 6620.49276028]
New Q values:  [3371.06299587 -180.6        1011.09018662 4453.74493219]
Reward: -301  Episode Reward:  -279
xxxxx
x..gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[3371.06299587 -180.6        1011.09018662 4453.74493219]
------
Step:20, Action:West
State  261
Old Q Values:  [    4.33674161  -289.59534477 -3058.97110171  -180.6       ]
New Q values:  [    4.33674161  -289.59534477 -3058.97110171  -251.53897752]
Reward: -301  Episode Reward:  -580
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    4.33674161  -289.59534477 -3058.97110171  -251.53897752]
------
Step:21, Action:North
State  257
Old Q Values:  [3371.06299587 -180.6        1011.09018662 4453.74493219]
New Q values:  [1944.06599396 -180.6        1011.09018662 4453.74493219]
Reward: -1  Episode Reward:  -581
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         1987.46931869 -849.04228729    0.        ]
------
Step:22, Action:South
State  177
Old Q Values:  [   0.         1987.46931869 -849.04228729    0.        ]
New Q values:  [   0.         2130.51120713 -849.04228729    0.        ]
Reward: -1  Episode Reward:  -582
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[1944.06599396 -180.6        1011.09018662 4453.74493219]
------
Step:23, Action:West
State  260
Old Q Values:  [-3.02434417e+02 -6.45745980e+03  4.09348504e+00 -6.30702000e+03]
New Q values:  [-3.02434417e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
Reward: -301  Episode Reward:  -883
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3.02434417e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
------
Step:24, Action:East
State  257
Old Q Values:  [1944.06599396 -180.6        1011.09018662 4453.74493219]
New Q values:  [1944.06599396 -180.6         566.72755793 4453.74493219]
Reward: -1  Episode Reward:  -884
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -230.14820988 -6212.61234477 -1970.61654415   542.97161092]
------
Step:25, Action:West
State  272
Old Q Values:  [ -230.14820988 -6212.61234477 -1970.61654415   542.97161092]
New Q values:  [ -230.14820988 -6212.61234477 -1970.61654415   217.81668988]
Reward: -1  Episode Reward:  -885
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3.02434417e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
------
Step:26, Action:East
State  257
Old Q Values:  [1944.06599396 -180.6         566.72755793 4453.74493219]
New Q values:  [1944.06599396 -180.6         291.43603013 4453.74493219]
Reward: -1  Episode Reward:  -886
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -230.14820988 -6212.61234477 -1970.61654415   217.81668988]
------
Step:27, Action:West
State  272
Old Q Values:  [ -230.14820988 -6212.61234477 -1970.61654415   217.81668988]
New Q values:  [ -230.14820988 -6212.61234477 -1970.61654415    87.75472146]
Reward: -1  Episode Reward:  -887
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3.02434417e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
------
Step:28, Action:East
State  257
Old Q Values:  [1944.06599396 -180.6         291.43603013 4453.74493219]
New Q values:  [1944.06599396 -180.6         142.30082849 4453.74493219]
Reward: -1  Episode Reward:  -888
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -230.14820988 -6212.61234477 -1970.61654415    87.75472146]
------
Step:29, Action:West
State  272
Old Q Values:  [ -230.14820988 -6212.61234477 -1970.61654415    87.75472146]
New Q values:  [ -230.14820988 -6212.61234477 -1970.61654415    35.80291107]
Reward: -1  Episode Reward:  -889
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    4.33674161  -289.59534477 -3058.97110171  -251.53897752]
------
Step:30, Action:North
State  257
Old Q Values:  [1944.06599396 -180.6         142.30082849 4453.74493219]
New Q values:  [1416.17975972 -180.6         142.30082849 4453.74493219]
Reward: -1  Episode Reward:  -890
xxxxx
x.g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         2130.51120713 -849.04228729    0.        ]
------
Step:31, Action:South
State  181
Old Q Values:  [ 2.19276907e+00  1.58082033e+00 -2.59029390e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  1.33335061e+00 -2.59029390e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  -891
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    4.33674161  -289.59534477 -3058.97110171  -251.53897752]
------
Step:32, Action:North
State  261
Old Q Values:  [    4.33674161  -289.59534477 -3058.97110171  -251.53897752]
New Q values:  [ 1.79252736e+00 -2.89595345e+02 -3.05897110e+03 -2.51538978e+02]
Reward: -1  Episode Reward:  -892
xxxxx
x...x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  1.33335061e+00 -2.59029390e+03 -1.80600000e+02]
------
Step:33, Action:North
State  183
Old Q Values:  [ 6.1762476   2.74107485 11.3032787   0.        ]
New Q values:  [ 9.91988058  2.74107485 11.3032787   0.        ]
Reward: 9  Episode Reward:  -883
xxxxx
xa..x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           6.83127179    0.            0.        ]
------
Step:34, Action:South
State  103
Old Q Values:  [-180.6           6.83127179    0.            0.        ]
New Q values:  [-180.6           5.52349233    0.            0.        ]
Reward: -1  Episode Reward:  -884
xxxxx
x ..x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058  2.74107485 11.3032787   0.        ]
------
Step:35, Action:East
State  181
Old Q Values:  [ 2.19276907e+00  1.33335061e+00 -2.59029390e+03 -1.80600000e+02]
New Q values:  [   2.19276907    1.33335061  270.94121587 -180.6       ]
Reward: -10001  Episode Reward:  -10885
xxxxx
x ..x
x g x
x   x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           3.19185225]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           3.19185225]
New Q values:  [-180.6          -0.46471887 -180.6           8.44544534]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  5.89568147e+00]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  5.89568147e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.32855761e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   18.42434499    3.18798634 -272.09726687]
------
Step:3, Action:South
State  110
Old Q Values:  [-1.80600000e+02 -2.31842233e+03 -1.23006946e-01 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -1.42285929e+03 -1.23006946e-01 -1.80600000e+02]
Reward: -9991  Episode Reward:  -9973
xxxxx
x   x
xg..x
x ..x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.19276907    1.33335061  270.94121587 -180.6       ]
------
Step:1, Action:East
State  181
Old Q Values:  [   2.19276907    1.33335061  270.94121587 -180.6       ]
New Q values:  [ 2.19276907e+00  1.33335061e+00  1.42143526e+03 -1.80600000e+02]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g x
x...x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           8.44544534]
------
Step:1, Action:West
State  136
Old Q Values:  [-6180.6          114.93914755  -179.38454759 -5999.3679292 ]
New Q values:  [-6180.6          114.93914755  -179.38454759 -2392.88066071]
Reward: 9  Episode Reward:  9
xxxxx
x.agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:2, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -3.37731501e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -1.34230121e+03]
Reward: 9  Episode Reward:  18
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    10.74931661 -6443.21937065  -180.6       ]
------
Step:3, Action:South
State  108
Old Q Values:  [-6.18060000e+03  1.00161438e+02  4.36335945e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  4.75856216e+01  4.36335945e+00  0.00000000e+00]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-1343.6616728   -247.57994041     7.07015435     0.        ]
------
Step:4, Action:East
State  189
Old Q Values:  [  23.96466231   26.19777011   -3.36263659 -244.98066897]
New Q values:  [  23.96466231   26.19777011    4.20678389 -244.98066897]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 0.50612843  0.         -0.01335924 -0.84      ]
------
Step:5, Action:South
State  200
Old Q Values:  [ 0.50612843  0.         -0.01335924 -0.84      ]
New Q values:  [ 5.06128434e-01  1.61408733e+01 -1.33592424e-02 -8.40000000e-01]
Reward: 9  Episode Reward:  45
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -230.14820988 -6212.61234477 -1970.61654415    35.80291107]
------
Step:6, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         130.95961395  311.6608779 ]
New Q values:  [   1.6181791  -180.6         130.95961395  131.29239667]
Reward: 9  Episode Reward:  54
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3.02434417e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
------
Step:7, Action:East
State  261
Old Q Values:  [ 1.79252736e+00 -2.89595345e+02 -3.05897110e+03 -2.51538978e+02]
New Q values:  [    1.79252736  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  53
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         130.95961395  131.29239667]
------
Step:8, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         130.95961395  131.29239667]
New Q values:  [   1.6181791  -180.6         130.95961395   52.45471688]
Reward: -1  Episode Reward:  52
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    1.79252736  -289.59534477 -1184.80072168  -251.53897752]
------
Step:9, Action:North
State  261
Old Q Values:  [    1.79252736  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [    7.97634198  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  51
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  23.96466231   26.19777011    4.20678389 -244.98066897]
------
Step:10, Action:South
State  191
Old Q Values:  [3.06655861 0.         0.         0.        ]
New Q values:  [3.06655861 1.79290259 0.         0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    7.97634198  -289.59534477 -1184.80072168  -251.53897752]
------
Step:11, Action:North
State  260
Old Q Values:  [-3.02434417e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
New Q values:  [-1.12466003e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094  0.         30.35921172  0.        ]
------
Step:12, Action:East
State  188
Old Q Values:  [-1343.6616728   -247.57994041     7.07015435     0.        ]
New Q values:  [-1343.6616728   -247.57994041     4.93698103     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[0.         0.         0.         9.02973096]
------
Step:13, Action:East
State  206
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.         0.         6.60224881 0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  2.40074960e+01]
------
Step:14, Action:West
State  218
Old Q Values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  2.40074960e+01]
New Q values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  1.09836731e+01]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[0.         0.         6.60224881 0.        ]
------
Step:15, Action:East
State  204
Old Q Values:  [0.         0.         0.         9.02973096]
New Q values:  [ 0.          0.         -0.32153244  9.02973096]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.28225205e-01 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:16, Action:North
State  216
Old Q Values:  [ 9.28225205e-01 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
New Q values:  [   34.25303435  -131.73529819 -6170.35693855 -5999.96410919]
Reward: -1  Episode Reward:  44
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6          114.93914755  -179.38454759 -2392.88066071]
------
Step:17, Action:South
State  136
Old Q Values:  [-6180.6          114.93914755  -179.38454759 -2392.88066071]
New Q values:  [-6180.6           55.65156932  -179.38454759 -2392.88066071]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   34.25303435  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:18, Action:South
State  208
Old Q Values:  [-5815.37790103  -624.23739841 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103 60451.26626811 -2651.70614553   314.32168908]
Reward: 100009  Episode Reward:  100052
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6           8.44544534]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6           8.44544534]
New Q values:  [-180.6          -0.46471887 -180.6          12.76385096]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.32855761e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.32855761e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.62415339e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   18.42434499    3.18798634 -272.09726687]
------
Step:3, Action:South
State  109
Old Q Values:  [ -241.10880094    10.74931661 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    17.55905768 -6443.21937065  -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x g x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  23.96466231   26.19777011    4.20678389 -244.98066897]
------
Step:4, Action:South
State  189
Old Q Values:  [  23.96466231   26.19777011    4.20678389 -244.98066897]
New Q values:  [  23.96466231   18.27201064    4.20678389 -244.98066897]
Reward: 9  Episode Reward:  36
xxxxx
x  gx
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    7.97634198  -289.59534477 -1184.80072168  -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [    7.97634198  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [    9.77993549  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  35
xxxxx
x g x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  23.96466231   18.27201064    4.20678389 -244.98066897]
------
Step:6, Action:North
State  188
Old Q Values:  [-1343.6616728   -247.57994041     4.93698103     0.        ]
New Q values:  [-6.52378898e+03 -2.47579940e+02  4.93698103e+00  0.00000000e+00]
Reward: -10001  Episode Reward:  -9966
xxxxx
xg  x
x  .x
x ..x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  253.31231561 -8656.02923281 -7525.7277781   2318.53742492]
------
Step:1, Action:West
State  288
Old Q Values:  [  253.31231561 -8656.02923281 -7525.7277781   2318.53742492]
New Q values:  [  253.31231561 -8656.02923281 -7525.7277781   1261.82750753]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  438.79019419 1096.70845853]
------
Step:2, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  438.79019419 1096.70845853]
New Q values:  [  37.74111519 -168.92307549  438.79019419  447.01736406]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x...x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[    9.77993549  -289.59534477 -1184.80072168  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [    9.77993549  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  435.74255256  -289.59534477 -1184.80072168  -251.53897752]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.19276907e+00  1.33335061e+00  1.42143526e+03 -1.80600000e+02]
------
Step:4, Action:East
State  183
Old Q Values:  [ 9.91988058  2.74107485 11.3032787   0.        ]
New Q values:  [ 9.91988058  2.74107485 77.6781356   0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 38.85388605 225.8560804    5.94588357   0.        ]
------
Step:5, Action:South
State  193
Old Q Values:  [-5922.26708831   837.99626348 -9791.50993282     0.        ]
New Q values:  [-5922.26708831   468.70371461 -9791.50993282     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x.. x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  438.79019419  447.01736406]
------
Step:6, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  438.79019419  447.01736406]
New Q values:  [  37.74111519 -168.92307549  438.79019419  308.92971139]
Reward: -1  Episode Reward:  34
xxxxx
x.. x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  435.74255256  -289.59534477 -1184.80072168  -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [  435.74255256  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  197.0004617   -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  33
xxxxx
x.. x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058  2.74107485 77.6781356   0.        ]
------
Step:8, Action:East
State  181
Old Q Values:  [ 2.19276907e+00  1.33335061e+00  1.42143526e+03 -1.80600000e+02]
New Q values:  [ 2.19276907e+00  1.33335061e+00  1.87563288e+03 -1.80600000e+02]
Reward: -10001  Episode Reward:  -9968
xxxxx
x.. x
x g.x
x   x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   18.42434499    3.18798634 -272.09726687]
------
Step:1, Action:South
State  110
Old Q Values:  [-1.80600000e+02 -1.42285929e+03 -1.23006946e-01 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -1.06463407e+03 -1.23006946e-01 -1.80600000e+02]
Reward: -9991  Episode Reward:  -9991
xxxxx
x ..x
xg .x
x...x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  197.0004617   -289.59534477 -1184.80072168  -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [  197.0004617   -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  107.50362536  -289.59534477 -1184.80072168  -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058  2.74107485 77.6781356   0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [ 2.19276907e+00  1.33335061e+00  1.87563288e+03 -1.80600000e+02]
New Q values:  [   2.19276907    1.33335061  896.26426613 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   468.70371461 -9791.50993282     0.        ]
------
Step:3, Action:South
State  195
Old Q Values:  [ 38.85388605 225.8560804    5.94588357   0.        ]
New Q values:  [ 38.85388605 227.37949042   5.94588357   0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  438.79019419  308.92971139]
------
Step:4, Action:West
State  272
Old Q Values:  [ -230.14820988 -6212.61234477 -1970.61654415    35.80291107]
New Q values:  [ -230.14820988 -6212.61234477 -1970.61654415    45.97225204]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  107.50362536  -289.59534477 -1184.80072168  -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [  107.50362536  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  311.28072998  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   2.19276907    1.33335061  896.26426613 -180.6       ]
------
Step:6, Action:North
State  180
Old Q Values:  [-3.43106190e+03 -1.28068150e+01  1.83303655e+04  0.00000000e+00]
New Q values:  [-1.36702476e+03 -1.28068150e+01  1.83303655e+04  0.00000000e+00]
Reward: 9  Episode Reward:  34
xxxxx
xa..x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-1.80600000e+02 -8.39473682e+03 -6.00000000e-01  0.00000000e+00]
------
Step:7, Action:West
State  102
Old Q Values:  [-1.80600000e+02 -8.39473682e+03 -6.00000000e-01  0.00000000e+00]
New Q values:  [-1.80600000e+02 -8.39473682e+03 -6.00000000e-01 -1.80600000e+02]
Reward: -301  Episode Reward:  -267
xxxxx
xa..x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-1.80600000e+02 -8.39473682e+03 -6.00000000e-01 -1.80600000e+02]
------
Step:8, Action:East
State  102
Old Q Values:  [-1.80600000e+02 -8.39473682e+03 -6.00000000e-01 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -8.39473682e+03  5.16000000e+00 -1.80600000e+02]
Reward: 9  Episode Reward:  -258
xxxxx
x a.x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ -180.6 -6000.6     0.      0. ]
------
Step:9, Action:East
State  126
Old Q Values:  [0.        0.        1.1292464 0.       ]
New Q values:  [0.         0.         9.68085385 0.        ]
Reward: 9  Episode Reward:  -249
xxxxx
x  ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6          12.76385096]
------
Step:10, Action:West
State  136
Old Q Values:  [-6180.6           55.65156932  -179.38454759 -2392.88066071]
New Q values:  [-6180.6           55.65156932  -179.38454759 -6942.80389713]
Reward: -10001  Episode Reward:  -10250
xxxxx
x g x
x   x
x  .x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.62415339e+01]
------
Step:1, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -1.34230121e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -6.51724480e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
xg .x
x ..x
x...x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          -0.46471887 -180.6          12.76385096]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6          12.76385096]
New Q values:  [-180.6          -0.46471887 -180.6          15.37800056]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.62415339e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.62415339e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.18597115e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -1.06463407e+03 -1.23006946e-01 -1.80600000e+02]
------
Step:3, Action:East
State  111
Old Q Values:  [-177.44732869   18.42434499    3.18798634 -272.09726687]
New Q values:  [-177.44732869   18.42434499    4.23310798 -272.09726687]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.18597115e+01]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.18597115e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  9.67118809e+00]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   18.42434499    4.23310798 -272.09726687]
------
Step:5, Action:South
State  110
Old Q Values:  [-1.80600000e+02 -1.06463407e+03 -1.23006946e-01 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -2.64921308e+02 -1.23006946e-01 -1.80600000e+02]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   538.44106874     0.        ]
------
Step:6, Action:East
State  188
Old Q Values:  [-6.52378898e+03 -2.47579940e+02  4.93698103e+00  0.00000000e+00]
New Q values:  [-6523.78898263  -247.57994041    12.21705441     0.        ]
Reward: 9  Episode Reward:  24
xxxxx
x   x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 5.06128434e-01  1.61408733e+01 -1.33592424e-02 -8.40000000e-01]
------
Step:7, Action:South
State  196
Old Q Values:  [-2469.90645144   196.04597662    72.14653931     0.        ]
New Q values:  [-2469.90645144   123.10627483    72.14653931     0.        ]
Reward: 9  Episode Reward:  33
xxxxx
x   x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         130.95961395   52.45471688]
------
Step:8, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  438.79019419  308.92971139]
New Q values:  [  37.74111519 -168.92307549  559.46432994  308.92971139]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  253.31231561 -8656.02923281 -7525.7277781   1261.82750753]
------
Step:9, Action:West
State  288
Old Q Values:  [  253.31231561 -8656.02923281 -7525.7277781   1261.82750753]
New Q values:  [  253.31231561 -8656.02923281 -7525.7277781    517.92267862]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -230.14820988 -6212.61234477 -1970.61654415    45.97225204]
------
Step:10, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         130.95961395   52.45471688]
New Q values:  [   1.6181791  -180.6         130.95961395  119.76610575]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  311.28072998  -289.59534477 -1184.80072168  -251.53897752]
------
Step:11, Action:North
State  260
Old Q Values:  [-1.12466003e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
New Q values:  [ 1.15945919e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   538.44106874     0.        ]
------
Step:12, Action:East
State  180
Old Q Values:  [-1.36702476e+03 -1.28068150e+01  1.83303655e+04  0.00000000e+00]
New Q values:  [-1367.02476015   -12.80681502  7368.4780742      0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   123.10627483    72.14653931     0.        ]
------
Step:13, Action:South
State  196
Old Q Values:  [-2469.90645144   123.10627483    72.14653931     0.        ]
New Q values:  [-2469.90645144    87.93039412    72.14653931     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         130.95961395  119.76610575]
------
Step:14, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         130.95961395  119.76610575]
New Q values:  [   1.6181791  -180.6         207.16064917  119.76610575]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  253.31231561 -8656.02923281 -7525.7277781    517.92267862]
------
Step:15, Action:North
State  288
Old Q Values:  [  253.31231561 -8656.02923281 -7525.7277781    517.92267862]
New Q values:  [78242.10480668 -8656.02923281 -7525.7277781    517.92267862]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
x gax
x   x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  9.67118809e+00]
------
Step:1, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  9.67118809e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.00889132e+01]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    2.73479334 -252.78192178]
------
Step:2, Action:East
State  107
Old Q Values:  [-252.35169558    2.61561149    2.73479334 -252.78192178]
New Q values:  [-252.35169558    2.61561149    1.40383269 -252.78192178]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  1.25127950e+00  3.03305120e+00]
------
Step:3, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  1.25127950e+00  3.03305120e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  1.25127950e+00  1.39790392e+00]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    1.40383269 -252.78192178]
------
Step:4, Action:South
State  109
Old Q Values:  [ -241.10880094    17.55905768 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    19.61302176 -6443.21937065  -180.6       ]
Reward: 9  Episode Reward:  16
xxxxx
x  gx
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  23.96466231   18.27201064    4.20678389 -244.98066897]
------
Step:5, Action:North
State  181
Old Q Values:  [   2.19276907    1.33335061  896.26426613 -180.6       ]
New Q values:  [   6.16101416    1.33335061  896.26426613 -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
xag.x
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    19.61302176 -6443.21937065  -180.6       ]
------
Step:6, Action:South
State  109
Old Q Values:  [ -241.10880094    19.61302176 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    14.4346074  -6443.21937065  -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  23.96466231   18.27201064    4.20678389 -244.98066897]
------
Step:7, Action:North
State  189
Old Q Values:  [  23.96466231   18.27201064    4.20678389 -244.98066897]
New Q values:  [  14.51316842   18.27201064    4.20678389 -244.98066897]
Reward: -1  Episode Reward:  13
xxxxx
xa .x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   18.42434499    4.23310798 -272.09726687]
------
Step:8, Action:South
State  109
Old Q Values:  [ -241.10880094    14.4346074  -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    10.65544615 -6443.21937065  -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x  gx
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842   18.27201064    4.20678389 -244.98066897]
------
Step:9, Action:South
State  181
Old Q Values:  [   6.16101416    1.33335061  896.26426613 -180.6       ]
New Q values:  [   6.16101416   99.31755924  896.26426613 -180.6       ]
Reward: 9  Episode Reward:  21
xxxxx
x g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  311.28072998  -289.59534477 -1184.80072168  -251.53897752]
------
Step:10, Action:North
State  260
Old Q Values:  [ 1.15945919e+02 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
New Q values:  [ 2.25632179e+03 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
Reward: -1  Episode Reward:  20
xxxxx
xg .x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   -12.80681502  7368.4780742      0.        ]
------
Step:11, Action:East
State  180
Old Q Values:  [-1367.02476015   -12.80681502  7368.4780742      0.        ]
New Q values:  [-1367.02476015   -12.80681502  2973.17034792     0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x  .x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    87.93039412    72.14653931     0.        ]
------
Step:12, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -3.23445845e+03  0.00000000e+00  0.00000000e+00]
New Q values:  [-2.78872080e-01 -1.22623518e+03  0.00000000e+00  0.00000000e+00]
Reward: 9  Episode Reward:  28
xxxxx
x  .x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         207.16064917  119.76610575]
------
Step:13, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         207.16064917  119.76610575]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  2.35608957e+04  1.19766106e+02]
Reward: 9  Episode Reward:  37
xxxxx
x  .x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[78242.10480668 -8656.02923281 -7525.7277781    517.92267862]
------
Step:14, Action:North
State  288
Old Q Values:  [78242.10480668 -8656.02923281 -7525.7277781    517.92267862]
New Q values:  [49437.6218031  -8656.02923281 -7525.7277781    517.92267862]
Reward: 9  Episode Reward:  46
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103 60451.26626811 -2651.70614553   314.32168908]
------
Step:15, Action:South
State  208
Old Q Values:  [-5815.37790103 60451.26626811 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103 39011.19304817 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  45
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[49437.6218031  -8656.02923281 -7525.7277781    517.92267862]
------
Step:16, Action:North
State  288
Old Q Values:  [49437.6218031  -8656.02923281 -7525.7277781    517.92267862]
New Q values:  [31477.80663569 -8656.02923281 -7525.7277781    517.92267862]
Reward: -1  Episode Reward:  44
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103 39011.19304817 -2651.70614553   314.32168908]
------
Step:17, Action:South
State  208
Old Q Values:  [-5815.37790103 39011.19304817 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103 25047.21920998 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  43
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[31477.80663569 -8656.02923281 -7525.7277781    517.92267862]
------
Step:18, Action:North
State  288
Old Q Values:  [31477.80663569 -8656.02923281 -7525.7277781    517.92267862]
New Q values:  [20104.68841727 -8656.02923281 -7525.7277781    517.92267862]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103 25047.21920998 -2651.70614553   314.32168908]
------
Step:19, Action:South
State  208
Old Q Values:  [-5815.37790103 25047.21920998 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103 16049.69420917 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20104.68841727 -8656.02923281 -7525.7277781    517.92267862]
------
Step:20, Action:West
State  288
Old Q Values:  [20104.68841727 -8656.02923281 -7525.7277781    517.92267862]
New Q values:  [20104.68841727 -8656.02923281 -7525.7277781    374.40837043]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  308.92971139]
------
Step:21, Action:East
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.02996406e+03 0.00000000e+00]
New Q values:  [1.64433000e+00 0.00000000e+00 7.24279215e+03 0.00000000e+00]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20104.68841727 -8656.02923281 -7525.7277781    374.40837043]
------
Step:22, Action:West
State  288
Old Q Values:  [20104.68841727 -8656.02923281 -7525.7277781    374.40837043]
New Q values:  [20104.68841727 -8656.02923281 -7525.7277781   7217.43205867]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1.61817910e+00 -1.80600000e+02  2.35608957e+04  1.19766106e+02]
------
Step:23, Action:East
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  2.35608957e+04  1.19766106e+02]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  1.54551648e+04  1.19766106e+02]
Reward: -1  Episode Reward:  37
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20104.68841727 -8656.02923281 -7525.7277781   7217.43205867]
------
Step:24, Action:North
State  288
Old Q Values:  [20104.68841727 -8656.02923281 -7525.7277781   7217.43205867]
New Q values:  [12856.18362966 -8656.02923281 -7525.7277781   7217.43205867]
Reward: -1  Episode Reward:  36
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103 16049.69420917 -2651.70614553   314.32168908]
------
Step:25, Action:South
State  208
Old Q Values:  [-5815.37790103 16049.69420917 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103 10276.13277257 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12856.18362966 -8656.02923281 -7525.7277781   7217.43205867]
------
Step:26, Action:North
State  288
Old Q Values:  [12856.18362966 -8656.02923281 -7525.7277781   7217.43205867]
New Q values:  [ 8224.71328363 -8656.02923281 -7525.7277781   7217.43205867]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103 10276.13277257 -2651.70614553   314.32168908]
------
Step:27, Action:South
State  208
Old Q Values:  [-5815.37790103 10276.13277257 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103  6577.26709412 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8224.71328363 -8656.02923281 -7525.7277781   7217.43205867]
------
Step:28, Action:North
State  288
Old Q Values:  [ 8224.71328363 -8656.02923281 -7525.7277781   7217.43205867]
New Q values:  [ 5262.46544169 -8656.02923281 -7525.7277781   7217.43205867]
Reward: -1  Episode Reward:  32
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103  6577.26709412 -2651.70614553   314.32168908]
------
Step:29, Action:South
State  208
Old Q Values:  [-5815.37790103  6577.26709412 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103  4795.53645525 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5262.46544169 -8656.02923281 -7525.7277781   7217.43205867]
------
Step:30, Action:West
State  288
Old Q Values:  [ 5262.46544169 -8656.02923281 -7525.7277781   7217.43205867]
New Q values:  [ 5262.46544169 -8656.02923281 -7525.7277781   7522.92226522]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1.61817910e+00 -1.80600000e+02  1.54551648e+04  1.19766106e+02]
------
Step:31, Action:East
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  1.54551648e+04  1.19766106e+02]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  8.43834260e+03  1.19766106e+02]
Reward: -1  Episode Reward:  29
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5262.46544169 -8656.02923281 -7525.7277781   7522.92226522]
------
Step:32, Action:West
State  288
Old Q Values:  [ 5262.46544169 -8656.02923281 -7525.7277781   7522.92226522]
New Q values:  [ 5262.46544169 -8656.02923281 -7525.7277781   5540.07168666]
Reward: -1  Episode Reward:  28
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1.61817910e+00 -1.80600000e+02  8.43834260e+03  1.19766106e+02]
------
Step:33, Action:East
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  8.43834260e+03  1.19766106e+02]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  5.03675855e+03  1.19766106e+02]
Reward: -1  Episode Reward:  27
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5262.46544169 -8656.02923281 -7525.7277781   5540.07168666]
------
Step:34, Action:West
State  288
Old Q Values:  [ 5262.46544169 -8656.02923281 -7525.7277781   5540.07168666]
New Q values:  [ 5262.46544169 -8656.02923281 -7525.7277781   3726.45623869]
Reward: -1  Episode Reward:  26
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1.61817910e+00 -1.80600000e+02  5.03675855e+03  1.19766106e+02]
------
Step:35, Action:East
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  5.03675855e+03  1.19766106e+02]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  3.59284305e+03  1.19766106e+02]
Reward: -1  Episode Reward:  25
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5262.46544169 -8656.02923281 -7525.7277781   3726.45623869]
------
Step:36, Action:North
State  288
Old Q Values:  [ 5262.46544169 -8656.02923281 -7525.7277781   3726.45623869]
New Q values:  [ 3543.04711325 -8656.02923281 -7525.7277781   3726.45623869]
Reward: -1  Episode Reward:  24
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103  4795.53645525 -2651.70614553   314.32168908]
------
Step:37, Action:South
State  208
Old Q Values:  [-5815.37790103  4795.53645525 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103  3035.55145371 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  23
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3543.04711325 -8656.02923281 -7525.7277781   3726.45623869]
------
Step:38, Action:West
State  288
Old Q Values:  [ 3543.04711325 -8656.02923281 -7525.7277781   3726.45623869]
New Q values:  [ 3543.04711325 -8656.02923281 -7525.7277781   1657.82179446]
Reward: -1  Episode Reward:  22
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  308.92971139]
------
Step:39, Action:East
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 7.24279215e+03 0.00000000e+00]
New Q values:  [1.64433000e+00 0.00000000e+00 3.95943099e+03 0.00000000e+00]
Reward: -1  Episode Reward:  21
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3543.04711325 -8656.02923281 -7525.7277781   1657.82179446]
------
Step:40, Action:West
State  288
Old Q Values:  [ 3543.04711325 -8656.02923281 -7525.7277781   1657.82179446]
New Q values:  [ 3543.04711325 -8656.02923281 -7525.7277781    830.36801676]
Reward: -1  Episode Reward:  20
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  308.92971139]
------
Step:41, Action:East
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  3.59284305e+03  1.19766106e+02]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  2.49945135e+03  1.19766106e+02]
Reward: -1  Episode Reward:  19
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3543.04711325 -8656.02923281 -7525.7277781    830.36801676]
------
Step:42, Action:North
State  288
Old Q Values:  [ 3543.04711325 -8656.02923281 -7525.7277781    830.36801676]
New Q values:  [ 2327.28428141 -8656.02923281 -7525.7277781    830.36801676]
Reward: -1  Episode Reward:  18
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103  3035.55145371 -2651.70614553   314.32168908]
------
Step:43, Action:South
State  208
Old Q Values:  [-5815.37790103  3035.55145371 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103  1911.80586591 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  17
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2327.28428141 -8656.02923281 -7525.7277781    830.36801676]
------
Step:44, Action:West
State  288
Old Q Values:  [ 2327.28428141 -8656.02923281 -7525.7277781    830.36801676]
New Q values:  [ 2327.28428141 -8656.02923281 -7525.7277781    499.38650569]
Reward: -1  Episode Reward:  16
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  308.92971139]
------
Step:45, Action:East
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  2.49945135e+03  1.19766106e+02]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  1.69736583e+03  1.19766106e+02]
Reward: -1  Episode Reward:  15
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2327.28428141 -8656.02923281 -7525.7277781    499.38650569]
------
Step:46, Action:North
State  288
Old Q Values:  [ 2327.28428141 -8656.02923281 -7525.7277781    499.38650569]
New Q values:  [ 1503.85547234 -8656.02923281 -7525.7277781    499.38650569]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103  1911.80586591 -2651.70614553   314.32168908]
------
Step:47, Action:South
State  208
Old Q Values:  [-5815.37790103  1911.80586591 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103  1215.27898806 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1503.85547234 -8656.02923281 -7525.7277781    499.38650569]
------
Step:48, Action:North
State  288
Old Q Values:  [ 1503.85547234 -8656.02923281 -7525.7277781    499.38650569]
New Q values:  [  965.52588535 -8656.02923281 -7525.7277781    499.38650569]
Reward: -1  Episode Reward:  12
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103  1215.27898806 -2651.70614553   314.32168908]
------
Step:49, Action:South
State  208
Old Q Values:  [-5815.37790103  1215.27898806 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103   775.16936083 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  11
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  965.52588535 -8656.02923281 -7525.7277781    499.38650569]
------
Step:50, Action:North
State  288
Old Q Values:  [  965.52588535 -8656.02923281 -7525.7277781    499.38650569]
New Q values:  [  618.16116239 -8656.02923281 -7525.7277781    499.38650569]
Reward: -1  Episode Reward:  10
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103   775.16936083 -2651.70614553   314.32168908]
------
Step:51, Action:South
State  208
Old Q Values:  [-5815.37790103   775.16936083 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103   494.91609305 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  9
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  618.16116239 -8656.02923281 -7525.7277781    499.38650569]
------
Step:52, Action:North
State  288
Old Q Values:  [  618.16116239 -8656.02923281 -7525.7277781    499.38650569]
New Q values:  [  395.13929287 -8656.02923281 -7525.7277781    499.38650569]
Reward: -1  Episode Reward:  8
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103   494.91609305 -2651.70614553   314.32168908]
------
Step:53, Action:South
State  208
Old Q Values:  [-5815.37790103   494.91609305 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103   347.18238893 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  395.13929287 -8656.02923281 -7525.7277781    499.38650569]
------
Step:54, Action:West
State  288
Old Q Values:  [  395.13929287 -8656.02923281 -7525.7277781    499.38650569]
New Q values:  [  395.13929287 -8656.02923281 -7525.7277781    708.36435014]
Reward: -1  Episode Reward:  6
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1.61817910e+00 -1.80600000e+02  1.69736583e+03  1.19766106e+02]
------
Step:55, Action:East
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  1.69736583e+03  1.19766106e+02]
New Q values:  [   1.6181791  -180.6         890.85563552  119.76610575]
Reward: -1  Episode Reward:  5
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  395.13929287 -8656.02923281 -7525.7277781    708.36435014]
------
Step:56, Action:West
State  288
Old Q Values:  [  395.13929287 -8656.02923281 -7525.7277781    708.36435014]
New Q values:  [  395.13929287 -8656.02923281 -7525.7277781    550.00243071]
Reward: -1  Episode Reward:  4
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         890.85563552  119.76610575]
------
Step:57, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         890.85563552  119.76610575]
New Q values:  [   1.6181791  -180.6         520.74298342  119.76610575]
Reward: -1  Episode Reward:  3
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  395.13929287 -8656.02923281 -7525.7277781    550.00243071]
------
Step:58, Action:West
State  288
Old Q Values:  [  395.13929287 -8656.02923281 -7525.7277781    550.00243071]
New Q values:  [  395.13929287 -8656.02923281 -7525.7277781    375.62386731]
Reward: -1  Episode Reward:  2
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         520.74298342  119.76610575]
------
Step:59, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         520.74298342  119.76610575]
New Q values:  [   1.6181791  -180.6         326.23898123  119.76610575]
Reward: -1  Episode Reward:  1
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  395.13929287 -8656.02923281 -7525.7277781    375.62386731]
------
Step:60, Action:North
State  288
Old Q Values:  [  395.13929287 -8656.02923281 -7525.7277781    375.62386731]
New Q values:  [  261.61043383 -8656.02923281 -7525.7277781    375.62386731]
Reward: -1  Episode Reward:  0
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103   347.18238893 -2651.70614553   314.32168908]
------
Step:61, Action:South
State  208
Old Q Values:  [-5815.37790103   347.18238893 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103   250.96011576 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  -1
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  261.61043383 -8656.02923281 -7525.7277781    375.62386731]
------
Step:62, Action:West
State  288
Old Q Values:  [  261.61043383 -8656.02923281 -7525.7277781    375.62386731]
New Q values:  [  261.61043383 -8656.02923281 -7525.7277781    317.48884591]
Reward: -1  Episode Reward:  -2
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  308.92971139]
------
Step:63, Action:East
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.95943099e+03 0.00000000e+00]
New Q values:  [1.64433000e+00 0.00000000e+00 1.67841905e+03 0.00000000e+00]
Reward: -1  Episode Reward:  -3
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  261.61043383 -8656.02923281 -7525.7277781    317.48884591]
------
Step:64, Action:West
State  288
Old Q Values:  [  261.61043383 -8656.02923281 -7525.7277781    317.48884591]
New Q values:  [  261.61043383 -8656.02923281 -7525.7277781    294.23483734]
Reward: -1  Episode Reward:  -4
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  308.92971139]
------
Step:65, Action:East
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 1.67841905e+03 0.00000000e+00]
New Q values:  [  1.64433      0.         759.03807178   0.        ]
Reward: -1  Episode Reward:  -5
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  261.61043383 -8656.02923281 -7525.7277781    294.23483734]
------
Step:66, Action:West
State  288
Old Q Values:  [  261.61043383 -8656.02923281 -7525.7277781    294.23483734]
New Q values:  [  261.61043383 -8656.02923281 -7525.7277781    284.93323392]
Reward: -1  Episode Reward:  -6
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  308.92971139]
------
Step:67, Action:East
State  277
Old Q Values:  [  1.64433      0.         759.03807178   0.        ]
New Q values:  [  1.64433      0.         388.49519889   0.        ]
Reward: -1  Episode Reward:  -7
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  261.61043383 -8656.02923281 -7525.7277781    284.93323392]
------
Step:68, Action:West
State  288
Old Q Values:  [  261.61043383 -8656.02923281 -7525.7277781    284.93323392]
New Q values:  [  261.61043383 -8656.02923281 -7525.7277781    211.24498794]
Reward: -1  Episode Reward:  -8
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         326.23898123  119.76610575]
------
Step:69, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         326.23898123  119.76610575]
New Q values:  [   1.6181791  -180.6         208.37872264  119.76610575]
Reward: -1  Episode Reward:  -9
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  261.61043383 -8656.02923281 -7525.7277781    211.24498794]
------
Step:70, Action:North
State  288
Old Q Values:  [  261.61043383 -8656.02923281 -7525.7277781    211.24498794]
New Q values:  [  160.57451889 -8656.02923281 -7525.7277781    211.24498794]
Reward: -1  Episode Reward:  -10
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   4.24777227  188.43448452 -180.6           6.63136808]
------
Step:71, Action:South
State  208
Old Q Values:  [-5815.37790103   250.96011576 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103   163.15754269 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  -11
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  160.57451889 -8656.02923281 -7525.7277781    211.24498794]
------
Step:72, Action:West
State  288
Old Q Values:  [  160.57451889 -8656.02923281 -7525.7277781    211.24498794]
New Q values:  [  160.57451889 -8656.02923281 -7525.7277781    146.41161197]
Reward: -1  Episode Reward:  -12
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         208.37872264  119.76610575]
------
Step:73, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         208.37872264  119.76610575]
New Q values:  [   1.6181791  -180.6         130.92384472  119.76610575]
Reward: -1  Episode Reward:  -13
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  160.57451889 -8656.02923281 -7525.7277781    146.41161197]
------
Step:74, Action:North
State  288
Old Q Values:  [  160.57451889 -8656.02923281 -7525.7277781    146.41161197]
New Q values:  [  120.16015291 -8656.02923281 -7525.7277781    146.41161197]
Reward: -1  Episode Reward:  -14
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   4.24777227  188.43448452 -180.6           6.63136808]
------
Step:75, Action:South
State  208
Old Q Values:  [-5815.37790103   163.15754269 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103   108.58650066 -2651.70614553   314.32168908]
Reward: -1  Episode Reward:  -15
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  120.16015291 -8656.02923281 -7525.7277781    146.41161197]
------
Step:76, Action:West
State  288
Old Q Values:  [  120.16015291 -8656.02923281 -7525.7277781    146.41161197]
New Q values:  [  120.16015291 -8656.02923281 -7525.7277781     97.2417982 ]
Reward: -1  Episode Reward:  -16
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         130.92384472  119.76610575]
------
Step:77, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         130.92384472  119.76610575]
New Q values:  [   1.6181791  -180.6          87.81758376  119.76610575]
Reward: -1  Episode Reward:  -17
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  120.16015291 -8656.02923281 -7525.7277781     97.2417982 ]
------
Step:78, Action:North
State  288
Old Q Values:  [  120.16015291 -8656.02923281 -7525.7277781     97.2417982 ]
New Q values:  [  141.76056789 -8656.02923281 -7525.7277781     97.2417982 ]
Reward: -1  Episode Reward:  -18
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103   108.58650066 -2651.70614553   314.32168908]
------
Step:79, Action:West
State  208
Old Q Values:  [-5815.37790103   108.58650066 -2651.70614553   314.32168908]
New Q values:  [-5815.37790103   108.58650066 -2651.70614553   151.50779387]
Reward: -1  Episode Reward:  -19
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    87.93039412    72.14653931     0.        ]
------
Step:80, Action:South
State  196
Old Q Values:  [-2469.90645144    87.93039412    72.14653931     0.        ]
New Q values:  [-2469.90645144    70.50198937    72.14653931     0.        ]
Reward: -1  Episode Reward:  -20
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6          87.81758376  119.76610575]
------
Step:81, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6          87.81758376  119.76610575]
New Q values:  [   1.6181791  -180.6          87.81758376  724.20297929]
Reward: -1  Episode Reward:  -21
xxxxx
x  .x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2.25632179e+03 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
------
Step:82, Action:East
State  260
Old Q Values:  [ 2.25632179e+03 -6.45745980e+03  4.09348504e+00 -2.70217995e+03]
New Q values:  [ 2256.32178999 -6457.4598       218.2982878  -2702.17995449]
Reward: -1  Episode Reward:  -22
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6          87.81758376  724.20297929]
------
Step:83, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6          87.81758376  724.20297929]
New Q values:  [   1.6181791  -180.6          77.05520387  724.20297929]
Reward: -1  Episode Reward:  -23
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  141.76056789 -8656.02923281 -7525.7277781     97.2417982 ]
------
Step:84, Action:North
State  288
Old Q Values:  [  141.76056789 -8656.02923281 -7525.7277781     97.2417982 ]
New Q values:  [  112.63457251 -8656.02923281 -7525.7277781     97.2417982 ]
Reward: -1  Episode Reward:  -24
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   4.24777227  188.43448452 -180.6           6.63136808]
------
Step:85, Action:South
State  208
Old Q Values:  [-5815.37790103   108.58650066 -2651.70614553   151.50779387]
New Q values:  [-5815.37790103    76.62497202 -2651.70614553   151.50779387]
Reward: -1  Episode Reward:  -25
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  112.63457251 -8656.02923281 -7525.7277781     97.2417982 ]
------
Step:86, Action:North
State  288
Old Q Values:  [  112.63457251 -8656.02923281 -7525.7277781     97.2417982 ]
New Q values:  [   89.90616716 -8656.02923281 -7525.7277781     97.2417982 ]
Reward: -1  Episode Reward:  -26
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103    76.62497202 -2651.70614553   151.50779387]
------
Step:87, Action:South
State  208
Old Q Values:  [-5815.37790103    76.62497202 -2651.70614553   151.50779387]
New Q values:  [-5815.37790103    59.22252827 -2651.70614553   151.50779387]
Reward: -1  Episode Reward:  -27
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   89.90616716 -8656.02923281 -7525.7277781     97.2417982 ]
------
Step:88, Action:West
State  288
Old Q Values:  [   89.90616716 -8656.02923281 -7525.7277781     97.2417982 ]
New Q values:  [   89.90616716 -8656.02923281 -7525.7277781    255.55761307]
Reward: -1  Episode Reward:  -28
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6          77.05520387  724.20297929]
------
Step:89, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6          77.05520387  724.20297929]
New Q values:  [   1.6181791  -180.6         106.88936547  724.20297929]
Reward: -1  Episode Reward:  -29
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   89.90616716 -8656.02923281 -7525.7277781    255.55761307]
------
Step:90, Action:West
State  288
Old Q Values:  [   89.90616716 -8656.02923281 -7525.7277781    255.55761307]
New Q values:  [   89.90616716 -8656.02923281 -7525.7277781    318.88393902]
Reward: -1  Episode Reward:  -30
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         106.88936547  724.20297929]
------
Step:91, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         106.88936547  724.20297929]
New Q values:  [   1.6181791  -180.6         137.82092789  724.20297929]
Reward: -1  Episode Reward:  -31
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   89.90616716 -8656.02923281 -7525.7277781    318.88393902]
------
Step:92, Action:West
State  288
Old Q Values:  [   89.90616716 -8656.02923281 -7525.7277781    318.88393902]
New Q values:  [   89.90616716 -8656.02923281 -7525.7277781    344.21446939]
Reward: -1  Episode Reward:  -32
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  724.20297929]
------
Step:93, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  724.20297929]
New Q values:  [   1.6181791  -180.6         137.82092789  382.46541071]
Reward: -1  Episode Reward:  -33
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  311.28072998  -289.59534477 -1184.80072168  -251.53897752]
------
Step:94, Action:North
State  257
Old Q Values:  [1416.17975972 -180.6         142.30082849 4453.74493219]
New Q values:  [1205.02526603 -180.6         142.30082849 4453.74493219]
Reward: -1  Episode Reward:  -34
xxxxx
x  gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         2130.51120713 -849.04228729    0.        ]
------
Step:95, Action:South
State  181
Old Q Values:  [   6.16101416   99.31755924  896.26426613 -180.6       ]
New Q values:  [   6.16101416  132.51124269  896.26426613 -180.6       ]
Reward: -1  Episode Reward:  -35
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  311.28072998  -289.59534477 -1184.80072168  -251.53897752]
------
Step:96, Action:North
State  257
Old Q Values:  [1205.02526603 -180.6         142.30082849 4453.74493219]
New Q values:  [1120.56346855 -180.6         142.30082849 4453.74493219]
Reward: -1  Episode Reward:  -36
xxxxx
x  gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         2130.51120713 -849.04228729    0.        ]
------
Step:97, Action:South
State  181
Old Q Values:  [   6.16101416  132.51124269  896.26426613 -180.6       ]
New Q values:  [   6.16101416  145.78871607  896.26426613 -180.6       ]
Reward: -1  Episode Reward:  -37
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  311.28072998  -289.59534477 -1184.80072168  -251.53897752]
------
Step:98, Action:North
State  257
Old Q Values:  [1120.56346855 -180.6         142.30082849 4453.74493219]
New Q values:  [1086.77874956 -180.6         142.30082849 4453.74493219]
Reward: -1  Episode Reward:  -38
xxxxx
x  gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         2130.51120713 -849.04228729    0.        ]
------
Step:99, Action:South
State  181
Old Q Values:  [   6.16101416  145.78871607  896.26426613 -180.6       ]
New Q values:  [   6.16101416  151.09970542  896.26426613 -180.6       ]
Reward: -1  Episode Reward:  -39
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  311.28072998  -289.59534477 -1184.80072168  -251.53897752]
------
Step:100, Action:North
State  260
Old Q Values:  [ 2256.32178999 -6457.4598       218.2982878  -2702.17995449]
New Q values:  [ 1793.87982037 -6457.4598       218.2982878  -2702.17995449]
Reward: -1  Episode Reward:  -40
xxxxx
xg .x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   -12.80681502  2973.17034792     0.        ]
------
Step:101, Action:East
State  180
Old Q Values:  [-1367.02476015   -12.80681502  2973.17034792     0.        ]
New Q values:  [-1367.02476015   -12.80681502  1210.31210096     0.        ]
Reward: -1  Episode Reward:  -41
xxxxx
x  .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    70.50198937    72.14653931     0.        ]
------
Step:102, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -1.22623518e+03  0.00000000e+00  0.00000000e+00]
New Q values:  [-2.78872080e-01 -1.22623518e+03  5.59303454e+01  0.00000000e+00]
Reward: -1  Episode Reward:  -42
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   4.24777227  188.43448452 -180.6           6.63136808]
------
Step:103, Action:South
State  210
Old Q Values:  [   4.24777227  188.43448452 -180.6           6.63136808]
New Q values:  [   4.24777227  178.03813463 -180.6           6.63136808]
Reward: -1  Episode Reward:  -43
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   89.90616716 -8656.02923281 -7525.7277781    344.21446939]
------
Step:104, Action:North
State  288
Old Q Values:  [   89.90616716 -8656.02923281 -7525.7277781    344.21446939]
New Q values:  [   80.81480503 -8656.02923281 -7525.7277781    344.21446939]
Reward: -1  Episode Reward:  -44
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103    59.22252827 -2651.70614553   151.50779387]
------
Step:105, Action:South
State  208
Old Q Values:  [-5815.37790103    59.22252827 -2651.70614553   151.50779387]
New Q values:  [-5815.37790103   126.35335213 -2651.70614553   151.50779387]
Reward: -1  Episode Reward:  -45
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   80.81480503 -8656.02923281 -7525.7277781    344.21446939]
------
Step:106, Action:West
State  288
Old Q Values:  [   80.81480503 -8656.02923281 -7525.7277781    344.21446939]
New Q values:  [   80.81480503 -8656.02923281 -7525.7277781    251.82541097]
Reward: -1  Episode Reward:  -46
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  382.46541071]
------
Step:107, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  382.46541071]
New Q values:  [   1.6181791  -180.6         137.82092789  690.5501104 ]
Reward: -1  Episode Reward:  -47
xxxxx
x  .x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1793.87982037 -6457.4598       218.2982878  -2702.17995449]
------
Step:108, Action:East
State  260
Old Q Values:  [ 1793.87982037 -6457.4598       218.2982878  -2702.17995449]
New Q values:  [ 1793.87982037 -6457.4598       293.88434824 -2702.17995449]
Reward: -1  Episode Reward:  -48
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  690.5501104 ]
------
Step:109, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  690.5501104 ]
New Q values:  [   1.6181791  -180.6         137.82092789  813.78399027]
Reward: -1  Episode Reward:  -49
xxxxx
xg .x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1793.87982037 -6457.4598       293.88434824 -2702.17995449]
------
Step:110, Action:North
State  261
Old Q Values:  [  311.28072998  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  392.79157183  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  -50
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.16101416  151.09970542  896.26426613 -180.6       ]
------
Step:111, Action:East
State  180
Old Q Values:  [-1367.02476015   -12.80681502  1210.31210096     0.        ]
New Q values:  [-1367.02476015   -12.80681502   505.16880218     0.        ]
Reward: -1  Episode Reward:  -51
xxxxx
xg .x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    70.50198937    72.14653931     0.        ]
------
Step:112, Action:East
State  196
Old Q Values:  [-2469.90645144    70.50198937    72.14653931     0.        ]
New Q values:  [-2469.90645144    70.50198937    73.71095389     0.        ]
Reward: -1  Episode Reward:  -52
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103   126.35335213 -2651.70614553   151.50779387]
------
Step:113, Action:West
State  208
Old Q Values:  [-5815.37790103   126.35335213 -2651.70614553   151.50779387]
New Q values:  [-5815.37790103   126.35335213 -2651.70614553 -5917.88359629]
Reward: -10001  Episode Reward:  -10053
xxxxx
x  .x
x g x
x   x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   80.81480503 -8656.02923281 -7525.7277781    251.82541097]
------
Step:1, Action:West
State  288
Old Q Values:  [   80.81480503 -8656.02923281 -7525.7277781    251.82541097]
New Q values:  [   80.81480503 -8656.02923281 -7525.7277781    119.92184   ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -230.14820988 -6212.61234477 -1970.61654415    45.97225204]
------
Step:2, Action:North
State  272
Old Q Values:  [ -230.14820988 -6212.61234477 -1970.61654415    45.97225204]
New Q values:  [ 7220.99949092 -6212.61234477 -1970.61654415    45.97225204]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xga.x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 2.43588626e+04 0.00000000e+00]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.62956723e+00 2.43588626e+04 0.00000000e+00]
New Q values:  [3.89777037e-01 9.62956723e+00 9.78685104e+03 0.00000000e+00]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103   126.35335213 -2651.70614553 -5917.88359629]
------
Step:4, Action:South
State  208
Old Q Values:  [-5815.37790103   126.35335213 -2651.70614553 -5917.88359629]
New Q values:  [-5815.37790103    85.91789285 -2651.70614553 -5917.88359629]
Reward: -1  Episode Reward:  26
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   80.81480503 -8656.02923281 -7525.7277781    119.92184   ]
------
Step:5, Action:West
State  288
Old Q Values:  [   80.81480503 -8656.02923281 -7525.7277781    119.92184   ]
New Q values:  [   80.81480503 -8656.02923281 -7525.7277781   2213.66858328]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7220.99949092 -6212.61234477 -1970.61654415    45.97225204]
------
Step:6, Action:North
State  272
Old Q Values:  [ 7220.99949092 -6212.61234477 -1970.61654415    45.97225204]
New Q values:  [ 2889.2690917  -6212.61234477 -1970.61654415    45.97225204]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  4.89765111e+00  0.00000000e+00]
------
Step:7, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.62956723e+00 9.78685104e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 9.62956723e+00 3.93991578e+03 0.00000000e+00]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5815.37790103    85.91789285 -2651.70614553 -5917.88359629]
------
Step:8, Action:South
State  208
Old Q Values:  [-5815.37790103    85.91789285 -2651.70614553 -5917.88359629]
New Q values:  [-5815.37790103   697.86773212 -2651.70614553 -5917.88359629]
Reward: -1  Episode Reward:  22
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   80.81480503 -8656.02923281 -7525.7277781   2213.66858328]
------
Step:9, Action:West
State  288
Old Q Values:  [   80.81480503 -8656.02923281 -7525.7277781   2213.66858328]
New Q values:  [   80.81480503 -8656.02923281 -7525.7277781   1751.64816082]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2889.2690917  -6212.61234477 -1970.61654415    45.97225204]
------
Step:10, Action:North
State  272
Old Q Values:  [ 2889.2690917  -6212.61234477 -1970.61654415    45.97225204]
New Q values:  [ 1156.57693201 -6212.61234477 -1970.61654415    45.97225204]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  4.89765111e+00  0.00000000e+00]
------
Step:11, Action:East
State  195
Old Q Values:  [ 38.85388605 227.37949042   5.94588357   0.        ]
New Q values:  [ 38.85388605 227.37949042  55.18979381   0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   4.24777227  178.03813463 -180.6           6.63136808]
------
Step:12, Action:West
State  208
Old Q Values:  [-5815.37790103   697.86773212 -2651.70614553 -5917.88359629]
New Q values:  [-5815.37790103   697.86773212 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   468.70371461 -9791.50993282     0.        ]
------
Step:13, Action:South
State  195
Old Q Values:  [ 38.85388605 227.37949042  55.18979381   0.        ]
New Q values:  [ 38.85388605 258.19109515  55.18979381   0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  308.92971139]
------
Step:14, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  559.46432994  308.92971139]
New Q values:  [  37.74111519 -168.92307549  559.46432994  246.80935611]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  392.79157183  -289.59534477 -1184.80072168  -251.53897752]
------
Step:15, Action:North
State  261
Old Q Values:  [  392.79157183  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  425.39590857  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.16101416  151.09970542  896.26426613 -180.6       ]
------
Step:16, Action:South
State  181
Old Q Values:  [   6.16101416  151.09970542  896.26426613 -180.6       ]
New Q values:  [   6.16101416  187.45865474  896.26426613 -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  425.39590857  -289.59534477 -1184.80072168  -251.53897752]
------
Step:17, Action:North
State  261
Old Q Values:  [  425.39590857  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  192.86180411  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  23
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058  2.74107485 77.6781356   0.        ]
------
Step:18, Action:East
State  183
Old Q Values:  [ 9.91988058  2.74107485 77.6781356   0.        ]
New Q values:  [ 9.91988058  2.74107485 31.94054957  0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  4.89765111e+00  0.00000000e+00]
------
Step:19, Action:East
State  195
Old Q Values:  [ 38.85388605 258.19109515  55.18979381   0.        ]
New Q values:  [ 38.85388605 258.19109515  74.88735791   0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   4.24777227  178.03813463 -180.6           6.63136808]
------
Step:20, Action:West
State  210
Old Q Values:  [   4.24777227  178.03813463 -180.6           6.63136808]
New Q values:  [   4.24777227  178.03813463 -180.6           3.52184257]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  4.89765111e+00  0.00000000e+00]
------
Step:21, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  4.89765111e+00  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  5.47705008e+01  0.00000000e+00]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   4.24777227  178.03813463 -180.6           3.52184257]
------
Step:22, Action:South
State  210
Old Q Values:  [   4.24777227  178.03813463 -180.6           3.52184257]
New Q values:  [   4.24777227  596.1097021  -180.6           3.52184257]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   80.81480503 -8656.02923281 -7525.7277781   1751.64816082]
------
Step:23, Action:West
State  288
Old Q Values:  [   80.81480503 -8656.02923281 -7525.7277781   1751.64816082]
New Q values:  [   80.81480503 -8656.02923281 -7525.7277781  -4952.96765607]
Reward: -10001  Episode Reward:  -9983
xxxxx
x...x
x   x
x g x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           55.65156932  -179.38454759 -6942.80389713]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6          -0.46471887 -180.6          15.37800056]
New Q values:  [-180.6          15.49002275 -180.6          15.37800056]
Reward: 9  Episode Reward:  9
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   34.25303435  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:2, Action:North
State  208
Old Q Values:  [-5815.37790103   697.86773212 -2651.70614553 -2227.14232413]
New Q values:  [-2310.05568961   697.86773212 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  8
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           55.65156932  -179.38454759 -6942.80389713]
------
Step:3, Action:South
State  136
Old Q Values:  [-6180.6           55.65156932  -179.38454759 -6942.80389713]
New Q values:  [-6180.6           31.93653803  -179.38454759 -6942.80389713]
Reward: -1  Episode Reward:  7
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   34.25303435  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:4, Action:North
State  208
Old Q Values:  [-2310.05568961   697.86773212 -2651.70614553 -2227.14232413]
New Q values:  [ -915.04131444   697.86773212 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  6
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           31.93653803  -179.38454759 -6942.80389713]
------
Step:5, Action:South
State  136
Old Q Values:  [-6180.6           31.93653803  -179.38454759 -6942.80389713]
New Q values:  [-6180.6           22.45052552  -179.38454759 -6942.80389713]
Reward: -1  Episode Reward:  5
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   34.25303435  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:6, Action:North
State  216
Old Q Values:  [   34.25303435  -131.73529819 -6170.35693855 -5999.96410919]
New Q values:  [   17.74822056  -131.73529819 -6170.35693855 -5999.96410919]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          15.49002275 -180.6          15.37800056]
------
Step:7, Action:South
State  138
Old Q Values:  [-180.6          15.49002275 -180.6          15.37800056]
New Q values:  [-180.6          10.92047527 -180.6          15.37800056]
Reward: -1  Episode Reward:  3
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   17.74822056  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:8, Action:North
State  210
Old Q Values:  [   4.24777227  596.1097021  -180.6           3.52184257]
New Q values:  [   5.71250908  596.1097021  -180.6           3.52184257]
Reward: -1  Episode Reward:  2
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          10.92047527 -180.6          15.37800056]
------
Step:9, Action:West
State  138
Old Q Values:  [-180.6          10.92047527 -180.6          15.37800056]
New Q values:  [-180.6          10.92047527 -180.6           8.5778742 ]
Reward: -1  Episode Reward:  1
xxxxx
x.a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.00889132e+01]
------
Step:10, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.00889132e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.02202487e+01]
Reward: 9  Episode Reward:  10
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    1.40383269 -252.78192178]
------
Step:11, Action:South
State  110
Old Q Values:  [-1.80600000e+02 -2.64921308e+02 -1.23006946e-01 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -9.14607598e+01 -1.23006946e-01 -1.80600000e+02]
Reward: 9  Episode Reward:  19
xxxxx
x   x
xa. x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094  0.         30.35921172  0.        ]
------
Step:12, Action:East
State  188
Old Q Values:  [-6523.78898263  -247.57994041    12.21705441     0.        ]
New Q values:  [-6523.78898263  -247.57994041    15.12908376     0.        ]
Reward: 9  Episode Reward:  28
xxxxx
x   x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 5.06128434e-01  1.61408733e+01 -1.33592424e-02 -8.40000000e-01]
------
Step:13, Action:South
State  206
Old Q Values:  [0.         0.         6.60224881 0.        ]
New Q values:  [  0.         249.53519708   6.60224881   0.        ]
Reward: 9  Episode Reward:  37
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  813.78399027]
------
Step:14, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477 -1970.61654415    45.97225204]
New Q values:  [ 1156.57693201 -6212.61234477  -758.60217615    45.97225204]
Reward: 9  Episode Reward:  46
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   80.81480503 -8656.02923281 -7525.7277781  -4952.96765607]
------
Step:15, Action:North
State  288
Old Q Values:  [   80.81480503 -8656.02923281 -7525.7277781  -4952.96765607]
New Q values:  [  210.55883264 -8656.02923281 -7525.7277781  -4952.96765607]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   5.71250908  596.1097021  -180.6           3.52184257]
------
Step:16, Action:South
State  218
Old Q Values:  [ 6.53068391e-01 -1.68217779e+03  0.00000000e+00  1.09836731e+01]
New Q values:  [   0.65306839 -610.30346672    0.           10.98367306]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  210.55883264 -8656.02923281 -7525.7277781  -4952.96765607]
------
Step:17, Action:North
State  288
Old Q Values:  [  210.55883264 -8656.02923281 -7525.7277781  -4952.96765607]
New Q values:  [   86.91863497 -8656.02923281 -7525.7277781  -4952.96765607]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   0.65306839 -610.30346672    0.           10.98367306]
------
Step:18, Action:West
State  218
Old Q Values:  [   0.65306839 -610.30346672    0.           10.98367306]
New Q values:  [   0.65306839 -610.30346672    0.           26.71398745]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    76.40172743     0.        ]
------
Step:19, Action:East
State  200
Old Q Values:  [ 5.06128434e-01  1.61408733e+01 -1.33592424e-02 -8.40000000e-01]
New Q values:  [ 0.50612843 16.14087332  4.71912247 -0.84      ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   17.74822056  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:20, Action:North
State  216
Old Q Values:  [   17.74822056  -131.73529819 -6170.35693855 -5999.96410919]
New Q values:  [    9.77543081  -131.73529819 -6170.35693855 -5999.96410919]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          10.92047527 -180.6           8.5778742 ]
------
Step:21, Action:West
State  136
Old Q Values:  [-6180.6           22.45052552  -179.38454759 -6942.80389713]
New Q values:  [-6180.6           22.45052552  -179.38454759 -2776.25504789]
Reward: -1  Episode Reward:  39
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:22, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -6.51724480e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -2.60704385e+03]
Reward: -1  Episode Reward:  38
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-1.80600000e+02  1.51356435e+00 -3.89520980e+03  0.00000000e+00]
------
Step:23, Action:South
State  104
Old Q Values:  [-8652.84           0.         -2395.35372705 -8652.84      ]
New Q values:  [-8.65284000e+03 -6.00000000e-01 -2.39535373e+03 -8.65284000e+03]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[0. 0. 0. 0.]
------
Step:24, Action:East
State  184
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [0.       0.       4.242262 0.      ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 0.50612843 16.14087332  4.71912247 -0.84      ]
------
Step:25, Action:South
State  200
Old Q Values:  [ 0.50612843 16.14087332  4.71912247 -0.84      ]
New Q values:  [  0.50612843 352.82942893   4.71912247  -0.84      ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477  -758.60217615    45.97225204]
------
Step:26, Action:West
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  -758.60217615    45.97225204]
New Q values:  [ 1156.57693201 -6212.61234477  -758.60217615 61359.91238047]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:1, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -2.60704385e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -1.03422091e+03]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    10.65544615 -6443.21937065  -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-6.18060000e+03  4.75856216e+01  4.36335945e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  1.75984889e+02  4.36335945e+00  0.00000000e+00]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   -12.80681502   505.16880218     0.        ]
------
Step:3, Action:East
State  188
Old Q Values:  [-6523.78898263  -247.57994041    15.12908376     0.        ]
New Q values:  [-6523.78898263  -247.57994041   117.30046218     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  0.50612843 352.82942893   4.71912247  -0.84      ]
------
Step:4, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -1.22623518e+03  5.59303454e+01  0.00000000e+00]
New Q values:  [  -0.27887208 -240.95887638   55.93034536    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  813.78399027]
------
Step:5, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  -758.60217615 61359.91238047]
New Q values:  [ 1156.57693201 -6212.61234477  -271.96527997 61359.91238047]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   86.91863497 -8656.02923281 -7525.7277781  -4952.96765607]
------
Step:6, Action:North
State  288
Old Q Values:  [   86.91863497 -8656.02923281 -7525.7277781  -4952.96765607]
New Q values:  [   43.10008323 -8656.02923281 -7525.7277781  -4952.96765607]
Reward: 9  Episode Reward:  54
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    9.77543081  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:7, Action:North
State  216
Old Q Values:  [    9.77543081  -131.73529819 -6170.35693855 -5999.96410919]
New Q values:  [   10.04532998  -131.73529819 -6170.35693855 -5999.96410919]
Reward: -1  Episode Reward:  53
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           22.45052552  -179.38454759 -2776.25504789]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6          10.92047527 -180.6           8.5778742 ]
New Q values:  [-180.6          6.7817891 -180.6          8.5778742]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   10.04532998  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:9, Action:North
State  216
Old Q Values:  [   10.04532998  -131.73529819 -6170.35693855 -5999.96410919]
New Q values:  [ 5.99149425e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
Reward: -1  Episode Reward:  51
xxxxx
x  ax
x  gx
x.  x
xxxxx
Step:10, Action:East
State  136
Old Q Values:  [-6180.6           22.45052552  -179.38454759 -2776.25504789]
New Q values:  [-6180.6           22.45052552 -6245.61866138 -2776.25504789]
Reward: -10301  Episode Reward:  -10250
xxxxx
x  gx
x   x
x.  x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477  -271.96527997 61359.91238047]
------
Step:1, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  813.78399027]
New Q values:  [   1.6181791  -180.6         137.82092789  388.77213734]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  192.86180411  -289.59534477 -1184.80072168  -251.53897752]
------
Step:2, Action:North
State  261
Old Q Values:  [  192.86180411  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [   86.12688652  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058  2.74107485 31.94054957  0.        ]
------
Step:3, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534   538.44106874     0.        ]
New Q values:  [    0.         -5969.29177534   237.20757775     0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  5.47705008e+01  0.00000000e+00]
------
Step:4, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  5.47705008e+01  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  2.06141111e+02  0.00000000e+00]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   5.71250908  596.1097021  -180.6           3.52184257]
------
Step:5, Action:South
State  210
Old Q Values:  [   5.71250908  596.1097021  -180.6           3.52184257]
New Q values:  [ 5.71250908e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
Reward: -9991  Episode Reward:  -9965
xxxxx
x...x
x   x
x  gx
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          6.7817891 -180.6          8.5778742]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6          6.7817891 -180.6          8.5778742]
New Q values:  [-180.6          6.7817891 -180.6         11.8972243]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.02202487e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.02202487e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  9.45119741e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -9.14607598e+01 -1.23006946e-01 -1.80600000e+02]
------
Step:3, Action:East
State  110
Old Q Values:  [-1.80600000e+02 -9.14607598e+01 -1.23006946e-01 -1.80600000e+02]
New Q values:  [-180.6         -91.46075976    2.18615645 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  9.45119741e+00]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  9.45119741e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  3.83632590e+00]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6         -91.46075976    2.18615645 -180.6       ]
------
Step:5, Action:East
State  110
Old Q Values:  [-180.6         -91.46075976    2.18615645 -180.6       ]
New Q values:  [-180.6         -91.46075976    1.42536035 -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  3.83632590e+00]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  3.83632590e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.71921381e+00]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    1.40383269 -252.78192178]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869   18.42434499    4.23310798 -272.09726687]
New Q values:  [-177.44732869   18.25134119    4.23310798 -272.09726687]
Reward: 9  Episode Reward:  23
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842   18.27201064    4.20678389 -244.98066897]
------
Step:8, Action:South
State  183
Old Q Values:  [ 9.91988058  2.74107485 31.94054957  0.        ]
New Q values:  [ 9.91988058 32.33449589 31.94054957  0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   86.12688652  -289.59534477 -1184.80072168  -251.53897752]
------
Step:9, Action:North
State  260
Old Q Values:  [ 1793.87982037 -6457.4598       293.88434824 -2702.17995449]
New Q values:  [  726.05969166 -6457.4598       293.88434824 -2702.17995449]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094  0.         30.35921172  0.        ]
------
Step:10, Action:East
State  188
Old Q Values:  [-6523.78898263  -247.57994041   117.30046218     0.        ]
New Q values:  [-6523.78898263  -247.57994041    68.43347104     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    70.50198937    73.71095389     0.        ]
------
Step:11, Action:East
State  200
Old Q Values:  [  0.50612843 352.82942893   4.71912247  -0.84      ]
New Q values:  [  0.50612843 352.82942893   9.08509726  -0.84      ]
Reward: 9  Episode Reward:  39
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5.99149425e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:12, Action:North
State  216
Old Q Values:  [ 5.99149425e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
New Q values:  [    8.53175536  -131.73529819 -6170.35693855 -5999.96410919]
Reward: -1  Episode Reward:  38
xxxxx
x gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           22.45052552 -6245.61866138 -2776.25504789]
------
Step:13, Action:South
State  136
Old Q Values:  [-6180.6           22.45052552 -6245.61866138 -2776.25504789]
New Q values:  [-6180.6           10.93973681 -6245.61866138 -2776.25504789]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    8.53175536  -131.73529819 -6170.35693855 -5999.96410919]
------
Step:14, Action:North
State  216
Old Q Values:  [    8.53175536  -131.73529819 -6170.35693855 -5999.96410919]
New Q values:  [ 6.09462319e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
Reward: -1  Episode Reward:  36
xxxxx
x gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           10.93973681 -6245.61866138 -2776.25504789]
------
Step:15, Action:South
State  138
Old Q Values:  [-180.6          6.7817891 -180.6         11.8972243]
New Q values:  [-180.6          3.9411026 -180.6         11.8972243]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6.09462319e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:16, Action:North
State  216
Old Q Values:  [ 6.09462319e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
New Q values:  [ 5.40701656e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          3.9411026 -180.6         11.8972243]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6          3.9411026 -180.6         11.8972243]
New Q values:  [-180.6           3.9411026  -180.6           5.26646266]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.71921381e+00]
------
Step:18, Action:East
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  1.25127950e+00  1.39790392e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  1.48045060e+00  1.39790392e+00]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           3.9411026  -180.6           5.26646266]
------
Step:19, Action:West
State  138
Old Q Values:  [-180.6           3.9411026  -180.6           5.26646266]
New Q values:  [-180.6           3.9411026  -180.6           1.95072025]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  1.48045060e+00  1.39790392e+00]
------
Step:20, Action:East
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  1.48045060e+00  1.39790392e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  1.17451102e+00  1.39790392e+00]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           3.9411026  -180.6           1.95072025]
------
Step:21, Action:West
State  136
Old Q Values:  [-6180.6           10.93973681 -6245.61866138 -2776.25504789]
New Q values:  [-6180.6           10.93973681 -6245.61866138 -1109.63550819]
Reward: -1  Episode Reward:  29
xxxxx
x agx
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:22, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -1.03422091e+03]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -4.11091729e+02]
Reward: -1  Episode Reward:  28
xxxxx
xag x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    10.65544615 -6443.21937065  -180.6       ]
------
Step:23, Action:South
State  108
Old Q Values:  [-6.18060000e+03  1.75984889e+02  4.36335945e+00  0.00000000e+00]
New Q values:  [-6.18060000e+03  9.03239970e+01  4.36335945e+00  0.00000000e+00]
Reward: -1  Episode Reward:  27
xxxxx
xg  x
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  -247.57994041    68.43347104     0.        ]
------
Step:24, Action:East
State  188
Old Q Values:  [-6523.78898263  -247.57994041    68.43347104     0.        ]
New Q values:  [-6523.78898263  -247.57994041   132.6222171      0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xga x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  0.50612843 352.82942893   9.08509726  -0.84      ]
------
Step:25, Action:South
State  206
Old Q Values:  [  0.         249.53519708   6.60224881   0.        ]
New Q values:  [  0.         221.84572003   6.60224881   0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  388.77213734]
------
Step:26, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  -271.96527997 61359.91238047]
New Q values:  [ 1156.57693201 -6212.61234477 59909.54391298 61359.91238047]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   86.12688652  -289.59534477 -1184.80072168  -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [   86.12688652  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [   49.55110337  -289.59534477 -1184.80072168  -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058 32.33449589 31.94054957  0.        ]
------
Step:2, Action:South
State  183
Old Q Values:  [ 9.91988058 32.33449589 31.94054957  0.        ]
New Q values:  [ 9.91988058 27.19912937 31.94054957  0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   49.55110337  -289.59534477 -1184.80072168  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [   49.55110337  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  288.09972119  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   6.16101416  187.45865474  896.26426613 -180.6       ]
------
Step:4, Action:East
State  181
Old Q Values:  [   6.16101416  187.45865474  896.26426613 -180.6       ]
New Q values:  [    6.16101416   187.45865474 -4454.11955854  -180.6       ]
Reward: -9991  Episode Reward:  -9984
xxxxx
x...x
x g.x
x . x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 3.93991578e+03 0.00000000e+00]
------
Step:1, Action:East
State  193
Old Q Values:  [-5922.26708831   468.70371461 -9791.50993282     0.        ]
New Q values:  [-5922.26708831   468.70371461 -3909.58186816     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5.40701656e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:2, Action:South
State  208
Old Q Values:  [ -915.04131444   697.86773212 -2651.70614553 -2227.14232413]
New Q values:  [ -915.04131444   297.47711782 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   43.10008323 -8656.02923281 -7525.7277781  -4952.96765607]
------
Step:3, Action:West
State  288
Old Q Values:  [   43.10008323 -8656.02923281 -7525.7277781  -4952.96765607]
New Q values:  [   43.10008323 -8656.02923281 -7525.7277781  -1807.94776345]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  559.46432994  246.80935611]
------
Step:4, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477 59909.54391298 61359.91238047]
New Q values:  [ 1156.57693201 -6212.61234477 23976.14759016 61359.91238047]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   43.10008323 -8656.02923281 -7525.7277781  -1807.94776345]
------
Step:5, Action:North
State  288
Old Q Values:  [   43.10008323 -8656.02923281 -7525.7277781  -1807.94776345]
New Q values:  [  105.88316864 -8656.02923281 -7525.7277781  -1807.94776345]
Reward: -1  Episode Reward:  25
xxxxx
xg .x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -915.04131444   297.47711782 -2651.70614553 -2227.14232413]
------
Step:6, Action:South
State  208
Old Q Values:  [ -915.04131444   297.47711782 -2651.70614553 -2227.14232413]
New Q values:  [ -915.04131444   150.15579772 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  24
xxxxx
x. .x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  105.88316864 -8656.02923281 -7525.7277781  -1807.94776345]
------
Step:7, Action:North
State  288
Old Q Values:  [  105.88316864 -8656.02923281 -7525.7277781  -1807.94776345]
New Q values:  [   43.46702018 -8656.02923281 -7525.7277781  -1807.94776345]
Reward: -1  Episode Reward:  23
xxxxx
x. .x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.71250908e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [ 5.71250908e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 8.86733441e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  32
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           3.9411026  -180.6           1.95072025]
------
Step:9, Action:South
State  130
Old Q Values:  [-2.96121988e+02 -4.74830610e+03 -1.80008075e+02 -7.11140961e-01]
New Q values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -7.11140961e-01]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 8.86733441e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:10, Action:North
State  210
Old Q Values:  [ 8.86733441e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  30
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           3.9411026  -180.6           1.95072025]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6           3.9411026  -180.6           1.95072025]
New Q values:  [-180.6          46.02318035 -180.6           1.95072025]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -915.04131444   150.15579772 -2651.70614553 -2227.14232413]
------
Step:12, Action:South
State  208
Old Q Values:  [ -915.04131444   150.15579772 -2651.70614553 -2227.14232413]
New Q values:  [ -915.04131444    72.50242514 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  28
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   43.46702018 -8656.02923281 -7525.7277781  -1807.94776345]
------
Step:13, Action:North
State  288
Old Q Values:  [   43.46702018 -8656.02923281 -7525.7277781  -1807.94776345]
New Q values:  [   38.53753561 -8656.02923281 -7525.7277781  -1807.94776345]
Reward: -1  Episode Reward:  27
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -915.04131444    72.50242514 -2651.70614553 -2227.14232413]
------
Step:14, Action:South
State  208
Old Q Values:  [ -915.04131444    72.50242514 -2651.70614553 -2227.14232413]
New Q values:  [ -915.04131444    39.96223074 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   38.53753561 -8656.02923281 -7525.7277781  -1807.94776345]
------
Step:15, Action:West
State  288
Old Q Values:  [   38.53753561 -8656.02923281 -7525.7277781  -1807.94776345]
New Q values:  [   38.53753561 -8656.02923281 -7525.7277781  17684.19460876]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477 23976.14759016 61359.91238047]
------
Step:16, Action:West
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477 23976.14759016 61359.91238047]
New Q values:  [ 1156.57693201 -6212.61234477 23976.14759016 25885.48843185]
Reward: 9  Episode Reward:  34
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[1086.77874956 -180.6         142.30082849 4453.74493219]
------
Step:17, Action:West
State  256
Old Q Values:  [    0.             0.           422.41331248 -6036.76313782]
New Q values:  [    0.             0.           422.41331248 -2468.58126138]
Reward: -301  Episode Reward:  -267
xxxxx
xg  x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.           422.41331248 -2468.58126138]
------
Step:18, Action:East
State  257
Old Q Values:  [1086.77874956 -180.6         142.30082849 4453.74493219]
New Q values:  [1086.77874956 -180.6        7821.96686095 4453.74493219]
Reward: -1  Episode Reward:  -268
xxxxx
x.g x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477 23976.14759016 25885.48843185]
------
Step:19, Action:West
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477 23976.14759016 25885.48843185]
New Q values:  [ 1156.57693201 -6212.61234477 23976.14759016 10480.31936648]
Reward: -1  Episode Reward:  -269
xxxxx
xg  x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.           422.41331248 -2468.58126138]
------
Step:20, Action:East
State  257
Old Q Values:  [1086.77874956 -180.6        7821.96686095 4453.74493219]
New Q values:  [ 1086.77874956  -180.6        10321.03102143  4453.74493219]
Reward: -1  Episode Reward:  -270
xxxxx
x.g x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477 23976.14759016 10480.31936648]
------
Step:21, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477 23976.14759016 10480.31936648]
New Q values:  [ 1156.57693201 -6212.61234477 14895.11741869 10480.31936648]
Reward: -1  Episode Reward:  -271
xxxxx
xg  x
x.  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   38.53753561 -8656.02923281 -7525.7277781  17684.19460876]
------
Step:22, Action:West
State  288
Old Q Values:  [   38.53753561 -8656.02923281 -7525.7277781  17684.19460876]
New Q values:  [   38.53753561 -8656.02923281 -7525.7277781  11541.61306911]
Reward: -1  Episode Reward:  -272
xxxxx
x.g x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477 14895.11741869 10480.31936648]
------
Step:23, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477 14895.11741869 10480.31936648]
New Q values:  [ 1156.57693201 -6212.61234477  9419.93088821 10480.31936648]
Reward: -1  Episode Reward:  -273
xxxxx
xg  x
x.  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   38.53753561 -8656.02923281 -7525.7277781  11541.61306911]
------
Step:24, Action:West
State  288
Old Q Values:  [   38.53753561 -8656.02923281 -7525.7277781  11541.61306911]
New Q values:  [   38.53753561 -8656.02923281 -7525.7277781   7760.14103759]
Reward: -1  Episode Reward:  -274
xxxxx
x.  x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477  9419.93088821 10480.31936648]
------
Step:25, Action:West
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  9419.93088821 10480.31936648]
New Q values:  [ 1156.57693201 -6212.61234477  9419.93088821  7287.83705302]
Reward: -1  Episode Reward:  -275
xxxxx
x.  x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[ 1086.77874956  -180.6        10321.03102143  4453.74493219]
------
Step:26, Action:East
State  257
Old Q Values:  [ 1086.77874956  -180.6        10321.03102143  4453.74493219]
New Q values:  [1086.77874956 -180.6         953.79167504 4453.74493219]
Reward: -10001  Episode Reward:  -10276
xxxxx
x.  x
x.  x
x g x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  726.05969166 -6457.4598       293.88434824 -2702.17995449]
------
Step:1, Action:East
State  260
Old Q Values:  [  726.05969166 -6457.4598       293.88434824 -2702.17995449]
New Q values:  [  726.05969166 -6457.4598       239.5853805  -2702.17995449]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  388.77213734]
------
Step:2, Action:West
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  9419.93088821  7287.83705302]
New Q values:  [ 1156.57693201 -6212.61234477  9419.93088821  3132.35272871]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  726.05969166 -6457.4598       239.5853805  -2702.17995449]
------
Step:3, Action:East
State  260
Old Q Values:  [  726.05969166 -6457.4598       239.5853805  -2702.17995449]
New Q values:  [  726.05969166 -6457.4598       211.8657934  -2702.17995449]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  388.77213734]
------
Step:4, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  388.77213734]
New Q values:  [   1.6181791  -180.6         137.82092789  372.72676243]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  726.05969166 -6457.4598       211.8657934  -2702.17995449]
------
Step:5, Action:North
State  260
Old Q Values:  [  726.05969166 -6457.4598       211.8657934  -2702.17995449]
New Q values:  [  441.37451732 -6457.4598       211.8657934  -2702.17995449]
Reward: -1  Episode Reward:  5
xxxxx
xg..x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   -12.80681502   505.16880218     0.        ]
------
Step:6, Action:East
State  180
Old Q Values:  [-1367.02476015   -12.80681502   505.16880218     0.        ]
New Q values:  [-1367.02476015   -12.80681502  1389.44225588     0.        ]
Reward: 9  Episode Reward:  14
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 3.93991578e+03 0.00000000e+00]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  2.06141111e+02  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  8.90952237e+01  0.00000000e+00]
Reward: 9  Episode Reward:  23
xxxxx
x...x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:8, Action:North
State  208
Old Q Values:  [ -915.04131444    39.96223074 -2651.70614553 -2227.14232413]
New Q values:  [ -360.82986806    39.96223074 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  32
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -7.11140961e-01]
------
Step:9, Action:West
State  130
Old Q Values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -7.11140961e-01]
New Q values:  [ -296.12198833 -1897.26224006  -180.00807518     5.05191599]
Reward: 9  Episode Reward:  41
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -2.12092074e-01 -6.00000000e-01]
------
Step:10, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03 -2.12092074e-01 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03  8.30737968e-01 -6.00000000e-01]
Reward: -1  Episode Reward:  40
xxxxx
x. ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ -296.12198833 -1897.26224006  -180.00807518     5.05191599]
------
Step:11, Action:West
State  130
Old Q Values:  [ -296.12198833 -1897.26224006  -180.00807518     5.05191599]
New Q values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02  1.66998779e+00]
Reward: -1  Episode Reward:  39
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03  8.30737968e-01 -6.00000000e-01]
------
Step:12, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03  8.30737968e-01 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03  2.33291524e-01 -6.00000000e-01]
Reward: -1  Episode Reward:  38
xxxxx
x. ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -1.89726224e+03 -1.80008075e+02  1.66998779e+00]
------
Step:13, Action:West
State  130
Old Q Values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02  1.66998779e+00]
New Q values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02  1.37982572e-01]
Reward: -1  Episode Reward:  37
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03  2.33291524e-01 -6.00000000e-01]
------
Step:14, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03  2.33291524e-01 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03 -4.65288619e-01 -6.00000000e-01]
Reward: -1  Episode Reward:  36
xxxxx
x. ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -1.89726224e+03 -1.80008075e+02  1.37982572e-01]
------
Step:15, Action:West
State  130
Old Q Values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02  1.37982572e-01]
New Q values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -6.84393557e-01]
Reward: -1  Episode Reward:  35
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -4.65288619e-01 -6.00000000e-01]
------
Step:16, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03 -4.65288619e-01 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03 -9.91433515e-01 -6.00000000e-01]
Reward: -1  Episode Reward:  34
xxxxx
x. ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -6.84393557e-01]
------
Step:17, Action:West
State  130
Old Q Values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -6.84393557e-01]
New Q values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -1.05375742e+00]
Reward: -1  Episode Reward:  33
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -9.91433515e-01 -6.00000000e-01]
------
Step:18, Action:West
State  127
Old Q Values:  [ 0.          0.          0.         11.12502103]
New Q values:  [ 0.          0.          0.         15.32541077]
Reward: 9  Episode Reward:  42
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   18.25134119    4.23310798 -272.09726687]
------
Step:19, Action:South
State  109
Old Q Values:  [ -241.10880094    10.65544615 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094     9.14378165 -6443.21937065  -180.6       ]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842   18.27201064    4.20678389 -244.98066897]
------
Step:20, Action:South
State  189
Old Q Values:  [  14.51316842   18.27201064    4.20678389 -244.98066897]
New Q values:  [  14.51316842   93.13872061    4.20678389 -244.98066897]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  288.09972119  -289.59534477 -1184.80072168  -251.53897752]
------
Step:21, Action:North
State  261
Old Q Values:  [  288.09972119  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  142.58150466  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842   93.13872061    4.20678389 -244.98066897]
------
Step:22, Action:South
State  189
Old Q Values:  [  14.51316842   93.13872061    4.20678389 -244.98066897]
New Q values:  [  14.51316842   79.42993964    4.20678389 -244.98066897]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  142.58150466  -289.59534477 -1184.80072168  -251.53897752]
------
Step:23, Action:North
State  261
Old Q Values:  [  142.58150466  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [   80.26158376  -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842   79.42993964    4.20678389 -244.98066897]
------
Step:24, Action:South
State  191
Old Q Values:  [3.06655861 1.79290259 0.         0.        ]
New Q values:  [ 3.06655861 24.19563616  0.          0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   80.26158376  -289.59534477 -1184.80072168  -251.53897752]
------
Step:25, Action:North
State  261
Old Q Values:  [   80.26158376  -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [   55.3336154   -289.59534477 -1184.80072168  -251.53897752]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842   79.42993964    4.20678389 -244.98066897]
------
Step:26, Action:South
State  188
Old Q Values:  [-6523.78898263  -247.57994041   132.6222171      0.        ]
New Q values:  [-6523.78898263    32.78037903   132.6222171      0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  441.37451732 -6457.4598       211.8657934  -2702.17995449]
------
Step:27, Action:East
State  261
Old Q Values:  [   55.3336154   -289.59534477 -1184.80072168  -251.53897752]
New Q values:  [  55.3336154  -289.59534477 -362.70225994 -251.53897752]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  372.72676243]
------
Step:28, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  372.72676243]
New Q values:  [   1.6181791  -180.6         137.82092789  165.09078959]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  55.3336154  -289.59534477 -362.70225994 -251.53897752]
------
Step:29, Action:North
State  257
Old Q Values:  [1086.77874956 -180.6         953.79167504 4453.74493219]
New Q values:  [10516.33202512  -180.6          953.79167504  4453.74493219]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[ 0.00000000e+00 -6.00000000e-01  3.36074018e+04  0.00000000e+00]
------
Step:30, Action:East
State  189
Old Q Values:  [  14.51316842   79.42993964    4.20678389 -244.98066897]
New Q values:  [  14.51316842   79.42993964    3.35286341 -244.98066897]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         7.56716619]
------
Step:31, Action:West
State  204
Old Q Values:  [ 0.          0.         -0.32153244  9.02973096]
New Q values:  [ 0.          0.         -0.32153244 26.84087428]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842   79.42993964    3.35286341 -244.98066897]
------
Step:32, Action:South
State  189
Old Q Values:  [  14.51316842   79.42993964    3.35286341 -244.98066897]
New Q values:  [  14.51316842   47.77206048    3.35286341 -244.98066897]
Reward: -1  Episode Reward:  28
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  55.3336154  -289.59534477 -362.70225994 -251.53897752]
------
Step:33, Action:North
State  260
Old Q Values:  [  441.37451732 -6457.4598       211.8657934  -2702.17995449]
New Q values:  [  215.73647206 -6457.4598       211.8657934  -2702.17995449]
Reward: -1  Episode Reward:  27
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263    32.78037903   132.6222171      0.        ]
------
Step:34, Action:East
State  188
Old Q Values:  [-6523.78898263    32.78037903   132.6222171      0.        ]
New Q values:  [-6523.78898263    32.78037903    60.50114912     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[ 0.          0.         -0.32153244 26.84087428]
------
Step:35, Action:South
State  204
Old Q Values:  [ 0.          0.         -0.32153244 26.84087428]
New Q values:  [ 0.         48.92723688 -0.32153244 26.84087428]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  165.09078959]
------
Step:36, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  165.09078959]
New Q values:  [   1.6181791  -180.6         137.82092789  130.15725745]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  215.73647206 -6457.4598       211.8657934  -2702.17995449]
------
Step:37, Action:East
State  261
Old Q Values:  [  55.3336154  -289.59534477 -362.70225994 -251.53897752]
New Q values:  [  55.3336154  -289.59534477 -104.33462561 -251.53897752]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  130.15725745]
------
Step:38, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  559.46432994  246.80935611]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  6.25572280e+04  2.46809356e+02]
Reward: 100009  Episode Reward:  100032
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           10.93973681 -6245.61866138 -1109.63550819]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6          46.02318035 -180.6           1.95072025]
New Q values:  [-180.6          25.43137711 -180.6           1.95072025]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5.40701656e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:2, Action:North
State  208
Old Q Values:  [ -360.82986806    39.96223074 -2651.70614553 -2227.14232413]
New Q values:  [ -141.65002618    39.96223074 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  8
xxxxx
xg.ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6           10.93973681 -6245.61866138 -1109.63550819]
------
Step:3, Action:South
State  136
Old Q Values:  [-6180.6           10.93973681 -6245.61866138 -1109.63550819]
New Q values:  [-6.18060000e+03  5.39799969e+00 -6.24561866e+03 -1.10963551e+03]
Reward: -1  Episode Reward:  7
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5.40701656e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:4, Action:North
State  208
Old Q Values:  [ -141.65002618    39.96223074 -2651.70614553 -2227.14232413]
New Q values:  [  -55.64061056    39.96223074 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  6
xxxxx
xg.ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  5.39799969e+00 -6.24561866e+03 -1.10963551e+03]
------
Step:5, Action:South
State  136
Old Q Values:  [-6.18060000e+03  5.39799969e+00 -6.24561866e+03 -1.10963551e+03]
New Q values:  [-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -1.10963551e+03]
Reward: -1  Episode Reward:  5
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5.40701656e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:6, Action:North
State  208
Old Q Values:  [  -55.64061056    39.96223074 -2651.70614553 -2227.14232413]
New Q values:  [-6021.90185277    39.96223074 -2651.70614553 -2227.14232413]
Reward: -10001  Episode Reward:  -9996
xxxxx
x .gx
x.. x
x...x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  55.3336154  -289.59534477 -104.33462561 -251.53897752]
------
Step:1, Action:North
State  260
Old Q Values:  [  215.73647206 -6457.4598       211.8657934  -2702.17995449]
New Q values:  [  162.85686215 -6457.4598       211.8657934  -2702.17995449]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   237.20757775     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 9.91988058 27.19912937 31.94054957  0.        ]
New Q values:  [ 9.91988058 27.19912937 44.90478695  0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  8.90952237e+01  0.00000000e+00]
------
Step:3, Action:East
State  195
Old Q Values:  [ 38.85388605 258.19109515  74.88735791   0.        ]
New Q values:  [ 38.85388605 258.19109515  36.59372253   0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:4, Action:North
State  208
Old Q Values:  [-6021.90185277    39.96223074 -2651.70614553 -2227.14232413]
New Q values:  [-2403.67686834    39.96223074 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -1.05375742e+00]
------
Step:5, Action:West
State  130
Old Q Values:  [-2.96121988e+02 -1.89726224e+03 -1.80008075e+02 -1.05375742e+00]
New Q values:  [ -296.12198833 -1897.26224006  -180.00807518     4.79849703]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -9.91433515e-01 -6.00000000e-01]
------
Step:6, Action:West
State  126
Old Q Values:  [0.         0.         9.68085385 0.        ]
New Q values:  [0.         0.         9.68085385 5.8276081 ]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6         -91.46075976    1.42536035 -180.6       ]
------
Step:7, Action:East
State  108
Old Q Values:  [-6.18060000e+03  9.03239970e+01  4.36335945e+00  0.00000000e+00]
New Q values:  [-6180.6           90.32399703    16.09371094     0.        ]
Reward: -1  Episode Reward:  53
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[ 0.          5.4        49.82789052  0.91053821]
------
Step:8, Action:East
State  124
Old Q Values:  [ 0.          5.4        49.82789052  0.91053821]
New Q values:  [ 0.          5.4        20.28554766  0.91053821]
Reward: -1  Episode Reward:  52
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -1.10963551e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6          25.43137711 -180.6           1.95072025]
New Q values:  [-180.6          11.19465581 -180.6           1.95072025]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5.40701656e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:10, Action:North
State  216
Old Q Values:  [ 5.40701656e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
New Q values:  [ 2.51719808e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
Reward: -1  Episode Reward:  50
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -1.10963551e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6          11.19465581 -180.6           1.95072025]
New Q values:  [-180.6           4.63302175 -180.6           1.95072025]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.51719808e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:12, Action:North
State  216
Old Q Values:  [ 2.51719808e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
New Q values:  [ 1.79678576e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.63302175 -180.6           1.95072025]
------
Step:13, Action:West
State  130
Old Q Values:  [ -296.12198833 -1897.26224006  -180.00807518     4.79849703]
New Q values:  [ -296.12198833 -1897.26224006  -180.00807518    19.25635799]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          59.78986392    6.47656656    0.        ]
------
Step:14, Action:South
State  126
Old Q Values:  [0.         0.         9.68085385 5.8276081 ]
New Q values:  [ 0.         65.95371601  9.68085385  5.8276081 ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         221.84572003   6.60224881   0.        ]
------
Step:15, Action:East
State  195
Old Q Values:  [ 38.85388605 258.19109515  36.59372253   0.        ]
New Q values:  [ 38.85388605 258.19109515  15.27626837   0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:16, Action:North
State  218
Old Q Values:  [   0.65306839 -610.30346672    0.           26.71398745]
New Q values:  [   1.05113388 -610.30346672    0.           26.71398745]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.63302175 -180.6           1.95072025]
------
Step:17, Action:South
State  130
Old Q Values:  [ -296.12198833 -1897.26224006  -180.00807518    19.25635799]
New Q values:  [-296.12198833 -758.26611666 -180.00807518   19.25635799]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:18, Action:North
State  218
Old Q Values:  [   1.05113388 -610.30346672    0.           26.71398745]
New Q values:  [   1.21036008 -610.30346672    0.           26.71398745]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.63302175 -180.6           1.95072025]
------
Step:19, Action:South
State  130
Old Q Values:  [-296.12198833 -758.26611666 -180.00807518   19.25635799]
New Q values:  [-296.12198833 -302.6676673  -180.00807518   19.25635799]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:20, Action:North
State  216
Old Q Values:  [ 1.79678576e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
New Q values:  [ 1.50862083e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.63302175 -180.6           1.95072025]
------
Step:21, Action:West
State  138
Old Q Values:  [-180.6           4.63302175 -180.6           1.95072025]
New Q values:  [-180.6           4.63302175 -180.6          19.9664029 ]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[ 0.         65.95371601  9.68085385  5.8276081 ]
------
Step:22, Action:East
State  127
Old Q Values:  [ 0.          0.          0.         15.32541077]
New Q values:  [ 0.          0.          5.38992087 15.32541077]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.63302175 -180.6          19.9664029 ]
------
Step:23, Action:West
State  136
Old Q Values:  [-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -1.10963551e+03]
New Q values:  [-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -4.44129760e+02]
Reward: -1  Episode Reward:  37
xxxxx
x agx
x   x
x  .x
xxxxx
Step:24, Action:East
State  124
Old Q Values:  [ 0.          5.4        20.28554766  0.91053821]
New Q values:  [ 0.00000000e+00  5.40000000e+00 -5.98640012e+03  9.10538211e-01]
Reward: -10001  Episode Reward:  -9964
xxxxx
x g x
x   x
x  .x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094     9.14378165 -6443.21937065  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094     9.14378165 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    65.29510908 -6443.21937065  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416   187.45865474 -4454.11955854  -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [    6.16101416   187.45865474 -4454.11955854  -180.6       ]
New Q values:  [    6.16101416    96.98354651 -4454.11955854  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  55.3336154  -289.59534477 -104.33462561 -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  55.3336154  -289.59534477 -104.33462561 -251.53897752]
New Q values:  [  50.62851011 -289.59534477 -104.33462561 -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416    96.98354651 -4454.11955854  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [    6.16101416    96.98354651 -4454.11955854  -180.6       ]
New Q values:  [    6.16101416    53.38197164 -4454.11955854  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x .gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  50.62851011 -289.59534477 -104.33462561 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [  50.62851011 -289.59534477 -104.33462561 -251.53897752]
New Q values:  [  35.66599554 -289.59534477 -104.33462561 -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416    53.38197164 -4454.11955854  -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [    6.16101416    53.38197164 -4454.11955854  -180.6       ]
New Q values:  [    6.16101416    31.45258732 -4454.11955854  -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x . x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  35.66599554 -289.59534477 -104.33462561 -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [  35.66599554 -289.59534477 -104.33462561 -251.53897752]
New Q values:  [  23.10217441 -289.59534477 -104.33462561 -251.53897752]
Reward: -1  Episode Reward:  13
xxxxx
x . x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416    31.45258732 -4454.11955854  -180.6       ]
------
Step:8, Action:South
State  181
Old Q Values:  [    6.16101416    31.45258732 -4454.11955854  -180.6       ]
New Q values:  [    6.16101416    18.91168725 -4454.11955854  -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x .gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  23.10217441 -289.59534477 -104.33462561 -251.53897752]
------
Step:9, Action:North
State  261
Old Q Values:  [  23.10217441 -289.59534477 -104.33462561 -251.53897752]
New Q values:  [  14.31437594 -289.59534477 -104.33462561 -251.53897752]
Reward: -1  Episode Reward:  11
xxxxx
x . x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416    18.91168725 -4454.11955854  -180.6       ]
------
Step:10, Action:South
State  181
Old Q Values:  [    6.16101416    18.91168725 -4454.11955854  -180.6       ]
New Q values:  [    6.16101416    11.25898768 -4454.11955854  -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x . x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  14.31437594 -289.59534477 -104.33462561 -251.53897752]
------
Step:11, Action:North
State  261
Old Q Values:  [  14.31437594 -289.59534477 -104.33462561 -251.53897752]
New Q values:  [   8.50344668 -289.59534477 -104.33462561 -251.53897752]
Reward: -1  Episode Reward:  9
xxxxx
x . x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416    11.25898768 -4454.11955854  -180.6       ]
------
Step:12, Action:South
State  183
Old Q Values:  [ 9.91988058 27.19912937 44.90478695  0.        ]
New Q values:  [ 9.91988058 12.83068575 44.90478695  0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x . x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.50344668 -289.59534477 -104.33462561 -251.53897752]
------
Step:13, Action:North
State  261
Old Q Values:  [   8.50344668 -289.59534477 -104.33462561 -251.53897752]
New Q values:  [  16.27281476 -289.59534477 -104.33462561 -251.53897752]
Reward: -1  Episode Reward:  7
xxxxx
x . x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058 12.83068575 44.90478695  0.        ]
------
Step:14, Action:East
State  183
Old Q Values:  [ 9.91988058 12.83068575 44.90478695  0.        ]
New Q values:  [  9.91988058  12.83068575 100.81924333   0.        ]
Reward: 9  Episode Reward:  16
xxxxx
x . x
x a.x
x .gx
xxxxx
Step:15, Action:South
State  193
Old Q Values:  [-5922.26708831   468.70371461 -3909.58186816     0.        ]
New Q values:  [-5922.26708831 18960.04989882 -3909.58186816     0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x . x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  6.25572280e+04  2.46809356e+02]
------
Step:16, Action:East
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  6.25572280e+04  2.46809356e+02]
New Q values:  [   37.74111519  -168.92307549 27356.33352858   246.80935611]
Reward: 9  Episode Reward:  34
xxxxx
x .gx
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   38.53753561 -8656.02923281 -7525.7277781   7760.14103759]
------
Step:17, Action:West
State  288
Old Q Values:  [   38.53753561 -8656.02923281 -7525.7277781   7760.14103759]
New Q values:  [   38.53753561 -8656.02923281 -7525.7277781  11310.35647361]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 27356.33352858   246.80935611]
------
Step:18, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  9419.93088821  3132.35272871]
New Q values:  [ 1156.57693201 -6212.61234477  7160.47929737  3132.35272871]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   38.53753561 -8656.02923281 -7525.7277781  11310.35647361]
------
Step:19, Action:West
State  288
Old Q Values:  [   38.53753561 -8656.02923281 -7525.7277781  11310.35647361]
New Q values:  [   38.53753561 -8656.02923281 -7525.7277781    671.68637865]
Reward: -10001  Episode Reward:  -9969
xxxxx
x . x
x  .x
x g x
xxxxx
Episode # 200
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 3.93991578e+03 0.00000000e+00]
------
Step:1, Action:East
State  200
Old Q Values:  [  0.50612843 352.82942893   9.08509726  -0.84      ]
New Q values:  [  0.50612843 352.82942893   9.48662515  -0.84      ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.50862083e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
------
Step:2, Action:North
State  216
Old Q Values:  [ 1.50862083e+00 -1.31735298e+02 -6.17035694e+03 -5.99996411e+03]
New Q values:  [   11.9933692   -131.73529819 -6170.35693855 -5999.96410919]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.63302175 -180.6          19.9664029 ]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6           4.63302175 -180.6          19.9664029 ]
New Q values:  [-180.6           4.63302175 -180.6           8.4941341 ]
Reward: -1  Episode Reward:  17
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.71921381e+00]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.69190981e+00  1.71921381e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.42500416e+00  1.71921381e+00]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.63302175 -180.6           8.4941341 ]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6           4.63302175 -180.6           8.4941341 ]
New Q values:  [-180.6           4.63302175 -180.6           3.21702482]
Reward: -1  Episode Reward:  15
xxxxx
x.a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  1.17451102e+00  1.39790392e+00]
------
Step:6, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  1.17451102e+00  1.39790392e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  1.17451102e+00  6.74384502e+00]
Reward: 9  Episode Reward:  24
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    2.61561149    1.40383269 -252.78192178]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558    2.61561149    1.40383269 -252.78192178]
New Q values:  [-252.35169558   23.97980525    1.40383269 -252.78192178]
Reward: 9  Episode Reward:  33
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.         58.44520219  0.        ]
------
Step:8, Action:East
State  189
Old Q Values:  [  14.51316842   47.77206048    3.35286341 -244.98066897]
New Q values:  [  14.51316842   47.77206048    2.84195242 -244.98066897]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[7.00269019 0.         5.39642008 0.024     ]
------
Step:9, Action:North
State  200
Old Q Values:  [  0.50612843 352.82942893   9.48662515  -0.84      ]
New Q values:  [  0.62995262 352.82942893   9.48662515  -0.84      ]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.42500416e+00  1.71921381e+00]
------
Step:10, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.27250374e+00 -4.11091729e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  1.26339295e+00 -4.11091729e+02]
Reward: -1  Episode Reward:  30
xxxxx
x gax
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -4.44129760e+02]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6           4.63302175 -180.6           3.21702482]
New Q values:  [-180.6           4.85121946 -180.6           3.21702482]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   11.9933692   -131.73529819 -6170.35693855 -5999.96410919]
------
Step:12, Action:North
State  218
Old Q Values:  [   1.21036008 -610.30346672    0.           26.71398745]
New Q values:  [   1.33950987 -610.30346672    0.           26.71398745]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.85121946 -180.6           3.21702482]
------
Step:13, Action:South
State  138
Old Q Values:  [-180.6           4.85121946 -180.6           3.21702482]
New Q values:  [-180.6           9.35468402 -180.6           3.21702482]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   1.33950987 -610.30346672    0.           26.71398745]
------
Step:14, Action:West
State  216
Old Q Values:  [   11.9933692   -131.73529819 -6170.35693855 -5999.96410919]
New Q values:  [   11.9933692   -131.73529819 -6170.35693855 -2398.48483662]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[7.00269019 0.         5.39642008 0.024     ]
------
Step:15, Action:North
State  200
Old Q Values:  [  0.62995262 352.82942893   9.48662515  -0.84      ]
New Q values:  [  0.67948229 352.82942893   9.48662515  -0.84      ]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.42500416e+00  1.71921381e+00]
------
Step:16, Action:East
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  1.17451102e+00  6.74384502e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  6.74384502e+00]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           9.35468402 -180.6           3.21702482]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6           9.35468402 -180.6           3.21702482]
New Q values:  [-180.6           9.35468402 -180.6           1.71431117]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.42500416e+00  1.71921381e+00]
------
Step:18, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  1.26339295e+00 -4.11091729e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  8.59748634e-01 -4.11091729e+02]
Reward: -1  Episode Reward:  22
xxxxx
x gax
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -4.44129760e+02]
------
Step:19, Action:South
State  138
Old Q Values:  [-180.6           9.35468402 -180.6           1.71431117]
New Q values:  [-180.6           6.73988437 -180.6           1.71431117]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   11.9933692   -131.73529819 -6170.35693855 -2398.48483662]
------
Step:20, Action:North
State  216
Old Q Values:  [   11.9933692   -131.73529819 -6170.35693855 -2398.48483662]
New Q values:  [    6.21931299  -131.73529819 -6170.35693855 -2398.48483662]
Reward: -1  Episode Reward:  20
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           6.73988437 -180.6           1.71431117]
------
Step:21, Action:South
State  138
Old Q Values:  [-180.6           6.73988437 -180.6           1.71431117]
New Q values:  [-180.6          10.11014998 -180.6           1.71431117]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   1.33950987 -610.30346672    0.           26.71398745]
------
Step:22, Action:West
State  216
Old Q Values:  [    6.21931299  -131.73529819 -6170.35693855 -2398.48483662]
New Q values:  [    6.21931299  -131.73529819 -6170.35693855  -854.14510597]
Reward: -1  Episode Reward:  18
xxxxx
x   x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  0.67948229 352.82942893   9.48662515  -0.84      ]
------
Step:23, Action:South
State  200
Old Q Values:  [  0.67948229 352.82942893   9.48662515  -0.84      ]
New Q values:  [ 6.79482295e-01  2.29467556e+03  9.48662515e+00 -8.40000000e-01]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477  7160.47929737  3132.35272871]
------
Step:24, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  7160.47929737  3132.35272871]
New Q values:  [ 1156.57693201 -6212.61234477  3071.09763254  3132.35272871]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   38.53753561 -8656.02923281 -7525.7277781    671.68637865]
------
Step:25, Action:West
State  288
Old Q Values:  [   38.53753561 -8656.02923281 -7525.7277781    671.68637865]
New Q values:  [   38.53753561 -8656.02923281 -7525.7277781   1207.78037007]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477  3071.09763254  3132.35272871]
------
Step:26, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  3071.09763254  3132.35272871]
New Q values:  [ 1156.57693201 -6212.61234477  1590.17316404  3132.35272871]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   38.53753561 -8656.02923281 -7525.7277781   1207.78037007]
------
Step:27, Action:West
State  288
Old Q Values:  [   38.53753561 -8656.02923281 -7525.7277781   1207.78037007]
New Q values:  [   38.53753561 -8656.02923281 -7525.7277781   1422.21796664]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477  1590.17316404  3132.35272871]
------
Step:28, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  1590.17316404  3132.35272871]
New Q values:  [ 1156.57693201 -6212.61234477  1062.13465561  3132.35272871]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   38.53753561 -8656.02923281 -7525.7277781   1422.21796664]
------
Step:29, Action:North
State  288
Old Q Values:  [   38.53753561 -8656.02923281 -7525.7277781   1422.21796664]
New Q values:  [   16.05379361 -8656.02923281 -7525.7277781   1422.21796664]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:30, Action:North
State  216
Old Q Values:  [    6.21931299  -131.73529819 -6170.35693855  -854.14510597]
New Q values:  [ 4.92077019e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          10.11014998 -180.6           1.71431117]
------
Step:31, Action:South
State  130
Old Q Values:  [-296.12198833 -302.6676673  -180.00807518   19.25635799]
New Q values:  [-296.12198833 -120.42828756 -180.00807518   19.25635799]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:32, Action:North
State  218
Old Q Values:  [   1.33950987 -610.30346672    0.           26.71398745]
New Q values:  [   2.96884894 -610.30346672    0.           26.71398745]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          10.11014998 -180.6           1.71431117]
------
Step:33, Action:South
State  130
Old Q Values:  [-296.12198833 -120.42828756 -180.00807518   19.25635799]
New Q values:  [-296.12198833  -47.53253566 -180.00807518   19.25635799]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:34, Action:North
State  218
Old Q Values:  [   2.96884894 -610.30346672    0.           26.71398745]
New Q values:  [   3.62058457 -610.30346672    0.           26.71398745]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6          10.11014998 -180.6           1.71431117]
------
Step:35, Action:South
State  138
Old Q Values:  [-180.6          10.11014998 -180.6           1.71431117]
New Q values:  [-180.6           4.92029105 -180.6           1.71431117]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.92077019e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
------
Step:36, Action:North
State  216
Old Q Values:  [ 4.92077019e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
New Q values:  [ 2.84439539e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.92029105 -180.6           1.71431117]
------
Step:37, Action:West
State  138
Old Q Values:  [-180.6           4.92029105 -180.6           1.71431117]
New Q values:  [-180.6           4.92029105 -180.6           2.10887797]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  6.74384502e+00]
------
Step:38, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.42500416e+00  1.71921381e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.42500416e+00  7.28162710e+00]
Reward: -1  Episode Reward:  22
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   23.97980525    1.40383269 -252.78192178]
------
Step:39, Action:South
State  98
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [  0.         199.63143496   0.           0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[  0.           0.         667.43811653   0.        ]
------
Step:40, Action:East
State  187
Old Q Values:  [-0.11058345  0.         58.44520219  0.        ]
New Q values:  [-0.11058345  0.         45.6985991   0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    76.40172743     0.        ]
------
Step:41, Action:East
State  203
Old Q Values:  [0.89767243 0.         0.         0.        ]
New Q values:  [0.89767243 0.         7.41419624 0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   3.62058457 -610.30346672    0.           26.71398745]
------
Step:42, Action:West
State  218
Old Q Values:  [   3.62058457 -610.30346672    0.           26.71398745]
New Q values:  [   3.62058457 -610.30346672    0.           33.00611321]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    76.40172743     0.        ]
------
Step:43, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  8.90952237e+01  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  3.62768689e+01  0.00000000e+00]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:44, Action:North
State  218
Old Q Values:  [   3.62058457 -610.30346672    0.           33.00611321]
New Q values:  [   2.32432114 -610.30346672    0.           33.00611321]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           4.92029105 -180.6           2.10887797]
------
Step:45, Action:South
State  138
Old Q Values:  [-180.6           4.92029105 -180.6           2.10887797]
New Q values:  [-180.6           2.22143504 -180.6           2.10887797]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.84439539e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
------
Step:46, Action:North
State  216
Old Q Values:  [ 2.84439539e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
New Q values:  [ 1.20418867e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.22143504 -180.6           2.10887797]
------
Step:47, Action:West
State  136
Old Q Values:  [-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -4.44129760e+02]
New Q values:  [-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -1.76785393e+02]
Reward: -1  Episode Reward:  13
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:48, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  6.74384502e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.29147958e+00]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   23.97980525    1.40383269 -252.78192178]
------
Step:49, Action:South
State  107
Old Q Values:  [-252.35169558   23.97980525    1.40383269 -252.78192178]
New Q values:  [-252.35169558    8.9919221     1.40383269 -252.78192178]
Reward: -1  Episode Reward:  11
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
------
Step:50, Action:South
State  179
Old Q Values:  [ 0.00000000e+00 -6.00000000e-01  3.36074018e+04  0.00000000e+00]
New Q values:  [    0.         63160.05960754 33607.40175098     0.        ]
Reward: 100009  Episode Reward:  100020
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  16.27281476 -289.59534477 -104.33462561 -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [  16.27281476 -289.59534477 -104.33462561 -251.53897752]
New Q values:  [  42.1548989  -289.59534477 -104.33462561 -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  9.91988058  12.83068575 100.81924333   0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [  9.91988058  12.83068575 100.81924333   0.        ]
New Q values:  [  9.91988058  12.83068575 123.18502587   0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 38.85388605 258.19109515  15.27626837   0.        ]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 18960.04989882 -3909.58186816     0.        ]
New Q values:  [-5922.26708831 15796.3200181  -3909.58186816     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 27356.33352858   246.80935611]
------
Step:4, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477  1062.13465561  3132.35272871]
New Q values:  [ 1156.57693201 -6212.61234477   850.91925224  3132.35272871]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   16.05379361 -8656.02923281 -7525.7277781   1422.21796664]
------
Step:5, Action:West
State  288
Old Q Values:  [   16.05379361 -8656.02923281 -7525.7277781   1422.21796664]
New Q values:  [   16.05379361 -8656.02923281 -7525.7277781  -4492.00699473]
Reward: -10001  Episode Reward:  -9975
xxxxx
x...x
x  .x
x g x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   16.05379361 -8656.02923281 -7525.7277781  -4492.00699473]
------
Step:1, Action:North
State  288
Old Q Values:  [   16.05379361 -8656.02923281 -7525.7277781  -4492.00699473]
New Q values:  [   13.06029681 -8656.02923281 -7525.7277781  -4492.00699473]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [-2403.67686834    39.96223074 -2651.70614553 -2227.14232413]
New Q values:  [ -955.40431682    39.96223074 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.22143504 -180.6           2.10887797]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6           2.22143504 -180.6           2.10887797]
New Q values:  [-180.6           2.22143504 -180.6           9.03099506]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.29147958e+00]
------
Step:4, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.29147958e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.18141685e+01]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    8.9919221     1.40383269 -252.78192178]
------
Step:5, Action:South
State  105
Old Q Values:  [-1.80600000e+02  1.51356435e+00 -3.89520980e+03  0.00000000e+00]
New Q values:  [ -180.6            9.38312205 -3895.20980426     0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x  gx
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416    11.25898768 -4454.11955854  -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [    6.16101416    11.25898768 -4454.11955854  -180.6       ]
New Q values:  [    6.16101416    22.55006474 -4454.11955854  -180.6       ]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  42.1548989  -289.59534477 -104.33462561 -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [  42.1548989  -289.59534477 -104.33462561 -251.53897752]
New Q values:  [  53.21746732 -289.59534477 -104.33462561 -251.53897752]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  9.91988058  12.83068575 123.18502587   0.        ]
------
Step:8, Action:East
State  179
Old Q Values:  [    0.         63160.05960754 33607.40175098     0.        ]
New Q values:  [    0.         63160.05960754 73459.24376105     0.        ]
Reward: 100009  Episode Reward:  100062
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 27356.33352858   246.80935611]
------
Step:1, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477   850.91925224  3132.35272871]
New Q values:  [ 1156.57693201 -6212.61234477   349.68578994  3132.35272871]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   13.06029681 -8656.02923281 -7525.7277781  -4492.00699473]
------
Step:2, Action:North
State  288
Old Q Values:  [   13.06029681 -8656.02923281 -7525.7277781  -4492.00699473]
New Q values:  [   16.61278794 -8656.02923281 -7525.7277781  -4492.00699473]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -955.40431682    39.96223074 -2651.70614553 -2227.14232413]
------
Step:3, Action:South
State  210
Old Q Values:  [ 4.12926454e+00 -5.74322609e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [    4.12926454 -2292.90660129  -180.6            3.52184257]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   16.61278794 -8656.02923281 -7525.7277781  -4492.00699473]
------
Step:4, Action:North
State  288
Old Q Values:  [   16.61278794 -8656.02923281 -7525.7277781  -4492.00699473]
New Q values:  [ 7.28389454e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[    4.12926454 -2292.90660129  -180.6            3.52184257]
------
Step:5, Action:North
State  210
Old Q Values:  [    4.12926454 -2292.90660129  -180.6            3.52184257]
New Q values:  [   12.82861321 -2292.90660129  -180.6            3.52184257]
Reward: 9  Episode Reward:  15
xxxxx
x..ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518   19.25635799]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6           2.22143504 -180.6           9.03099506]
New Q values:  [-180.6           2.22143504 -180.6          11.19688615]
Reward: 9  Episode Reward:  24
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.42500416e+00  7.28162710e+00]
------
Step:7, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.18141685e+01]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.28232440e+01]
Reward: 9  Episode Reward:  33
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    8.9919221     1.40383269 -252.78192178]
------
Step:8, Action:South
State  107
Old Q Values:  [-252.35169558    8.9919221     1.40383269 -252.78192178]
New Q values:  [-252.35169558   15.76178826    1.40383269 -252.78192178]
Reward: 9  Episode Reward:  42
xxxxx
x   x
xa.gx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416    22.55006474 -4454.11955854  -180.6       ]
------
Step:9, Action:South
State  177
Old Q Values:  [   0.         2130.51120713 -849.04228729    0.        ]
New Q values:  [   0.         4012.50409039 -849.04228729    0.        ]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x g x
xa  x
xxxxx
Step:10, Action:East
State  261
Old Q Values:  [  53.21746732 -289.59534477 -104.33462561 -251.53897752]
New Q values:  [  53.21746732 -289.59534477 8164.56620833 -251.53897752]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 27356.33352858   246.80935611]
------
Step:11, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 27356.33352858   246.80935611]
New Q values:  [  37.74111519 -168.92307549 4944.11857979  246.80935611]
Reward: -10001  Episode Reward:  -9951
xxxxx
x   x
x . x
x  gx
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 3.93991578e+03 0.00000000e+00]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  3.62768689e+01  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  2.37593315e+01  0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[   12.82861321 -2292.90660129  -180.6            3.52184257]
------
Step:2, Action:North
State  216
Old Q Values:  [ 1.20418867e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
New Q values:  [    9.24074131  -131.73529819 -6170.35693855  -854.14510597]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.22143504 -180.6          11.19688615]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6           2.22143504 -180.6          11.19688615]
New Q values:  [-180.6           2.22143504 -180.6          12.06324259]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  3.42500416e+00  7.28162710e+00]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.42500416e+00  7.28162710e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.42500416e+00  1.30411873e+01]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   15.76178826    1.40383269 -252.78192178]
------
Step:5, Action:South
State  110
Old Q Values:  [-180.6         -91.46075976    1.42536035 -180.6       ]
New Q values:  [-180.6         -28.07654039    1.42536035 -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094  0.         30.35921172  0.        ]
------
Step:6, Action:East
State  188
Old Q Values:  [-6523.78898263    32.78037903    60.50114912     0.        ]
New Q values:  [-6523.78898263    32.78037903   712.00312788     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 6.79482295e-01  2.29467556e+03  9.48662515e+00 -8.40000000e-01]
------
Step:7, Action:South
State  206
Old Q Values:  [  0.         221.84572003   6.60224881   0.        ]
New Q values:  [  0.         135.48456638   6.60224881   0.        ]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  130.15725745]
------
Step:8, Action:East
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477   349.68578994  3132.35272871]
New Q values:  [ 1156.57693201 -6212.61234477   147.45948434  3132.35272871]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7.28389454e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
------
Step:9, Action:North
State  288
Old Q Values:  [ 7.28389454e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
New Q values:  [ 5.08578021e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    9.24074131  -131.73529819 -6170.35693855  -854.14510597]
------
Step:10, Action:North
State  216
Old Q Values:  [    9.24074131  -131.73529819 -6170.35693855  -854.14510597]
New Q values:  [ 4.05068798e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
Reward: -1  Episode Reward:  50
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -1.76785393e+02]
------
Step:11, Action:South
State  136
Old Q Values:  [-6.18060000e+03  3.18130485e+00 -6.24561866e+03 -1.76785393e+02]
New Q values:  [-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -1.76785393e+02]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.05068798e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
------
Step:12, Action:South
State  216
Old Q Values:  [ 4.05068798e+00 -1.31735298e+02 -6.17035694e+03 -8.54145106e+02]
New Q values:  [ 4.05068798e+00 -5.17683852e+01 -6.17035694e+03 -8.54145106e+02]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5.08578021e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
------
Step:13, Action:North
State  288
Old Q Values:  [ 5.08578021e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
New Q values:  [ 2.64951848e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.05068798e+00 -5.17683852e+01 -6.17035694e+03 -8.54145106e+02]
------
Step:14, Action:North
State  218
Old Q Values:  [   2.32432114 -610.30346672    0.           33.00611321]
New Q values:  [   3.94870123 -610.30346672    0.           33.00611321]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.22143504 -180.6          12.06324259]
------
Step:15, Action:West
State  130
Old Q Values:  [-296.12198833  -47.53253566 -180.00807518   19.25635799]
New Q values:  [-296.12198833  -47.53253566 -180.00807518    6.92254319]
Reward: -1  Episode Reward:  45
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -9.91433515e-01 -6.00000000e-01]
------
Step:16, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.42500416e+00  1.30411873e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  3.42500416e+00  5.13850002e+00]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  1.74008365e+00 -1.80600000e+02]
------
Step:17, Action:East
State  104
Old Q Values:  [-8.65284000e+03 -6.00000000e-01 -2.39535373e+03 -8.65284000e+03]
New Q values:  [-8.65284000e+03 -6.00000000e-01 -9.58483566e+02 -8.65284000e+03]
Reward: -1  Episode Reward:  43
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.01561177e+04 -5.99568600e+03  8.59748634e-01 -4.11091729e+02]
------
Step:18, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  3.42500416e+00  5.13850002e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.38897444e+00  5.13850002e+00]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.22143504 -180.6          12.06324259]
------
Step:19, Action:West
State  136
Old Q Values:  [-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -1.76785393e+02]
New Q values:  [-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -7.10562326e+01]
Reward: -1  Episode Reward:  41
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.01561177e+04 -5.99568600e+03  8.59748634e-01 -4.11091729e+02]
------
Step:20, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.38897444e+00  5.13850002e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  5.13850002e+00]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.22143504 -180.6          12.06324259]
------
Step:21, Action:West
State  136
Old Q Values:  [-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -7.10562326e+01]
New Q values:  [-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -2.87645685e+01]
Reward: -1  Episode Reward:  39
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.01561177e+04 -5.99568600e+03  8.59748634e-01 -4.11091729e+02]
------
Step:22, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  8.59748634e-01 -4.11091729e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01 -4.11091729e+02]
Reward: -1  Episode Reward:  38
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -2.87645685e+01]
------
Step:23, Action:South
State  138
Old Q Values:  [-180.6           2.22143504 -180.6          12.06324259]
New Q values:  [-180.6           1.50378041 -180.6          12.06324259]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.05068798e+00 -5.17683852e+01 -6.17035694e+03 -8.54145106e+02]
------
Step:24, Action:North
State  218
Old Q Values:  [   3.94870123 -610.30346672    0.           33.00611321]
New Q values:  [   4.59845327 -610.30346672    0.           33.00611321]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           1.50378041 -180.6          12.06324259]
------
Step:25, Action:West
State  138
Old Q Values:  [-180.6           1.50378041 -180.6          12.06324259]
New Q values:  [-180.6           1.50378041 -180.6           5.76684704]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.77456255e+00  5.13850002e+00]
------
Step:26, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  5.13850002e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  6.18393649e+00]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   15.76178826    1.40383269 -252.78192178]
------
Step:27, Action:South
State  98
Old Q Values:  [  0.         199.63143496   0.           0.        ]
New Q values:  [  0.         279.48400894   0.           0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[  0.           0.         667.43811653   0.        ]
------
Step:28, Action:East
State  187
Old Q Values:  [-0.11058345  0.         45.6985991   0.        ]
New Q values:  [-0.11058345  0.         40.59995787  0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    76.40172743     0.        ]
------
Step:29, Action:East
State  203
Old Q Values:  [0.89767243 0.         7.41419624 0.        ]
New Q values:  [ 0.89767243  0.         12.26751246  0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           33.00611321]
------
Step:30, Action:West
State  216
Old Q Values:  [ 4.05068798e+00 -5.17683852e+01 -6.17035694e+03 -8.54145106e+02]
New Q values:  [ 4.05068798e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[7.00269019 0.         5.39642008 0.024     ]
------
Step:31, Action:North
State  203
Old Q Values:  [ 0.89767243  0.         12.26751246  0.        ]
New Q values:  [ 3.60604218  0.         12.26751246  0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.28232440e+01]
------
Step:32, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.28232440e+01]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.25783408e+00]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   15.76178826    1.40383269 -252.78192178]
------
Step:33, Action:South
State  107
Old Q Values:  [-252.35169558   15.76178826    1.40383269 -252.78192178]
New Q values:  [-252.35169558   17.88470267    1.40383269 -252.78192178]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.         40.59995787  0.        ]
------
Step:34, Action:East
State  185
Old Q Values:  [-6.00000000e-01  0.00000000e+00 -2.39790111e+03 -1.78980000e+02]
New Q values:  [-6.00000000e-01  0.00000000e+00 -9.57659638e+02 -1.78980000e+02]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[7.00269019 0.         5.39642008 0.024     ]
------
Step:35, Action:North
State  201
Old Q Values:  [7.00269019 0.         5.39642008 0.024     ]
New Q values:  [3.66758704 0.         5.39642008 0.024     ]
Reward: -1  Episode Reward:  25
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:36, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01 -4.11091729e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01 -1.62221755e+02]
Reward: -1  Episode Reward:  24
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            9.38312205 -3895.20980426     0.        ]
------
Step:37, Action:South
State  105
Old Q Values:  [ -180.6            9.38312205 -3895.20980426     0.        ]
New Q values:  [-1.80600000e+02  3.15324882e+00 -3.89520980e+03  0.00000000e+00]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -9.57659638e+02 -1.78980000e+02]
------
Step:38, Action:South
State  177
Old Q Values:  [   0.         4012.50409039 -849.04228729    0.        ]
New Q values:  [    0.         64765.30124369  -849.04228729     0.        ]
Reward: 100009  Episode Reward:  100032
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2.64951848e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
------
Step:1, Action:North
State  288
Old Q Values:  [ 2.64951848e+00 -8.65602923e+03 -7.52572778e+03 -4.49200699e+03]
New Q values:  [   18.44847661 -8656.02923281 -7525.7277781  -4492.00699473]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -955.40431682    39.96223074 -2651.70614553 -2227.14232413]
------
Step:2, Action:South
State  210
Old Q Values:  [   12.82861321 -2292.90660129  -180.6            3.52184257]
New Q values:  [  12.82861321 -912.22809753 -180.6           3.52184257]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   18.44847661 -8656.02923281 -7525.7277781  -4492.00699473]
------
Step:3, Action:North
State  288
Old Q Values:  [   18.44847661 -8656.02923281 -7525.7277781  -4492.00699473]
New Q values:  [   10.62797461 -8656.02923281 -7525.7277781  -4492.00699473]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x .ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.82861321 -912.22809753 -180.6           3.52184257]
------
Step:4, Action:North
State  210
Old Q Values:  [  12.82861321 -912.22809753 -180.6           3.52184257]
New Q values:  [  12.60820824 -912.22809753 -180.6           3.52184257]
Reward: 9  Episode Reward:  16
xxxxx
x..ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518    6.92254319]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6           1.50378041 -180.6           5.76684704]
New Q values:  [-180.6           1.50378041 -180.6          10.48408904]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.25783408e+00]
------
Step:6, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.25783408e+00]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.45785360e+01]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x .gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   18.25134119    4.23310798 -272.09726687]
------
Step:7, Action:South
State  109
Old Q Values:  [ -241.10880094    65.29510908 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    32.28306306 -6443.21937065  -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[    6.16101416    22.55006474 -4454.11955854  -180.6       ]
------
Step:8, Action:South
State  181
Old Q Values:  [    6.16101416    22.55006474 -4454.11955854  -180.6       ]
New Q values:  [    6.16101416  2463.7898884  -4454.11955854  -180.6       ]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x .gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  53.21746732 -289.59534477 8164.56620833 -251.53897752]
------
Step:9, Action:East
State  261
Old Q Values:  [  53.21746732 -289.59534477 8164.56620833 -251.53897752]
New Q values:  [  53.21746732 -289.59534477 4754.46205727 -251.53897752]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4944.11857979  246.80935611]
------
Step:10, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4944.11857979  246.80935611]
New Q values:  [  37.74111519 -168.92307549 4944.11857979 1524.46235962]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  53.21746732 -289.59534477 4754.46205727 -251.53897752]
------
Step:11, Action:East
State  257
Old Q Values:  [10516.33202512  -180.6          953.79167504  4453.74493219]
New Q values:  [10516.33202512  -180.6         1320.62248863  4453.74493219]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477   147.45948434  3132.35272871]
------
Step:12, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4944.11857979 1524.46235962]
New Q values:  [  37.74111519 -168.92307549 4944.11857979 2035.52356103]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  53.21746732 -289.59534477 4754.46205727 -251.53897752]
------
Step:13, Action:East
State  257
Old Q Values:  [10516.33202512  -180.6         1320.62248863  4453.74493219]
New Q values:  [10516.33202512  -180.6         1467.35481406  4453.74493219]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477   147.45948434  3132.35272871]
------
Step:14, Action:West
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477   147.45948434  3132.35272871]
New Q values:  [ 1156.57693201 -6212.61234477   147.45948434  1315.9008295 ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  162.85686215 -6457.4598       211.8657934  -2702.17995449]
------
Step:15, Action:East
State  257
Old Q Values:  [10516.33202512  -180.6         1467.35481406  4453.74493219]
New Q values:  [10516.33202512  -180.6          981.11217448  4453.74493219]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477   147.45948434  1315.9008295 ]
------
Step:16, Action:West
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477   147.45948434  1315.9008295 ]
New Q values:  [ 1156.57693201 -6212.61234477   147.45948434   589.32006982]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  162.85686215 -6457.4598       211.8657934  -2702.17995449]
------
Step:17, Action:East
State  260
Old Q Values:  [  162.85686215 -6457.4598       211.8657934  -2702.17995449]
New Q values:  [  162.85686215 -6457.4598       431.11939696 -2702.17995449]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1156.57693201 -6212.61234477   147.45948434   589.32006982]
------
Step:18, Action:North
State  272
Old Q Values:  [ 1156.57693201 -6212.61234477   147.45948434   589.32006982]
New Q values:  [61650.00550782 -6212.61234477   147.45948434   589.32006982]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   10.62797461 -8656.02923281 -7525.7277781  -4492.00699473]
------
Step:1, Action:North
State  288
Old Q Values:  [   10.62797461 -8656.02923281 -7525.7277781  -4492.00699473]
New Q values:  [-5978.36014093 -8656.02923281 -7525.7277781  -4492.00699473]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x..gx
x.. x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  2.37593315e+01  0.00000000e+00]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  2.37593315e+01  0.00000000e+00]
New Q values:  [-6.00000000e-01 -2.36054016e+03  1.86861951e+01  0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.60820824 -912.22809753 -180.6           3.52184257]
------
Step:2, Action:North
State  210
Old Q Values:  [  12.60820824 -912.22809753 -180.6           3.52184257]
New Q values:  [  12.52004626 -912.22809753 -180.6           3.52184257]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518    6.92254319]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6           1.50378041 -180.6          10.48408904]
New Q values:  [-180.6           1.50378041 -180.6           9.41363562]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -9.91433515e-01 -6.00000000e-01]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  6.18393649e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  8.30118270e+00]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6         -28.07654039    1.42536035 -180.6       ]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558   17.88470267    1.40383269 -252.78192178]
New Q values:  [-252.35169558   17.88470267    2.45188789 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.77456255e+00  8.30118270e+00]
------
Step:6, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.45785360e+01]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.05968252e+01]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x.  x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   17.88470267    2.45188789 -252.78192178]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869   18.25134119    4.23310798 -272.09726687]
New Q values:  [-177.44732869   19.95922732    4.23310798 -272.09726687]
Reward: 9  Episode Reward:  43
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[ 3.06655861 24.19563616  0.          0.        ]
------
Step:8, Action:South
State  191
Old Q Values:  [ 3.06655861 24.19563616  0.          0.        ]
New Q values:  [   3.06655861 1435.41687165    0.            0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x   x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  53.21746732 -289.59534477 4754.46205727 -251.53897752]
------
Step:9, Action:East
State  261
Old Q Values:  [  53.21746732 -289.59534477 4754.46205727 -251.53897752]
New Q values:  [  53.21746732 -289.59534477 2023.73338257 -251.53897752]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         388.49519889   0.        ]
------
Step:10, Action:East
State  272
Old Q Values:  [61650.00550782 -6212.61234477   147.45948434   589.32006982]
New Q values:  [61650.00550782 -6212.61234477 58716.78169532   589.32006982]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
x g x
x  ax
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           1.50378041 -180.6           9.41363562]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6           1.50378041 -180.6           9.41363562]
New Q values:  [-180.6           1.50378041 -180.6          12.34450181]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.05968252e+01]
------
Step:2, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.05968252e+01]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.56264983e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   19.95922732    4.23310798 -272.09726687]
------
Step:3, Action:South
State  109
Old Q Values:  [ -241.10880094    32.28306306 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    32.64484337 -6443.21937065  -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842   47.77206048    2.84195242 -244.98066897]
------
Step:4, Action:South
State  189
Old Q Values:  [  14.51316842   47.77206048    2.84195242 -244.98066897]
New Q values:  [  14.51316842  631.62883896    2.84195242 -244.98066897]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  53.21746732 -289.59534477 2023.73338257 -251.53897752]
------
Step:5, Action:East
State  261
Old Q Values:  [  53.21746732 -289.59534477 2023.73338257 -251.53897752]
New Q values:  [  53.21746732 -289.59534477 2298.12892697 -251.53897752]
Reward: 9  Episode Reward:  45
xxxxx
x  gx
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4944.11857979 2035.52356103]
------
Step:6, Action:East
State  272
Old Q Values:  [61650.00550782 -6212.61234477 58716.78169532   589.32006982]
New Q values:  [61650.00550782 -6212.61234477 22144.51057971   589.32006982]
Reward: 9  Episode Reward:  54
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5978.36014093 -8656.02923281 -7525.7277781  -4492.00699473]
------
Step:7, Action:West
State  288
Old Q Values:  [-5978.36014093 -8656.02923281 -7525.7277781  -4492.00699473]
New Q values:  [-5978.36014093 -8656.02923281 -7525.7277781  16697.59885445]
Reward: -1  Episode Reward:  53
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[61650.00550782 -6212.61234477 22144.51057971   589.32006982]
------
Step:8, Action:East
State  272
Old Q Values:  [61650.00550782 -6212.61234477 22144.51057971   589.32006982]
New Q values:  [61650.00550782 -6212.61234477 13866.48388822   589.32006982]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5978.36014093 -8656.02923281 -7525.7277781  16697.59885445]
------
Step:9, Action:North
State  288
Old Q Values:  [-5978.36014093 -8656.02923281 -7525.7277781  16697.59885445]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781  16697.59885445]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.52004626 -912.22809753 -180.6           3.52184257]
------
Step:10, Action:North
State  208
Old Q Values:  [ -955.40431682    39.96223074 -2651.70614553 -2227.14232413]
New Q values:  [ -379.05837619    39.96223074 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           1.50378041 -180.6          12.34450181]
------
Step:11, Action:West
State  138
Old Q Values:  [-180.6           1.50378041 -180.6          12.34450181]
New Q values:  [-180.6           1.50378041 -180.6           9.0257502 ]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.56264983e+01]
------
Step:12, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  8.30118270e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  8.70824128e+00]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   19.95922732    4.23310798 -272.09726687]
------
Step:13, Action:South
State  111
Old Q Values:  [-177.44732869   19.95922732    4.23310798 -272.09726687]
New Q values:  [-177.44732869   44.33919869    4.23310798 -272.09726687]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  9.91988058  12.83068575 123.18502587   0.        ]
------
Step:14, Action:East
State  177
Old Q Values:  [    0.         64765.30124369  -849.04228729     0.        ]
New Q values:  [    0.         64765.30124369 64404.67909051     0.        ]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  53.21746732 -289.59534477 2298.12892697 -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [  53.21746732 -289.59534477 2298.12892697 -251.53897752]
New Q values:  [  63.64249469 -289.59534477 2298.12892697 -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  9.91988058  12.83068575 123.18502587   0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [  9.91988058  12.83068575 123.18502587   0.        ]
New Q values:  [ 9.91988058 12.83068575 60.27986887  0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  1.86861951e+01  0.00000000e+00]
------
Step:3, Action:East
State  195
Old Q Values:  [ 38.85388605 258.19109515  15.27626837   0.        ]
New Q values:  [ 38.85388605 258.19109515  15.26652123   0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.52004626 -912.22809753 -180.6           3.52184257]
------
Step:4, Action:North
State  210
Old Q Values:  [  12.52004626 -912.22809753 -180.6           3.52184257]
New Q values:  [  12.48478146 -912.22809753 -180.6           3.52184257]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518    6.92254319]
------
Step:5, Action:West
State  130
Old Q Values:  [-296.12198833  -47.53253566 -180.00807518    6.92254319]
New Q values:  [-296.12198833  -47.53253566 -180.00807518    7.98901728]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -9.91433515e-01 -6.00000000e-01]
------
Step:6, Action:West
State  126
Old Q Values:  [ 0.         65.95371601  9.68085385  5.8276081 ]
New Q values:  [ 0.         65.95371601  9.68085385  8.15865135]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6         -28.07654039    1.42536035 -180.6       ]
------
Step:7, Action:East
State  111
Old Q Values:  [-177.44732869   44.33919869    4.23310798 -272.09726687]
New Q values:  [-177.44732869   44.33919869   20.879358   -272.09726687]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[ 0.         65.95371601  9.68085385  8.15865135]
------
Step:8, Action:East
State  126
Old Q Values:  [ 0.         65.95371601  9.68085385  8.15865135]
New Q values:  [ 0.         65.95371601  5.9800666   8.15865135]
Reward: -1  Episode Reward:  52
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           1.50378041 -180.6           9.0257502 ]
------
Step:9, Action:West
State  130
Old Q Values:  [-296.12198833  -47.53253566 -180.00807518    7.98901728]
New Q values:  [-296.12198833  -47.53253566 -180.00807518   20.53256609]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          59.78986392    6.47656656    0.        ]
------
Step:10, Action:South
State  127
Old Q Values:  [ 0.          0.          5.38992087 15.32541077]
New Q values:  [ 0.          1.67014986  5.38992087 15.32541077]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         7.56716619]
------
Step:11, Action:West
State  204
Old Q Values:  [ 0.         48.92723688 -0.32153244 26.84087428]
New Q values:  [  0.          48.92723688  -0.32153244 199.6250014 ]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842  631.62883896    2.84195242 -244.98066897]
------
Step:12, Action:South
State  189
Old Q Values:  [  14.51316842  631.62883896    2.84195242 -244.98066897]
New Q values:  [  14.51316842  941.49021368    2.84195242 -244.98066897]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  63.64249469 -289.59534477 2298.12892697 -251.53897752]
------
Step:13, Action:East
State  261
Old Q Values:  [  63.64249469 -289.59534477 2298.12892697 -251.53897752]
New Q values:  [  63.64249469 -289.59534477 1035.20013045 -251.53897752]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         388.49519889   0.        ]
------
Step:14, Action:East
State  272
Old Q Values:  [61650.00550782 -6212.61234477 13866.48388822   589.32006982]
New Q values:  [61650.00550782 -6212.61234477 70561.27321162   589.32006982]
Reward: 100009  Episode Reward:  100056
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  63.64249469 -289.59534477 1035.20013045 -251.53897752]
------
Step:1, Action:East
State  261
Old Q Values:  [  63.64249469 -289.59534477 1035.20013045 -251.53897752]
New Q values:  [   63.64249469  -289.59534477 15587.86201567  -251.53897752]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x g.x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.62956723e+00 3.93991578e+03 0.00000000e+00]
------
Step:1, Action:East
State  200
Old Q Values:  [ 6.79482295e-01  2.29467556e+03  9.48662515e+00 -8.40000000e-01]
New Q values:  [ 6.79482295e-01  2.29467556e+03  1.04098565e+01 -8.40000000e-01]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.05068798e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
------
Step:2, Action:North
State  216
Old Q Values:  [ 4.05068798e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
New Q values:  [    9.72800025   -51.76838521 -6170.35693855  -340.15723533]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           1.50378041 -180.6           9.0257502 ]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6           1.50378041 -180.6           9.0257502 ]
New Q values:  [-180.6           1.50378041 -180.6           5.62277247]
Reward: -1  Episode Reward:  17
xxxxx
x.a x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.77456255e+00  8.70824128e+00]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  8.70824128e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  9.31090462e+00]
Reward: 9  Episode Reward:  26
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6         -28.07654039    1.42536035 -180.6       ]
------
Step:5, Action:East
State  110
Old Q Values:  [-180.6         -28.07654039    1.42536035 -180.6       ]
New Q values:  [-180.6         -28.07654039    2.76341552 -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.77456255e+00  9.31090462e+00]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  9.31090462e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  8.48977265e+00]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   17.88470267    2.45188789 -252.78192178]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869   44.33919869   20.879358   -272.09726687]
New Q values:  [-177.44732869  305.58274358   20.879358   -272.09726687]
Reward: 9  Episode Reward:  33
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842  941.49021368    2.84195242 -244.98066897]
------
Step:8, Action:South
State  189
Old Q Values:  [  14.51316842  941.49021368    2.84195242 -244.98066897]
New Q values:  [ 1.45131684e+01  5.05835469e+03  2.84195242e+00 -2.44980669e+02]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   63.64249469  -289.59534477 15587.86201567  -251.53897752]
------
Step:9, Action:East
State  261
Old Q Values:  [   63.64249469  -289.59534477 15587.86201567  -251.53897752]
New Q values:  [  63.64249469 -289.59534477 6357.09336593 -251.53897752]
Reward: 9  Episode Reward:  51
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         388.49519889   0.        ]
------
Step:10, Action:East
State  272
Old Q Values:  [61650.00550782 -6212.61234477 70561.27321162   589.32006982]
New Q values:  [61650.00550782 -6212.61234477 93239.18894099   589.32006982]
Reward: 100009  Episode Reward:  100060
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058 12.83068575 60.27986887  0.        ]
------
Step:1, Action:East
State  181
Old Q Values:  [    6.16101416  2463.7898884  -4454.11955854  -180.6       ]
New Q values:  [ 6.16101416e+00  2.46378989e+03 -6.59427309e+03 -1.80600000e+02]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g.x
x. .x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[61650.00550782 -6212.61234477 93239.18894099   589.32006982]
------
Step:1, Action:East
State  272
Old Q Values:  [61650.00550782 -6212.61234477 93239.18894099   589.32006982]
New Q values:  [61650.00550782 -6212.61234477 42310.35523273   589.32006982]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781  16697.59885445]
------
Step:2, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781  16697.59885445]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781  25173.44119413]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x ..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[61650.00550782 -6212.61234477 42310.35523273   589.32006982]
------
Step:3, Action:North
State  272
Old Q Values:  [61650.00550782 -6212.61234477 42310.35523273   589.32006982]
New Q values:  [24687.51548929 -6212.61234477 42310.35523273   589.32006982]
Reward: 9  Episode Reward:  17
xxxxx
xg..x
x a.x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    70.50198937    73.71095389     0.        ]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.62956723e+00 3.93991578e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 9.62956723e+00 1.59335498e+03 0.00000000e+00]
Reward: 9  Episode Reward:  26
xxxxx
x...x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -379.05837619    39.96223074 -2651.70614553 -2227.14232413]
------
Step:5, Action:South
State  208
Old Q Values:  [ -379.05837619    39.96223074 -2651.70614553 -2227.14232413]
New Q values:  [ -379.05837619  7567.41725053 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781  25173.44119413]
------
Step:6, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781  25173.44119413]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781  11552.01205159]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4944.11857979 2035.52356103]
------
Step:7, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4944.11857979 2035.52356103]
New Q values:  [  37.74111519 -168.92307549 -557.34895261 2035.52356103]
Reward: -10001  Episode Reward:  -9977
xxxxx
x...x
x   x
x. gx
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 15796.3200181  -3909.58186816     0.        ]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.62956723e+00 1.59335498e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.27023584e+04 1.59335498e+03 0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[24687.51548929 -6212.61234477 42310.35523273   589.32006982]
------
Step:2, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 -557.34895261 2035.52356103]
New Q values:  [  37.74111519 -168.92307549 3248.06403443 2035.52356103]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781  11552.01205159]
------
Step:3, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781  11552.01205159]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781   5594.62403097]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3248.06403443 2035.52356103]
------
Step:4, Action:East
State  272
Old Q Values:  [24687.51548929 -6212.61234477 42310.35523273   589.32006982]
New Q values:  [24687.51548929 -6212.61234477 18601.92930238   589.32006982]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781   5594.62403097]
------
Step:5, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781   5594.62403097]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781   3643.50425917]
Reward: -10001  Episode Reward:  -9985
xxxxx
x.. x
x. .x
x.g x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -379.05837619  7567.41725053 -2651.70614553 -2227.14232413]
------
Step:1, Action:South
State  208
Old Q Values:  [ -379.05837619  7567.41725053 -2651.70614553 -2227.14232413]
New Q values:  [ -379.05837619  4125.41817797 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781   3643.50425917]
------
Step:2, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781   3643.50425917]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781   8869.05635046]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[24687.51548929 -6212.61234477 18601.92930238   589.32006982]
------
Step:3, Action:East
State  272
Old Q Values:  [24687.51548929 -6212.61234477 18601.92930238   589.32006982]
New Q values:  [24687.51548929 -6212.61234477 10100.88862609   589.32006982]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781   8869.05635046]
------
Step:4, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781   8869.05635046]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781  10953.27718697]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[24687.51548929 -6212.61234477 10100.88862609   589.32006982]
------
Step:5, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 3248.06403443 2035.52356103]
New Q values:  [  37.74111519 -168.92307549 4584.60876986 2035.52356103]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781  10953.27718697]
------
Step:6, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781  10953.27718697]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781  11786.96552158]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[24687.51548929 -6212.61234477 10100.88862609   589.32006982]
------
Step:7, Action:East
State  272
Old Q Values:  [24687.51548929 -6212.61234477 10100.88862609   589.32006982]
New Q values:  [24687.51548929 -6212.61234477  7575.84510691   589.32006982]
Reward: -1  Episode Reward:  13
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781  11786.96552158]
------
Step:8, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781  11786.96552158]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781  12120.44085542]
Reward: -1  Episode Reward:  12
xxxxx
xg. x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[24687.51548929 -6212.61234477  7575.84510691   589.32006982]
------
Step:9, Action:North
State  272
Old Q Values:  [24687.51548929 -6212.61234477  7575.84510691   589.32006982]
New Q values:  [13691.11371473 -6212.61234477  7575.84510691   589.32006982]
Reward: 9  Episode Reward:  21
xxxxx
xg. x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.27023584e+04 1.59335498e+03 0.00000000e+00]
------
Step:10, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.27023584e+04 1.59335498e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 9.18767747e+03 1.59335498e+03 0.00000000e+00]
Reward: -1  Episode Reward:  20
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[13691.11371473 -6212.61234477  7575.84510691   589.32006982]
------
Step:11, Action:North
State  272
Old Q Values:  [13691.11371473 -6212.61234477  7575.84510691   589.32006982]
New Q values:  [ 8232.14872782 -6212.61234477  7575.84510691   589.32006982]
Reward: -1  Episode Reward:  19
xxxxx
xg. x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.18767747e+03 1.59335498e+03 0.00000000e+00]
------
Step:12, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.18767747e+03 1.59335498e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 6.14411561e+03 1.59335498e+03 0.00000000e+00]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 8232.14872782 -6212.61234477  7575.84510691   589.32006982]
------
Step:13, Action:North
State  272
Old Q Values:  [ 8232.14872782 -6212.61234477  7575.84510691   589.32006982]
New Q values:  [ 5135.49417341 -6212.61234477  7575.84510691   589.32006982]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.14411561e+03 1.59335498e+03 0.00000000e+00]
------
Step:14, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.14411561e+03 1.59335498e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 4.72979978e+03 1.59335498e+03 0.00000000e+00]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5135.49417341 -6212.61234477  7575.84510691   589.32006982]
------
Step:15, Action:East
State  272
Old Q Values:  [ 5135.49417341 -6212.61234477  7575.84510691   589.32006982]
New Q values:  [ 5135.49417341 -6212.61234477  6665.87029939   589.32006982]
Reward: -1  Episode Reward:  15
xxxxx
xg. x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781  12120.44085542]
------
Step:16, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781  12120.44085542]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781   6847.33743198]
Reward: -1  Episode Reward:  14
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5135.49417341 -6212.61234477  6665.87029939   589.32006982]
------
Step:17, Action:East
State  272
Old Q Values:  [ 5135.49417341 -6212.61234477  6665.87029939   589.32006982]
New Q values:  [ 5135.49417341 -6212.61234477  4719.94934935   589.32006982]
Reward: -1  Episode Reward:  13
xxxxx
xg. x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781   6847.33743198]
------
Step:18, Action:West
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781   6847.33743198]
New Q values:  [-2388.1880425  -8656.02923281 -7525.7277781   4278.98322482]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5135.49417341 -6212.61234477  4719.94934935   589.32006982]
------
Step:19, Action:North
State  272
Old Q Values:  [ 5135.49417341 -6212.61234477  4719.94934935   589.32006982]
New Q values:  [-2527.46239811 -6212.61234477  4719.94934935   589.32006982]
Reward: -10001  Episode Reward:  -9989
xxxxx
x.. x
x.g x
x.  x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  1.86861951e+01  0.00000000e+00]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.72979978e+03 1.59335498e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 4.72979978e+03 1.88036745e+03 0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -379.05837619  4125.41817797 -2651.70614553 -2227.14232413]
------
Step:2, Action:South
State  210
Old Q Values:  [  12.48478146 -912.22809753 -180.6           3.52184257]
New Q values:  [  12.48478146  924.20372843 -180.6           3.52184257]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2388.1880425  -8656.02923281 -7525.7277781   4278.98322482]
------
Step:3, Action:North
State  288
Old Q Values:  [-2388.1880425  -8656.02923281 -7525.7277781   4278.98322482]
New Q values:  [ -678.61409847 -8656.02923281 -7525.7277781   4278.98322482]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.48478146  924.20372843 -180.6           3.52184257]
------
Step:4, Action:North
State  208
Old Q Values:  [ -379.05837619  4125.41817797 -2651.70614553 -2227.14232413]
New Q values:  [ -140.06358065  4125.41817797 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518   20.53256609]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6           1.50378041 -180.6           5.62277247]
New Q values:  [-180.6           1.50378041 -180.6          12.33705847]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.56264983e+01]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  8.48977265e+00]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  1.41613199e+01]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   17.88470267    2.45188789 -252.78192178]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558   17.88470267    2.45188789 -252.78192178]
New Q values:  [-252.35169558   24.73386843    2.45188789 -252.78192178]
Reward: 9  Episode Reward:  53
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.         40.59995787  0.        ]
------
Step:8, Action:East
State  187
Old Q Values:  [-0.11058345  0.         40.59995787  0.        ]
New Q values:  [-0.11058345  0.         19.32023688  0.        ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x a x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[ 3.60604218  0.         12.26751246  0.        ]
------
Step:9, Action:East
State  201
Old Q Values:  [3.66758704 0.         5.39642008 0.024     ]
New Q values:  [ 3.66758704e+00  0.00000000e+00 -5.99552303e+03  2.40000000e-02]
Reward: -10001  Episode Reward:  -9949
xxxxx
x   x
x  gx
x.  x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -2.87645685e+01]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6           1.50378041 -180.6          12.33705847]
New Q values:  [-180.6           8.91991224 -180.6          12.33705847]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    9.72800025   -51.76838521 -6170.35693855  -340.15723533]
------
Step:2, Action:North
State  216
Old Q Values:  [    9.72800025   -51.76838521 -6170.35693855  -340.15723533]
New Q values:  [    6.99231764   -51.76838521 -6170.35693855  -340.15723533]
Reward: -1  Episode Reward:  8
xxxxx
x .ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           8.91991224 -180.6          12.33705847]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6           8.91991224 -180.6          12.33705847]
New Q values:  [-180.6           8.91991224 -180.6          14.58321934]
Reward: 9  Episode Reward:  17
xxxxx
x a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.91418985e+03  4.77456255e+00  1.41613199e+01]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  1.41613199e+01]
New Q values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  1.24846885e+01]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   24.73386843    2.45188789 -252.78192178]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869  305.58274358   20.879358   -272.09726687]
New Q values:  [-177.44732869  145.71705809   20.879358   -272.09726687]
Reward: 9  Episode Reward:  25
xxxxx
x   x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058 12.83068575 60.27986887  0.        ]
------
Step:6, Action:East
State  187
Old Q Values:  [-0.11058345  0.         19.32023688  0.        ]
New Q values:  [-0.11058345  0.         16.80834849  0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[ 3.60604218  0.         12.26751246  0.        ]
------
Step:7, Action:East
State  201
Old Q Values:  [ 3.66758704e+00  0.00000000e+00 -5.99552303e+03  2.40000000e-02]
New Q values:  [ 3.66758704e+00  0.00000000e+00 -8.39671152e+03  2.40000000e-02]
Reward: -10001  Episode Reward:  -9967
xxxxx
x   x
x  gx
x...x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 38.85388605 258.19109515  15.26652123   0.        ]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831 15796.3200181  -3909.58186816     0.        ]
New Q values:  [-5922.26708831  7699.3106382  -3909.58186816     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4584.60876986 2035.52356103]
------
Step:2, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4584.60876986 2035.52356103]
New Q values:  [  37.74111519 -168.92307549 3116.93847539 2035.52356103]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -678.61409847 -8656.02923281 -7525.7277781   4278.98322482]
------
Step:3, Action:West
State  288
Old Q Values:  [ -678.61409847 -8656.02923281 -7525.7277781   4278.98322482]
New Q values:  [ -678.61409847 -8656.02923281 -7525.7277781   3126.97809473]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  4719.94934935   589.32006982]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  4719.94934935   589.32006982]
New Q values:  [-2527.46239811 -6212.61234477  2825.47316816   589.32006982]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -678.61409847 -8656.02923281 -7525.7277781   3126.97809473]
------
Step:5, Action:West
State  288
Old Q Values:  [ -678.61409847 -8656.02923281 -7525.7277781   3126.97809473]
New Q values:  [ -678.61409847 -8656.02923281 -7525.7277781   2097.83318834]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2825.47316816   589.32006982]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2825.47316816   589.32006982]
New Q values:  [-2527.46239811 -6212.61234477  1758.93922377   589.32006982]
Reward: -1  Episode Reward:  4
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -678.61409847 -8656.02923281 -7525.7277781   2097.83318834]
------
Step:7, Action:West
State  288
Old Q Values:  [ -678.61409847 -8656.02923281 -7525.7277781   2097.83318834]
New Q values:  [ -678.61409847 -8656.02923281 -7525.7277781   1366.21504247]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1758.93922377   589.32006982]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1758.93922377   589.32006982]
New Q values:  [-2527.46239811 -6212.61234477  1112.84020225   589.32006982]
Reward: -1  Episode Reward:  2
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -678.61409847 -8656.02923281 -7525.7277781   1366.21504247]
------
Step:9, Action:West
State  288
Old Q Values:  [ -678.61409847 -8656.02923281 -7525.7277781   1366.21504247]
New Q values:  [ -678.61409847 -8656.02923281 -7525.7277781    879.73807766]
Reward: -1  Episode Reward:  1
xxxxx
x.g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1112.84020225   589.32006982]
------
Step:10, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 3116.93847539 2035.52356103]
New Q values:  [  37.74111519 -168.92307549 1510.09681345 2035.52356103]
Reward: -1  Episode Reward:  0
xxxxx
x..gx
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -678.61409847 -8656.02923281 -7525.7277781    879.73807766]
------
Step:11, Action:West
State  288
Old Q Values:  [ -678.61409847 -8656.02923281 -7525.7277781    879.73807766]
New Q values:  [ -678.61409847 -8656.02923281 -7525.7277781    961.95229937]
Reward: -1  Episode Reward:  -1
xxxxx
x...x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1510.09681345 2035.52356103]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1112.84020225   589.32006982]
New Q values:  [-2527.46239811 -6212.61234477  1112.84020225  2148.25603771]
Reward: 9  Episode Reward:  8
xxxxx
x...x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  63.64249469 -289.59534477 6357.09336593 -251.53897752]
------
Step:13, Action:East
State  261
Old Q Values:  [  63.64249469 -289.59534477 6357.09336593 -251.53897752]
New Q values:  [   63.64249469  -289.59534477 -2813.28584231  -251.53897752]
Reward: -10001  Episode Reward:  -9993
xxxxx
x...x
x. .x
x g x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -140.06358065  4125.41817797 -2651.70614553 -2227.14232413]
------
Step:1, Action:South
State  208
Old Q Values:  [ -140.06358065  4125.41817797 -2651.70614553 -2227.14232413]
New Q values:  [ -140.06358065  1944.152961   -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -678.61409847 -8656.02923281 -7525.7277781    961.95229937]
------
Step:2, Action:West
State  288
Old Q Values:  [ -678.61409847 -8656.02923281 -7525.7277781    961.95229937]
New Q values:  [ -678.61409847 -8656.02923281 -7525.7277781   1034.65773106]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1112.84020225  2148.25603771]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1112.84020225  2148.25603771]
New Q values:  [-2527.46239811 -6212.61234477   754.93340022  2148.25603771]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -678.61409847 -8656.02923281 -7525.7277781   1034.65773106]
------
Step:4, Action:North
State  288
Old Q Values:  [ -678.61409847 -8656.02923281 -7525.7277781   1034.65773106]
New Q values:  [  311.20024891 -8656.02923281 -7525.7277781   1034.65773106]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -140.06358065  1944.152961   -2651.70614553 -2227.14232413]
------
Step:5, Action:South
State  210
Old Q Values:  [  12.48478146  924.20372843 -180.6           3.52184257]
New Q values:  [  12.48478146  679.47881069 -180.6           3.52184257]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  311.20024891 -8656.02923281 -7525.7277781   1034.65773106]
------
Step:6, Action:North
State  288
Old Q Values:  [  311.20024891 -8656.02923281 -7525.7277781   1034.65773106]
New Q values:  [  327.72374277 -8656.02923281 -7525.7277781   1034.65773106]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.48478146  679.47881069 -180.6           3.52184257]
------
Step:7, Action:South
State  210
Old Q Values:  [  12.48478146  679.47881069 -180.6           3.52184257]
New Q values:  [  12.48478146  581.5888436  -180.6           3.52184257]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  327.72374277 -8656.02923281 -7525.7277781   1034.65773106]
------
Step:8, Action:North
State  288
Old Q Values:  [  327.72374277 -8656.02923281 -7525.7277781   1034.65773106]
New Q values:  [  304.96615019 -8656.02923281 -7525.7277781   1034.65773106]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.48478146  581.5888436  -180.6           3.52184257]
------
Step:9, Action:South
State  210
Old Q Values:  [  12.48478146  581.5888436  -180.6           3.52184257]
New Q values:  [  12.48478146  542.43285676 -180.6           3.52184257]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  304.96615019 -8656.02923281 -7525.7277781   1034.65773106]
------
Step:10, Action:North
State  288
Old Q Values:  [  304.96615019 -8656.02923281 -7525.7277781   1034.65773106]
New Q values:  [  284.1163171  -8656.02923281 -7525.7277781   1034.65773106]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.48478146  542.43285676 -180.6           3.52184257]
------
Step:11, Action:South
State  208
Old Q Values:  [ -140.06358065  1944.152961   -2651.70614553 -2227.14232413]
New Q values:  [ -140.06358065  1087.45850372 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  9
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  284.1163171  -8656.02923281 -7525.7277781   1034.65773106]
------
Step:12, Action:West
State  288
Old Q Values:  [  284.1163171  -8656.02923281 -7525.7277781   1034.65773106]
New Q values:  [  284.1163171  -8656.02923281 -7525.7277781   1057.73990374]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   754.93340022  2148.25603771]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   754.93340022  2148.25603771]
New Q values:  [-2527.46239811 -6212.61234477   754.93340022   994.03823417]
Reward: 9  Episode Reward:  17
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  162.85686215 -6457.4598       431.11939696 -2702.17995449]
------
Step:14, Action:East
State  257
Old Q Values:  [10516.33202512  -180.6          981.11217448  4453.74493219]
New Q values:  [10516.33202512  -180.6          690.05634004  4453.74493219]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   754.93340022   994.03823417]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   754.93340022   994.03823417]
New Q values:  [-2527.46239811 -6212.61234477   754.93340022  3551.9149012 ]
Reward: -1  Episode Reward:  15
xxxxx
x.g.x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[10516.33202512  -180.6          690.05634004  4453.74493219]
------
Step:16, Action:North
State  257
Old Q Values:  [10516.33202512  -180.6          690.05634004  4453.74493219]
New Q values:  [23641.52318315  -180.6          690.05634004  4453.74493219]
Reward: 9  Episode Reward:  24
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         64765.30124369 64404.67909051     0.        ]
------
Step:17, Action:South
State  177
Old Q Values:  [    0.         64765.30124369 64404.67909051     0.        ]
New Q values:  [    0.         32997.97745242 64404.67909051     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[23641.52318315  -180.6          690.05634004  4453.74493219]
------
Step:18, Action:North
State  257
Old Q Values:  [23641.52318315  -180.6          690.05634004  4453.74493219]
New Q values:  [28777.41300042  -180.6          690.05634004  4453.74493219]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         32997.97745242 64404.67909051     0.        ]
------
Step:19, Action:East
State  177
Old Q Values:  [    0.         32997.97745242 64404.67909051     0.        ]
New Q values:  [    0.         32997.97745242 27180.21156874     0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x.g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.72979978e+03 1.88036745e+03 0.00000000e+00]
------
Step:20, Action:South
State  193
Old Q Values:  [-5922.26708831  7699.3106382  -3909.58186816     0.        ]
New Q values:  [-5922.26708831  3689.78132359 -3909.58186816     0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1510.09681345 2035.52356103]
------
Step:21, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1510.09681345 2035.52356103]
New Q values:  [  37.74111519 -168.92307549 1510.09681345  832.70217282]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   63.64249469  -289.59534477 -2813.28584231  -251.53897752]
------
Step:22, Action:North
State  261
Old Q Values:  [   63.64249469  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [   42.94095854  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  18
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 9.91988058 12.83068575 60.27986887  0.        ]
------
Step:23, Action:East
State  183
Old Q Values:  [ 9.91988058 12.83068575 60.27986887  0.        ]
New Q values:  [ 9.91988058 12.83068575 29.11780607  0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.36054016e+03  1.86861951e+01  0.00000000e+00]
------
Step:24, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.72979978e+03 1.88036745e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 4.72979978e+03 1.07778453e+03 0.00000000e+00]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -140.06358065  1087.45850372 -2651.70614553 -2227.14232413]
------
Step:25, Action:South
State  210
Old Q Values:  [  12.48478146  542.43285676 -180.6           3.52184257]
New Q values:  [  12.48478146  533.69511382 -180.6           3.52184257]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  284.1163171  -8656.02923281 -7525.7277781   1057.73990374]
------
Step:26, Action:North
State  288
Old Q Values:  [  284.1163171  -8656.02923281 -7525.7277781   1057.73990374]
New Q values:  [  273.15506099 -8656.02923281 -7525.7277781   1057.73990374]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  12.48478146  533.69511382 -180.6           3.52184257]
------
Step:27, Action:North
State  210
Old Q Values:  [  12.48478146  533.69511382 -180.6           3.52184257]
New Q values:  [  16.55368241  533.69511382 -180.6           3.52184257]
Reward: 9  Episode Reward:  23
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518   20.53256609]
------
Step:28, Action:West
State  130
Old Q Values:  [-296.12198833  -47.53253566 -180.00807518   20.53256609]
New Q values:  [-296.12198833  -47.53253566 -180.00807518   13.43302643]
Reward: 9  Episode Reward:  32
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.37621480e+03 -9.91433515e-01 -6.00000000e-01]
------
Step:29, Action:West
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03 -9.91433515e-01 -6.00000000e-01]
New Q values:  [-1.80600000e+02 -8.37621480e+03 -9.91433515e-01  6.00890052e+04]
Reward: 100009  Episode Reward:  100041
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  273.15506099 -8656.02923281 -7525.7277781   1057.73990374]
------
Step:1, Action:West
State  288
Old Q Values:  [  273.15506099 -8656.02923281 -7525.7277781   1057.73990374]
New Q values:  [  273.15506099 -8656.02923281 -7525.7277781   1494.07043186]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   754.93340022  3551.9149012 ]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   754.93340022  3551.9149012 ]
New Q values:  [-2527.46239811 -6212.61234477   754.93340022  1439.04824804]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   42.94095854  -289.59534477 -2813.28584231  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [   42.94095854  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [  755.71334993  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  2.46378989e+03 -6.59427309e+03 -1.80600000e+02]
------
Step:4, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  2.46378989e+03 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  1.21162996e+03 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  755.71334993  -289.59534477 -2813.28584231  -251.53897752]
------
Step:5, Action:North
State  260
Old Q Values:  [  162.85686215 -6457.4598       431.11939696 -2702.17995449]
New Q values:  [  481.37542162 -6457.4598       431.11939696 -2702.17995449]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   -12.80681502  1389.44225588     0.        ]
------
Step:6, Action:East
State  180
Old Q Values:  [-1367.02476015   -12.80681502  1389.44225588     0.        ]
New Q values:  [-1367.02476015   -12.80681502  1980.11683489     0.        ]
Reward: 9  Episode Reward:  24
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.72979978e+03 1.07778453e+03 0.00000000e+00]
------
Step:7, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -2.36054016e+03  1.86861951e+01  0.00000000e+00]
New Q values:  [  -0.6        -513.10159134   18.68619508    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   754.93340022  1439.04824804]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   754.93340022  1439.04824804]
New Q values:  [-2527.46239811 -6212.61234477   749.59448964  1439.04824804]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  273.15506099 -8656.02923281 -7525.7277781   1494.07043186]
------
Step:9, Action:North
State  288
Old Q Values:  [  273.15506099 -8656.02923281 -7525.7277781   1494.07043186]
New Q values:  [  274.77055854 -8656.02923281 -7525.7277781   1494.07043186]
Reward: 9  Episode Reward:  31
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  16.55368241  533.69511382 -180.6           3.52184257]
------
Step:10, Action:South
State  210
Old Q Values:  [  16.55368241  533.69511382 -180.6           3.52184257]
New Q values:  [  16.55368241  661.09917509 -180.6           3.52184257]
Reward: -1  Episode Reward:  30
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  274.77055854 -8656.02923281 -7525.7277781   1494.07043186]
------
Step:11, Action:North
State  288
Old Q Values:  [  274.77055854 -8656.02923281 -7525.7277781   1494.07043186]
New Q values:  [  307.63797594 -8656.02923281 -7525.7277781   1494.07043186]
Reward: -1  Episode Reward:  29
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  16.55368241  661.09917509 -180.6           3.52184257]
------
Step:12, Action:North
State  208
Old Q Values:  [ -140.06358065  1087.45850372 -2651.70614553 -2227.14232413]
New Q values:  [  -46.59552433  1087.45850372 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  38
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518   13.43302643]
------
Step:13, Action:West
State  128
Old Q Values:  [ 6067.82062533  1141.37341868 -8652.84       11155.58059144]
New Q values:  [ 6067.82062533  1141.37341868 -8652.84        4547.03370611]
Reward: 9  Episode Reward:  47
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:14, Action:South
State  112
Old Q Values:  [    0.           253.42377226 13703.34638233 60005.4       ]
New Q values:  [    0.          1519.70944144 13703.34638233 60005.4       ]
Reward: -1  Episode Reward:  46
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.72979978e+03 1.07778453e+03 0.00000000e+00]
------
Step:15, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.72979978e+03 1.07778453e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 2.32303438e+03 1.07778453e+03 0.00000000e+00]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   749.59448964  1439.04824804]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   749.59448964  1439.04824804]
New Q values:  [-2527.46239811 -6212.61234477   749.59448964   701.74329296]
Reward: -1  Episode Reward:  44
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.           422.41331248 -2468.58126138]
------
Step:17, Action:East
State  257
Old Q Values:  [28777.41300042  -180.6          690.05634004  4453.74493219]
New Q values:  [28777.41300042  -180.6          500.30088291  4453.74493219]
Reward: -1  Episode Reward:  43
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   749.59448964   701.74329296]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   749.59448964   701.74329296]
New Q values:  [-2527.46239811 -6212.61234477   747.45892541   701.74329296]
Reward: -1  Episode Reward:  42
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  307.63797594 -8656.02923281 -7525.7277781   1494.07043186]
------
Step:19, Action:West
State  288
Old Q Values:  [  307.63797594 -8656.02923281 -7525.7277781   1494.07043186]
New Q values:  [  307.63797594 -8656.02923281 -7525.7277781    821.26585037]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   747.45892541   701.74329296]
------
Step:20, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   747.45892541   701.74329296]
New Q values:  [-2527.46239811 -6212.61234477   544.76332528   701.74329296]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  307.63797594 -8656.02923281 -7525.7277781    821.26585037]
------
Step:21, Action:West
State  288
Old Q Values:  [  307.63797594 -8656.02923281 -7525.7277781    821.26585037]
New Q values:  [  307.63797594 -8656.02923281 -7525.7277781    538.42932804]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   544.76332528   701.74329296]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   544.76332528   701.74329296]
New Q values:  [-2527.46239811 -6212.61234477   378.83412852   701.74329296]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  307.63797594 -8656.02923281 -7525.7277781    538.42932804]
------
Step:23, Action:West
State  288
Old Q Values:  [  307.63797594 -8656.02923281 -7525.7277781    538.42932804]
New Q values:  [  307.63797594 -8656.02923281 -7525.7277781    425.2947191 ]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   378.83412852   701.74329296]
------
Step:24, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   378.83412852   701.74329296]
New Q values:  [-2527.46239811 -6212.61234477   378.83412852   406.82131093]
Reward: -1  Episode Reward:  36
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.           422.41331248 -2468.58126138]
------
Step:25, Action:East
State  256
Old Q Values:  [    0.             0.           422.41331248 -2468.58126138]
New Q values:  [    0.             0.           290.41171827 -2468.58126138]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   378.83412852   406.82131093]
------
Step:26, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   378.83412852   406.82131093]
New Q values:  [-2527.46239811 -6212.61234477   378.83412852  8795.3524245 ]
Reward: -1  Episode Reward:  34
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[28777.41300042  -180.6          500.30088291  4453.74493219]
------
Step:27, Action:North
State  257
Old Q Values:  [28777.41300042  -180.6          500.30088291  4453.74493219]
New Q values:  [21409.75843589  -180.6          500.30088291  4453.74493219]
Reward: -1  Episode Reward:  33
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         32997.97745242 27180.21156874     0.        ]
------
Step:28, Action:South
State  177
Old Q Values:  [    0.         32997.97745242 27180.21156874     0.        ]
New Q values:  [    0.         19621.51851174 27180.21156874     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21409.75843589  -180.6          500.30088291  4453.74493219]
------
Step:29, Action:North
State  257
Old Q Values:  [21409.75843589  -180.6          500.30088291  4453.74493219]
New Q values:  [16717.36684498  -180.6          500.30088291  4453.74493219]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         19621.51851174 27180.21156874     0.        ]
------
Step:30, Action:South
State  177
Old Q Values:  [    0.         19621.51851174 27180.21156874     0.        ]
New Q values:  [    0.         12863.21745819 27180.21156874     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[16717.36684498  -180.6          500.30088291  4453.74493219]
------
Step:31, Action:North
State  257
Old Q Values:  [16717.36684498  -180.6          500.30088291  4453.74493219]
New Q values:  [28724.11986631  -180.6          500.30088291  4453.74493219]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         63160.05960754 73459.24376105     0.        ]
------
Step:32, Action:East
State  177
Old Q Values:  [    0.         12863.21745819 27180.21156874     0.        ]
New Q values:  [    0.         12863.21745819 11978.41902457     0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3689.78132359 -3909.58186816     0.        ]
------
Step:33, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.32303438e+03 1.07778453e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 3.56721948e+03 1.07778453e+03 0.00000000e+00]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   378.83412852  8795.3524245 ]
------
Step:34, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   378.83412852  8795.3524245 ]
New Q values:  [-2527.46239811 -6212.61234477   378.83412852  3604.66448528]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[    0.             0.           290.41171827 -2468.58126138]
------
Step:35, Action:East
State  256
Old Q Values:  [    0.             0.           290.41171827 -2468.58126138]
New Q values:  [    0.             0.          1196.96403289 -2468.58126138]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   378.83412852  3604.66448528]
------
Step:36, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   378.83412852  3604.66448528]
New Q values:  [-2527.46239811 -6212.61234477   278.52206714  3604.66448528]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  307.63797594 -8656.02923281 -7525.7277781    425.2947191 ]
------
Step:37, Action:North
State  288
Old Q Values:  [  307.63797594 -8656.02923281 -7525.7277781    425.2947191 ]
New Q values:  [  320.7849429  -8656.02923281 -7525.7277781    425.2947191 ]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  16.55368241  661.09917509 -180.6           3.52184257]
------
Step:38, Action:North
State  210
Old Q Values:  [  16.55368241  661.09917509 -180.6           3.52184257]
New Q values:  [  10.05138089  661.09917509 -180.6           3.52184257]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518   13.43302643]
------
Step:39, Action:West
State  130
Old Q Values:  [-296.12198833  -47.53253566 -180.00807518   13.43302643]
New Q values:  [-296.12198833  -47.53253566 -180.00807518   22.71016975]
Reward: -1  Episode Reward:  21
xxxxx
x.a x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-180.6          59.78986392    6.47656656    0.        ]
------
Step:40, Action:South
State  114
Old Q Values:  [-1.80600000e+02 -8.37621480e+03 -9.91433515e-01  6.00890052e+04]
New Q values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  6.00890052e+04]
Reward: -10001  Episode Reward:  -9980
xxxxx
x.  x
x g x
x   x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  145.71705809   20.879358   -272.09726687]
------
Step:1, Action:South
State  110
Old Q Values:  [-180.6         -28.07654039    2.76341552 -180.6       ]
New Q values:  [-180.6          65.33165717    2.76341552 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   237.20757775     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 9.91988058 12.83068575 29.11780607  0.        ]
New Q values:  [ 9.91988058 12.83068575 33.82622604  0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[  -0.27887208 -240.95887638   55.93034536    0.        ]
------
Step:3, Action:East
State  194
Old Q Values:  [  -0.6        -513.10159134   18.68619508    0.        ]
New Q values:  [  -0.6        -513.10159134  211.20423056    0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  10.05138089  661.09917509 -180.6           3.52184257]
------
Step:4, Action:South
State  208
Old Q Values:  [  -46.59552433  1087.45850372 -2651.70614553 -2227.14232413]
New Q values:  [  -46.59552433   567.97181722 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  320.7849429  -8656.02923281 -7525.7277781    425.2947191 ]
------
Step:5, Action:West
State  288
Old Q Values:  [  320.7849429  -8656.02923281 -7525.7277781    425.2947191 ]
New Q values:  [  320.7849429  -8656.02923281 -7525.7277781   1250.91723323]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   278.52206714  3604.66448528]
------
Step:6, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  130.15725745]
New Q values:  [   1.6181791  -180.6         137.82092789  284.17690796]
Reward: 9  Episode Reward:  44
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  755.71334993  -289.59534477 -2813.28584231  -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [  755.71334993  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [  665.17432808  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  1.21162996e+03 -6.59427309e+03 -1.80600000e+02]
------
Step:8, Action:South
State  183
Old Q Values:  [ 9.91988058 12.83068575 33.82622604  0.        ]
New Q values:  [  9.91988058 204.08457272  33.82622604   0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  665.17432808  -289.59534477 -2813.28584231  -251.53897752]
------
Step:9, Action:North
State  261
Old Q Values:  [  665.17432808  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [  326.69510305  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  9.91988058 204.08457272  33.82622604   0.        ]
------
Step:10, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  1.21162996e+03 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  5.82060515e+02 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  326.69510305  -289.59534477 -2813.28584231  -251.53897752]
------
Step:11, Action:North
State  261
Old Q Values:  [  326.69510305  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [  191.30341304  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  9.91988058 204.08457272  33.82622604   0.        ]
------
Step:12, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  5.82060515e+02 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  2.89615230e+02 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  191.30341304  -289.59534477 -2813.28584231  -251.53897752]
------
Step:13, Action:North
State  261
Old Q Values:  [  191.30341304  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [  162.80593419  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  2.89615230e+02 -6.59427309e+03 -1.80600000e+02]
------
Step:14, Action:South
State  180
Old Q Values:  [-1367.02476015   -12.80681502  1980.11683489     0.        ]
New Q values:  [-1367.02476015   138.68990048  1980.11683489     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  481.37542162 -6457.4598       431.11939696 -2702.17995449]
------
Step:15, Action:East
State  260
Old Q Values:  [  481.37542162 -6457.4598       431.11939696 -2702.17995449]
New Q values:  [  481.37542162 -6457.4598      1253.24710437 -2702.17995449]
Reward: -1  Episode Reward:  35
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   278.52206714  3604.66448528]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   278.52206714  3604.66448528]
New Q values:  [-2527.46239811 -6212.61234477   278.52206714  1817.23992542]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  481.37542162 -6457.4598      1253.24710437 -2702.17995449]
------
Step:17, Action:East
State  260
Old Q Values:  [  481.37542162 -6457.4598      1253.24710437 -2702.17995449]
New Q values:  [  481.37542162 -6457.4598      1045.87081937 -2702.17995449]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   278.52206714  1817.23992542]
------
Step:18, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  284.17690796]
New Q values:  [   1.6181791  -180.6         137.82092789  161.91254344]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  162.80593419  -289.59534477 -2813.28584231  -251.53897752]
------
Step:19, Action:North
State  261
Old Q Values:  [  162.80593419  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [  151.40694266  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  31
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  2.89615230e+02 -6.59427309e+03 -1.80600000e+02]
------
Step:20, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  2.89615230e+02 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  1.60668175e+02 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  30
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  151.40694266  -289.59534477 -2813.28584231  -251.53897752]
------
Step:21, Action:North
State  261
Old Q Values:  [  151.40694266  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [  108.16322949  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  1.60668175e+02 -6.59427309e+03 -1.80600000e+02]
------
Step:22, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  1.60668175e+02 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  9.61162388e+01 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  108.16322949  -289.59534477 -2813.28584231  -251.53897752]
------
Step:23, Action:North
State  261
Old Q Values:  [  108.16322949  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [   71.50016342  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  27
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  9.61162388e+01 -6.59427309e+03 -1.80600000e+02]
------
Step:24, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  9.61162388e+01 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  5.92965445e+01 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   71.50016342  -289.59534477 -2813.28584231  -251.53897752]
------
Step:25, Action:North
State  261
Old Q Values:  [   71.50016342  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [   45.78902873  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  25
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  5.92965445e+01 -6.59427309e+03 -1.80600000e+02]
------
Step:26, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  5.92965445e+01 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  3.68553264e+01 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   45.78902873  -289.59534477 -2813.28584231  -251.53897752]
------
Step:27, Action:North
State  261
Old Q Values:  [   45.78902873  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [   28.77220942  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  3.68553264e+01 -6.59427309e+03 -1.80600000e+02]
------
Step:28, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  3.68553264e+01 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  2.27737934e+01 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   28.77220942  -289.59534477 -2813.28584231  -251.53897752]
------
Step:29, Action:North
State  261
Old Q Values:  [   28.77220942  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [   17.74102179  -289.59534477 -2813.28584231  -251.53897752]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  2.27737934e+01 -6.59427309e+03 -1.80600000e+02]
------
Step:30, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  2.27737934e+01 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  1.38318239e+01 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  20
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   17.74102179  -289.59534477 -2813.28584231  -251.53897752]
------
Step:31, Action:North
State  260
Old Q Values:  [  481.37542162 -6457.4598      1045.87081937 -2702.17995449]
New Q values:  [  785.98521912 -6457.4598      1045.87081937 -2702.17995449]
Reward: -1  Episode Reward:  19
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048  1980.11683489     0.        ]
------
Step:32, Action:East
State  180
Old Q Values:  [-1367.02476015   138.68990048  1980.11683489     0.        ]
New Q values:  [-1367.02476015   138.68990048  1861.61257829     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.56721948e+03 1.07778453e+03 0.00000000e+00]
------
Step:33, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.56721948e+03 1.07778453e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.97145977e+03 1.07778453e+03 0.00000000e+00]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   278.52206714  1817.23992542]
------
Step:34, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   278.52206714  1817.23992542]
New Q values:  [-2527.46239811 -6212.61234477   278.52206714  1040.05721598]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  785.98521912 -6457.4598      1045.87081937 -2702.17995449]
------
Step:35, Action:East
State  261
Old Q Values:  [   17.74102179  -289.59534477 -2813.28584231  -251.53897752]
New Q values:  [  17.74102179 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   278.52206714  1040.05721598]
------
Step:36, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1510.09681345  832.70217282]
New Q values:  [  37.74111519 -168.92307549 1510.09681345  337.80317566]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  17.74102179 -289.59534477 -813.89717213 -251.53897752]
------
Step:37, Action:North
State  261
Old Q Values:  [  17.74102179 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  10.64595588 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  1.38318239e+01 -6.59427309e+03 -1.80600000e+02]
------
Step:38, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  1.38318239e+01 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  8.12651632e+00 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  10.64595588 -289.59534477 -813.89717213 -251.53897752]
------
Step:39, Action:North
State  261
Old Q Values:  [  10.64595588 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [   6.09633725 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  11
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  8.12651632e+00 -6.59427309e+03 -1.80600000e+02]
------
Step:40, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  8.12651632e+00 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  4.47950770e+00 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  10
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   6.09633725 -289.59534477 -813.89717213 -251.53897752]
------
Step:41, Action:North
State  261
Old Q Values:  [   6.09633725 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [   3.68683915 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  4.47950770e+00 -6.59427309e+03 -1.80600000e+02]
------
Step:42, Action:North
State  183
Old Q Values:  [  9.91988058 204.08457272  33.82622604   0.        ]
New Q values:  [  5.02499993 204.08457272  33.82622604   0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6           5.52349233    0.            0.        ]
------
Step:43, Action:South
State  103
Old Q Values:  [-180.6           5.52349233    0.            0.        ]
New Q values:  [-180.6          62.83476875    0.            0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  5.02499993 204.08457272  33.82622604   0.        ]
------
Step:44, Action:South
State  181
Old Q Values:  [ 6.16101416e+00  4.47950770e+00 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 6.16101416e+00  2.29785483e+00 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   3.68683915 -289.59534477 -813.89717213 -251.53897752]
------
Step:45, Action:North
State  261
Old Q Values:  [   3.68683915 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  62.10010748 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  5.02499993 204.08457272  33.82622604   0.        ]
------
Step:46, Action:South
State  183
Old Q Values:  [  5.02499993 204.08457272  33.82622604   0.        ]
New Q values:  [ 5.02499993 99.66386133 33.82622604  0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  62.10010748 -289.59534477 -813.89717213 -251.53897752]
------
Step:47, Action:North
State  261
Old Q Values:  [  62.10010748 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  54.13920139 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  3
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 5.02499993 99.66386133 33.82622604  0.        ]
------
Step:48, Action:South
State  183
Old Q Values:  [ 5.02499993 99.66386133 33.82622604  0.        ]
New Q values:  [ 5.02499993 55.50730495 33.82622604  0.        ]
Reward: -1  Episode Reward:  2
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  54.13920139 -289.59534477 -813.89717213 -251.53897752]
------
Step:49, Action:North
State  260
Old Q Values:  [  785.98521912 -6457.4598      1045.87081937 -2702.17995449]
New Q values:  [  384.95636097 -6457.4598      1045.87081937 -2702.17995449]
Reward: -1  Episode Reward:  1
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   237.20757775     0.        ]
------
Step:50, Action:East
State  180
Old Q Values:  [-1367.02476015   138.68990048  1861.61257829     0.        ]
New Q values:  [-1367.02476015   138.68990048  1335.48296234     0.        ]
Reward: -1  Episode Reward:  0
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.97145977e+03 1.07778453e+03 0.00000000e+00]
------
Step:51, Action:South
State  194
Old Q Values:  [  -0.6        -513.10159134  211.20423056    0.        ]
New Q values:  [ -0.6        106.17652826 211.20423056   0.        ]
Reward: -1  Episode Reward:  -1
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   278.52206714  1040.05721598]
------
Step:52, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   278.52206714  1040.05721598]
New Q values:  [-2527.46239811 -6212.61234477   486.08399682  1040.05721598]
Reward: -1  Episode Reward:  -2
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  320.7849429  -8656.02923281 -7525.7277781   1250.91723323]
------
Step:53, Action:West
State  288
Old Q Values:  [  320.7849429  -8656.02923281 -7525.7277781   1250.91723323]
New Q values:  [  320.7849429  -8656.02923281 -7525.7277781    811.78405808]
Reward: -1  Episode Reward:  -3
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   486.08399682  1040.05721598]
------
Step:54, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   486.08399682  1040.05721598]
New Q values:  [-2527.46239811 -6212.61234477   437.36881615  1040.05721598]
Reward: -1  Episode Reward:  -4
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  320.7849429  -8656.02923281 -7525.7277781    811.78405808]
------
Step:55, Action:North
State  288
Old Q Values:  [  320.7849429  -8656.02923281 -7525.7277781    811.78405808]
New Q values:  [  326.04372969 -8656.02923281 -7525.7277781    811.78405808]
Reward: -1  Episode Reward:  -5
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  10.05138089  661.09917509 -180.6           3.52184257]
------
Step:56, Action:North
State  210
Old Q Values:  [  10.05138089  661.09917509 -180.6           3.52184257]
New Q values:  [  16.23360328  661.09917509 -180.6           3.52184257]
Reward: 9  Episode Reward:  4
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-296.12198833  -47.53253566 -180.00807518   22.71016975]
------
Step:57, Action:West
State  130
Old Q Values:  [-296.12198833  -47.53253566 -180.00807518   22.71016975]
New Q values:  [-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  7.80411856e+04]
Reward: 100009  Episode Reward:  100013
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:1, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01 -1.62221755e+02]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01 -4.96952489e+01]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    32.64484337 -6443.21937065  -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  145.71705809   20.879358   -272.09726687]
New Q values:  [-177.44732869 1581.19323029   20.879358   -272.09726687]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 1.45131684e+01  5.05835469e+03  2.84195242e+00 -2.44980669e+02]
------
Step:3, Action:South
State  183
Old Q Values:  [ 5.02499993 55.50730495 33.82622604  0.        ]
New Q values:  [ 5.02499993 43.8446824  33.82622604  0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  54.13920139 -289.59534477 -813.89717213 -251.53897752]
------
Step:4, Action:North
State  261
Old Q Values:  [  54.13920139 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [1538.56208761 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 1.45131684e+01  5.05835469e+03  2.84195242e+00 -2.44980669e+02]
------
Step:5, Action:South
State  183
Old Q Values:  [ 5.02499993 43.8446824  33.82622604  0.        ]
New Q values:  [  5.02499993 478.50649924  33.82622604   0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1538.56208761 -289.59534477 -813.89717213 -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [1538.56208761 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [2132.33124209 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 1.45131684e+01  5.05835469e+03  2.84195242e+00 -2.44980669e+02]
------
Step:7, Action:South
State  189
Old Q Values:  [ 1.45131684e+01  5.05835469e+03  2.84195242e+00 -2.44980669e+02]
New Q values:  [  14.51316842 2662.4412487     2.84195242 -244.98066897]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2132.33124209 -289.59534477 -813.89717213 -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [2132.33124209 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 854.18080108 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  22
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 6.16101416e+00  2.29785483e+00 -6.59427309e+03 -1.80600000e+02]
------
Step:9, Action:North
State  181
Old Q Values:  [ 6.16101416e+00  2.29785483e+00 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 1.16578587e+01  2.29785483e+00 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  21
xxxxx
xag x
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    32.64484337 -6443.21937065  -180.6       ]
------
Step:10, Action:South
State  108
Old Q Values:  [-6180.6           90.32399703    16.09371094     0.        ]
New Q values:  [-6180.6          436.17448752    16.09371094     0.        ]
Reward: -1  Episode Reward:  20
xxxxx
xg  x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048  1335.48296234     0.        ]
------
Step:11, Action:East
State  189
Old Q Values:  [  14.51316842 2662.4412487     2.84195242 -244.98066897]
New Q values:  [  14.51316842 2662.4412487    28.65006714 -244.98066897]
Reward: 9  Episode Reward:  29
xxxxx
x g x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    70.50198937    73.71095389     0.        ]
------
Step:12, Action:East
State  200
Old Q Values:  [ 6.79482295e-01  2.29467556e+03  1.04098565e+01 -8.40000000e-01]
New Q values:  [ 6.79482295e-01  2.29467556e+03  1.16616379e+01 -8.40000000e-01]
Reward: 9  Episode Reward:  38
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    6.99231764   -51.76838521 -6170.35693855  -340.15723533]
------
Step:13, Action:North
State  216
Old Q Values:  [    6.99231764   -51.76838521 -6170.35693855  -340.15723533]
New Q values:  [ 2.76324556e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
Reward: -1  Episode Reward:  37
xxxxx
x gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -2.87645685e+01]
------
Step:14, Action:South
State  136
Old Q Values:  [-6.18060000e+03  1.88772833e+00 -6.24561866e+03 -2.87645685e+01]
New Q values:  [-6.18060000e+03  9.84065000e-01 -6.24561866e+03 -2.87645685e+01]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.76324556e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
------
Step:15, Action:North
State  216
Old Q Values:  [ 2.76324556e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
New Q values:  [ 8.00517723e-01 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
Reward: -1  Episode Reward:  35
xxxxx
xg ax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  9.84065000e-01 -6.24561866e+03 -2.87645685e+01]
------
Step:16, Action:South
State  138
Old Q Values:  [-180.6           8.91991224 -180.6          14.58321934]
New Q values:  [-180.6           3.20812021 -180.6          14.58321934]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 8.00517723e-01 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
------
Step:17, Action:North
State  216
Old Q Values:  [ 8.00517723e-01 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
New Q values:  [ 4.09517289e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           3.20812021 -180.6          14.58321934]
------
Step:18, Action:West
State  138
Old Q Values:  [-180.6           3.20812021 -180.6          14.58321934]
New Q values:  [-180.6           3.20812021 -180.6          25.01940254]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[ 0.         65.95371601  5.9800666   8.15865135]
------
Step:19, Action:South
State  122
Old Q Values:  [-2.81736000e+02 -8.91418985e+03  4.77456255e+00  1.24846885e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.24846885e+01]
Reward: -10001  Episode Reward:  -9969
xxxxx
x   x
x g x
x ..x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:1, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01 -4.96952489e+01]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01 -4.68464655e+00]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    32.64484337 -6443.21937065  -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-6180.6          436.17448752    16.09371094     0.        ]
New Q values:  [-6180.6          580.51468371    16.09371094     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048  1335.48296234     0.        ]
------
Step:3, Action:East
State  188
Old Q Values:  [-6523.78898263    32.78037903   712.00312788     0.        ]
New Q values:  [-6523.78898263    32.78037903   978.60391939     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 6.79482295e-01  2.29467556e+03  1.16616379e+01 -8.40000000e-01]
------
Step:4, Action:South
State  196
Old Q Values:  [-2469.90645144    70.50198937    73.71095389     0.        ]
New Q values:  [-2469.90645144    82.17455878    73.71095389     0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  161.91254344]
------
Step:5, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  161.91254344]
New Q values:  [   1.6181791  -180.6         137.82092789  326.4192577 ]
Reward: 9  Episode Reward:  45
xxxxx
x g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 854.18080108 -289.59534477 -813.89717213 -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [ 854.18080108 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 344.56967804 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.16578587e+01  2.29785483e+00 -6.59427309e+03 -1.80600000e+02]
------
Step:7, Action:North
State  181
Old Q Values:  [ 1.16578587e+01  2.29785483e+00 -6.59427309e+03 -1.80600000e+02]
New Q values:  [ 1.38565965e+01  2.29785483e+00 -6.59427309e+03 -1.80600000e+02]
Reward: -1  Episode Reward:  43
xxxxx
xag x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    32.64484337 -6443.21937065  -180.6       ]
------
Step:8, Action:South
State  108
Old Q Values:  [-6180.6          580.51468371    16.09371094     0.        ]
New Q values:  [-6180.6          632.25076219    16.09371094     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048  1335.48296234     0.        ]
------
Step:9, Action:East
State  180
Old Q Values:  [-1367.02476015   138.68990048  1335.48296234     0.        ]
New Q values:  [-1367.02476015   138.68990048   558.24555257     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    82.17455878    73.71095389     0.        ]
------
Step:10, Action:South
State  198
Old Q Values:  [  -0.27887208 -240.95887638   55.93034536    0.        ]
New Q values:  [-0.27887208  0.94222676 55.93034536  0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         137.82092789  326.4192577 ]
------
Step:11, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         137.82092789  326.4192577 ]
New Q values:  [   1.6181791  -180.6         304.06358858  326.4192577 ]
Reward: 9  Episode Reward:  49
xxxxx
x   x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  326.04372969 -8656.02923281 -7525.7277781    811.78405808]
------
Step:12, Action:North
State  288
Old Q Values:  [  326.04372969 -8656.02923281 -7525.7277781    811.78405808]
New Q values:  [60306.20903704 -8656.02923281 -7525.7277781    811.78405808]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
x gax
x   x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3689.78132359 -3909.58186816     0.        ]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.97145977e+03 1.07778453e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.10600107e+03 1.07778453e+03 0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   437.36881615  1040.05721598]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   437.36881615  1040.05721598]
New Q values:  [-2527.46239811 -6212.61234477   437.36881615  9038.65884628]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[28724.11986631  -180.6          500.30088291  4453.74493219]
------
Step:3, Action:North
State  260
Old Q Values:  [  384.95636097 -6457.4598      1045.87081937 -2702.17995449]
New Q values:  [  326.85621016 -6457.4598      1045.87081937 -2702.17995449]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048   558.24555257     0.        ]
------
Step:4, Action:East
State  177
Old Q Values:  [    0.         12863.21745819 11978.41902457     0.        ]
New Q values:  [    0.         12863.21745819  5122.56793168     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.10600107e+03 1.07778453e+03 0.00000000e+00]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.10600107e+03 1.07778453e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 5.39726206e+02 1.07778453e+03 0.00000000e+00]
Reward: -1  Episode Reward:  25
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         304.06358858  326.4192577 ]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   437.36881615  9038.65884628]
New Q values:  [-2527.46239811 -6212.61234477   437.36881615  3928.62478433]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  326.85621016 -6457.4598      1045.87081937 -2702.17995449]
------
Step:7, Action:East
State  260
Old Q Values:  [  326.85621016 -6457.4598      1045.87081937 -2702.17995449]
New Q values:  [  326.85621016 -6457.4598       515.67410506 -2702.17995449]
Reward: -1  Episode Reward:  23
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         304.06358858  326.4192577 ]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   437.36881615  3928.62478433]
New Q values:  [-2527.46239811 -6212.61234477   437.36881615  1725.55214525]
Reward: -1  Episode Reward:  22
xxxxx
x...x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  326.85621016 -6457.4598       515.67410506 -2702.17995449]
------
Step:9, Action:East
State  260
Old Q Values:  [  326.85621016 -6457.4598       515.67410506 -2702.17995449]
New Q values:  [  326.85621016 -6457.4598       303.59541934 -2702.17995449]
Reward: -1  Episode Reward:  21
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         304.06358858  326.4192577 ]
------
Step:10, Action:West
State  276
Old Q Values:  [   1.6181791  -180.6         304.06358858  326.4192577 ]
New Q values:  [   1.6181791  -180.6         304.06358858  233.33860649]
Reward: -1  Episode Reward:  20
xxxxx
x.g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 344.56967804 -289.59534477 -813.89717213 -251.53897752]
------
Step:11, Action:North
State  261
Old Q Values:  [ 344.56967804 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [3996.19310867 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  19
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         12863.21745819  5122.56793168     0.        ]
------
Step:12, Action:South
State  181
Old Q Values:  [ 1.38565965e+01  2.29785483e+00 -6.59427309e+03 -1.80600000e+02]
New Q values:  [   13.85659648  1199.17707453 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3996.19310867 -289.59534477 -813.89717213 -251.53897752]
------
Step:13, Action:North
State  261
Old Q Values:  [3996.19310867 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [5456.84248092 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         12863.21745819  5122.56793168     0.        ]
------
Step:14, Action:South
State  177
Old Q Values:  [    0.         12863.21745819  5122.56793168     0.        ]
New Q values:  [   0.         6781.73972755 5122.56793168    0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5456.84248092 -289.59534477 -813.89717213 -251.53897752]
------
Step:15, Action:North
State  261
Old Q Values:  [5456.84248092 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [4216.65891064 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         6781.73972755 5122.56793168    0.        ]
------
Step:16, Action:South
State  177
Old Q Values:  [   0.         6781.73972755 5122.56793168    0.        ]
New Q values:  [   0.         3977.09356421 5122.56793168    0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x.g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4216.65891064 -289.59534477 -813.89717213 -251.53897752]
------
Step:17, Action:North
State  261
Old Q Values:  [4216.65891064 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [2045.81668661 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1199.17707453 -6594.2730884   -180.6       ]
------
Step:18, Action:South
State  177
Old Q Values:  [   0.         3977.09356421 5122.56793168    0.        ]
New Q values:  [   0.         2203.98243167 5122.56793168    0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x.g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2045.81668661 -289.59534477 -813.89717213 -251.53897752]
------
Step:19, Action:North
State  261
Old Q Values:  [2045.81668661 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [2354.49705415 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  11
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         2203.98243167 5122.56793168    0.        ]
------
Step:20, Action:East
State  177
Old Q Values:  [   0.         2203.98243167 5122.56793168    0.        ]
New Q values:  [   0.         2203.98243167 3155.36156975    0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x..gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3689.78132359 -3909.58186816     0.        ]
------
Step:21, Action:South
State  193
Old Q Values:  [-5922.26708831  3689.78132359 -3909.58186816     0.        ]
New Q values:  [-5922.26708831  1928.34157347 -3909.58186816     0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1510.09681345  337.80317566]
------
Step:22, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1510.09681345  337.80317566]
New Q values:  [   37.74111519  -168.92307549 18701.30143649   337.80317566]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[60306.20903704 -8656.02923281 -7525.7277781    811.78405808]
------
Step:23, Action:North
State  288
Old Q Values:  [60306.20903704 -8656.02923281 -7525.7277781    811.78405808]
New Q values:  [24292.27515998 -8656.02923281 -7525.7277781    811.78405808]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -46.59552433   567.97181722 -2651.70614553 -2227.14232413]
------
Step:24, Action:South
State  208
Old Q Values:  [  -46.59552433   567.97181722 -2651.70614553 -2227.14232413]
New Q values:  [  -46.59552433  7514.27127488 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24292.27515998 -8656.02923281 -7525.7277781    811.78405808]
------
Step:25, Action:North
State  288
Old Q Values:  [24292.27515998 -8656.02923281 -7525.7277781    811.78405808]
New Q values:  [ 5970.59144646 -8656.02923281 -7525.7277781    811.78405808]
Reward: -10001  Episode Reward:  -9985
xxxxx
x...x
x  gx
x   x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6          65.33165717    2.76341552 -180.6       ]
------
Step:1, Action:East
State  110
Old Q Values:  [-180.6          65.33165717    2.76341552 -180.6       ]
New Q values:  [-180.6          65.33165717   10.25077275 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.24846885e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.24846885e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  4.78751844e+02]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1581.19323029   20.879358   -272.09726687]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 1581.19323029   20.879358   -272.09726687]
New Q values:  [-177.44732869  775.42924189   20.879358   -272.09726687]
Reward: -1  Episode Reward:  7
xxxxx
x  .x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  5.02499993 478.50649924  33.82622604   0.        ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648  1199.17707453 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648  1191.41994606 -6594.2730884   -180.6       ]
Reward: 9  Episode Reward:  16
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2354.49705415 -289.59534477 -813.89717213 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [2354.49705415 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [1298.62480548 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1191.41994606 -6594.2730884   -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [   13.85659648  1191.41994606 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648   865.55542007 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1298.62480548 -289.59534477 -813.89717213 -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [1298.62480548 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 778.51654821 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  13
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   865.55542007 -6594.2730884   -180.6       ]
------
Step:8, Action:South
State  181
Old Q Values:  [   13.85659648   865.55542007 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648   579.17713249 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 778.51654821 -289.59534477 -813.89717213 -251.53897752]
------
Step:9, Action:North
State  261
Old Q Values:  [ 778.51654821 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 484.55975903 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  11
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   579.17713249 -6594.2730884   -180.6       ]
------
Step:10, Action:South
State  181
Old Q Values:  [   13.85659648   579.17713249 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648   376.43878071 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 484.55975903 -289.59534477 -813.89717213 -251.53897752]
------
Step:11, Action:North
State  261
Old Q Values:  [ 484.55975903 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 306.15553782 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  9
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   376.43878071 -6594.2730884   -180.6       ]
------
Step:12, Action:South
State  181
Old Q Values:  [   13.85659648   376.43878071 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648   241.82217363 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 306.15553782 -289.59534477 -813.89717213 -251.53897752]
------
Step:13, Action:North
State  261
Old Q Values:  [ 306.15553782 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 194.40886722 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  7
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   241.82217363 -6594.2730884   -180.6       ]
------
Step:14, Action:South
State  181
Old Q Values:  [   13.85659648   241.82217363 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648   154.45152962 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x g.x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 194.40886722 -289.59534477 -813.89717213 -251.53897752]
------
Step:15, Action:North
State  261
Old Q Values:  [ 194.40886722 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 123.49900577 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  5
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   154.45152962 -6594.2730884   -180.6       ]
------
Step:16, Action:South
State  183
Old Q Values:  [  5.02499993 478.50649924  33.82622604   0.        ]
New Q values:  [  5.02499993 227.85230143  33.82622604   0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x  .x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 123.49900577 -289.59534477 -813.89717213 -251.53897752]
------
Step:17, Action:North
State  261
Old Q Values:  [ 123.49900577 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  95.13506119 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  3
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   154.45152962 -6594.2730884   -180.6       ]
------
Step:18, Action:South
State  181
Old Q Values:  [   13.85659648   154.45152962 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648    89.72113021 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x g.x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  95.13506119 -289.59534477 -813.89717213 -251.53897752]
------
Step:19, Action:North
State  260
Old Q Values:  [  326.85621016 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [  297.61614983 -6457.4598       303.59541934 -2702.17995449]
Reward: -1  Episode Reward:  1
xxxxx
xg .x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048   558.24555257     0.        ]
------
Step:20, Action:East
State  180
Old Q Values:  [-1367.02476015   138.68990048   558.24555257     0.        ]
New Q values:  [-1367.02476015   138.68990048   253.35058866     0.        ]
Reward: 9  Episode Reward:  10
xxxxx
x  .x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144    82.17455878    73.71095389     0.        ]
------
Step:21, Action:South
State  196
Old Q Values:  [-2469.90645144    82.17455878    73.71095389     0.        ]
New Q values:  [-2469.90645144   129.48890009    73.71095389     0.        ]
Reward: 9  Episode Reward:  19
xxxxx
xg .x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   1.6181791  -180.6         304.06358858  233.33860649]
------
Step:22, Action:East
State  276
Old Q Values:  [   1.6181791  -180.6         304.06358858  233.33860649]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  1.91820287e+03  2.33338606e+02]
Reward: 9  Episode Reward:  28
xxxxx
x g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5970.59144646 -8656.02923281 -7525.7277781    811.78405808]
------
Step:23, Action:North
State  288
Old Q Values:  [ 5970.59144646 -8656.02923281 -7525.7277781    811.78405808]
New Q values:  [ 4647.91796105 -8656.02923281 -7525.7277781    811.78405808]
Reward: 9  Episode Reward:  37
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -46.59552433  7514.27127488 -2651.70614553 -2227.14232413]
------
Step:24, Action:South
State  208
Old Q Values:  [  -46.59552433  7514.27127488 -2651.70614553 -2227.14232413]
New Q values:  [  -46.59552433  4399.48389827 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  36
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4647.91796105 -8656.02923281 -7525.7277781    811.78405808]
------
Step:25, Action:North
State  288
Old Q Values:  [ 4647.91796105 -8656.02923281 -7525.7277781    811.78405808]
New Q values:  [ 3178.4123539  -8656.02923281 -7525.7277781    811.78405808]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -46.59552433  4399.48389827 -2651.70614553 -2227.14232413]
------
Step:26, Action:South
State  208
Old Q Values:  [  -46.59552433  4399.48389827 -2651.70614553 -2227.14232413]
New Q values:  [  -46.59552433  2712.71726548 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3178.4123539  -8656.02923281 -7525.7277781    811.78405808]
------
Step:27, Action:West
State  288
Old Q Values:  [ 3178.4123539  -8656.02923281 -7525.7277781    811.78405808]
New Q values:  [ 3178.4123539  -8656.02923281 -7525.7277781    899.57448404]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1.61817910e+00 -1.80600000e+02  1.91820287e+03  2.33338606e+02]
------
Step:28, Action:East
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  1.91820287e+03  2.33338606e+02]
New Q values:  [ 1.61817910e+00 -1.80600000e+02  1.72020485e+03  2.33338606e+02]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3178.4123539  -8656.02923281 -7525.7277781    899.57448404]
------
Step:29, Action:North
State  288
Old Q Values:  [ 3178.4123539  -8656.02923281 -7525.7277781    899.57448404]
New Q values:  [ 2084.5801212  -8656.02923281 -7525.7277781    899.57448404]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  -46.59552433  2712.71726548 -2651.70614553 -2227.14232413]
------
Step:30, Action:South
State  208
Old Q Values:  [  -46.59552433  2712.71726548 -2651.70614553 -2227.14232413]
New Q values:  [  -46.59552433  1709.86094255 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2084.5801212  -8656.02923281 -7525.7277781    899.57448404]
------
Step:31, Action:West
State  288
Old Q Values:  [ 2084.5801212  -8656.02923281 -7525.7277781    899.57448404]
New Q values:  [ 2084.5801212  -8656.02923281 -7525.7277781    475.77835328]
Reward: -1  Episode Reward:  29
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         388.49519889   0.        ]
------
Step:32, Action:North
State  276
Old Q Values:  [ 1.61817910e+00 -1.80600000e+02  1.72020485e+03  2.33338606e+02]
New Q values:  [  16.82637525 -180.6        1720.20485392  233.33860649]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-0.27887208  0.94222676 55.93034536  0.        ]
------
Step:33, Action:East
State  199
Old Q Values:  [  22.48535485 1478.00809166  880.423904      0.        ]
New Q values:  [  22.48535485 1478.00809166  549.89931413    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  16.23360328  661.09917509 -180.6           3.52184257]
------
Step:34, Action:North
State  208
Old Q Values:  [  -46.59552433  1709.86094255 -2651.70614553 -2227.14232413]
New Q values:  [83399.11747888  1709.86094255 -2651.70614553 -2227.14232413]
Reward: 100009  Episode Reward:  100036
xxxxx
x  ax
x  gx
x   x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.39726206e+02 1.07778453e+03 0.00000000e+00]
------
Step:1, Action:East
State  200
Old Q Values:  [ 6.79482295e-01  2.29467556e+03  1.16616379e+01 -8.40000000e-01]
New Q values:  [ 6.79482295e-01  2.29467556e+03  1.12932070e+01 -8.40000000e-01]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.09517289e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
------
Step:2, Action:North
State  216
Old Q Values:  [ 4.09517289e+00 -5.17683852e+01 -6.17035694e+03 -3.40157235e+02]
New Q values:  [    7.33328866   -51.76838521 -6170.35693855  -340.15723533]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  9.84065000e-01 -6.24561866e+03 -2.87645685e+01]
------
Step:3, Action:South
State  138
Old Q Values:  [-180.6           3.20812021 -180.6          25.01940254]
New Q values:  [-180.6           2.88323468 -180.6          25.01940254]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[    7.33328866   -51.76838521 -6170.35693855  -340.15723533]
------
Step:4, Action:North
State  210
Old Q Values:  [  16.23360328  661.09917509 -180.6           3.52184257]
New Q values:  [  13.39926208  661.09917509 -180.6           3.52184257]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          25.01940254]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          25.01940254]
New Q values:  [-180.6           2.88323468 -180.6         159.03331436]
Reward: 9  Episode Reward:  25
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  4.78751844e+02]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  4.78751844e+02]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  2.10500235e+02]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6          65.33165717   10.25077275 -180.6       ]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558   24.73386843    2.45188789 -252.78192178]
New Q values:  [-252.35169558   20.33605192    2.45188789 -252.78192178]
Reward: 9  Episode Reward:  33
xxxxx
x   x
xa  x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.         16.80834849  0.        ]
------
Step:8, Action:East
State  187
Old Q Values:  [-0.11058345  0.         16.80834849  0.        ]
New Q values:  [-0.11058345  0.          9.80359313  0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[ 3.60604218  0.         12.26751246  0.        ]
------
Step:9, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458    76.40172743     0.        ]
New Q values:  [    0.         -5884.35407458    39.86252493     0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           33.00611321]
------
Step:10, Action:West
State  218
Old Q Values:  [   4.59845327 -610.30346672    0.           33.00611321]
New Q values:  [   4.59845327 -610.30346672    0.           24.56120276]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    39.86252493     0.        ]
------
Step:11, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458    39.86252493     0.        ]
New Q values:  [    0.         -5884.35407458    22.7133708      0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           24.56120276]
------
Step:12, Action:West
State  218
Old Q Values:  [   4.59845327 -610.30346672    0.           24.56120276]
New Q values:  [   4.59845327 -610.30346672    0.           12.90473484]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[ 3.60604218  0.         12.26751246  0.        ]
------
Step:13, Action:East
State  201
Old Q Values:  [ 3.66758704e+00  0.00000000e+00 -8.39671152e+03  2.40000000e-02]
New Q values:  [ 3.66758704e+00  0.00000000e+00 -9.35708462e+03  2.40000000e-02]
Reward: -10001  Episode Reward:  -9973
xxxxx
x   x
x  gx
x...x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    32.64484337 -6443.21937065  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094    32.64484337 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    45.37427641 -6443.21937065  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    89.72113021 -6594.2730884   -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   13.85659648    89.72113021 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648    69.82897044 -6594.2730884   -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  95.13506119 -289.59534477 -813.89717213 -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  95.13506119 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  58.40271561 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    69.82897044 -6594.2730884   -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648    69.82897044 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648    44.85240286 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  58.40271561 -289.59534477 -813.89717213 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [  58.40271561 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  36.2168071  -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    44.85240286 -6594.2730884   -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [   13.85659648    44.85240286 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648    28.20600327 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x  .x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  36.2168071  -289.59534477 -813.89717213 -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [  36.2168071  -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  22.34852382 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  13
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    28.20600327 -6594.2730884   -180.6       ]
------
Step:8, Action:South
State  181
Old Q Values:  [   13.85659648    28.20600327 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648    17.38695846 -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x g.x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  22.34852382 -289.59534477 -813.89717213 -251.53897752]
------
Step:9, Action:North
State  261
Old Q Values:  [  22.34852382 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  13.55549707 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  11
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    17.38695846 -6594.2730884   -180.6       ]
------
Step:10, Action:South
State  181
Old Q Values:  [   13.85659648    17.38695846 -6594.2730884   -180.6       ]
New Q values:  [   13.85659648    10.4214325  -6594.2730884   -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x g.x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  13.55549707 -289.59534477 -813.89717213 -251.53897752]
------
Step:11, Action:North
State  260
Old Q Values:  [  297.61614983 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [  194.45163653 -6457.4598       303.59541934 -2702.17995449]
Reward: -1  Episode Reward:  9
xxxxx
xg .x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048   253.35058866     0.        ]
------
Step:12, Action:East
State  181
Old Q Values:  [   13.85659648    10.4214325  -6594.2730884   -180.6       ]
New Q values:  [   13.85659648    10.4214325  -2593.46256533  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   129.48890009    73.71095389     0.        ]
------
Step:13, Action:South
State  196
Old Q Values:  [-2469.90645144   129.48890009    73.71095389     0.        ]
New Q values:  [-2469.90645144   573.25701621    73.71095389     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1720.20485392  233.33860649]
------
Step:14, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        1720.20485392  233.33860649]
New Q values:  [  16.82637525 -180.6        1318.85597793  233.33860649]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2084.5801212  -8656.02923281 -7525.7277781    475.77835328]
------
Step:15, Action:North
State  288
Old Q Values:  [ 2084.5801212  -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [25858.96729214 -8656.02923281 -7525.7277781    475.77835328]
Reward: 9  Episode Reward:  45
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[83399.11747888  1709.86094255 -2651.70614553 -2227.14232413]
------
Step:16, Action:North
State  208
Old Q Values:  [83399.11747888  1709.86094255 -2651.70614553 -2227.14232413]
New Q values:  [95185.39317915  1709.86094255 -2651.70614553 -2227.14232413]
Reward: 100009  Episode Reward:  100054
xxxxx
x gax
x   x
x   x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  775.42924189   20.879358   -272.09726687]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6          62.83476875    0.            0.        ]
New Q values:  [-180.6          98.88959793    0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  5.02499993 227.85230143  33.82622604   0.        ]
------
Step:2, Action:South
State  183
Old Q Values:  [  5.02499993 227.85230143  33.82622604   0.        ]
New Q values:  [  5.02499993 100.60756969  33.82622604   0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  13.55549707 -289.59534477 -813.89717213 -251.53897752]
------
Step:3, Action:North
State  260
Old Q Values:  [  194.45163653 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [  148.34292794 -6457.4598       303.59541934 -2702.17995449]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   237.20757775     0.        ]
------
Step:4, Action:East
State  180
Old Q Values:  [-1367.02476015   138.68990048   253.35058866     0.        ]
New Q values:  [-1367.02476015   138.68990048   278.71734033     0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   573.25701621    73.71095389     0.        ]
------
Step:5, Action:South
State  198
Old Q Values:  [-0.27887208  0.94222676 55.93034536  0.        ]
New Q values:  [-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1318.85597793  233.33860649]
------
Step:6, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        1318.85597793  233.33860649]
New Q values:  [  16.82637525 -180.6        8290.63257881  233.33860649]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[25858.96729214 -8656.02923281 -7525.7277781    475.77835328]
------
Step:7, Action:North
State  288
Old Q Values:  [25858.96729214 -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [10547.31666938 -8656.02923281 -7525.7277781    475.77835328]
Reward: 9  Episode Reward:  43
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  13.39926208  661.09917509 -180.6           3.52184257]
------
Step:8, Action:South
State  208
Old Q Values:  [95185.39317915  1709.86094255 -2651.70614553 -2227.14232413]
New Q values:  [95185.39317915  3847.53937784 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10547.31666938 -8656.02923281 -7525.7277781    475.77835328]
------
Step:9, Action:North
State  288
Old Q Values:  [10547.31666938 -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [32773.9446215  -8656.02923281 -7525.7277781    475.77835328]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[95185.39317915  3847.53937784 -2651.70614553 -2227.14232413]
------
Step:10, Action:North
State  208
Old Q Values:  [95185.39317915  3847.53937784 -2651.70614553 -2227.14232413]
New Q values:  [61491.91296027  3847.53937784 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  50
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  7.80411856e+04]
------
Step:11, Action:West
State  130
Old Q Values:  [-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  7.80411856e+04]
New Q values:  [-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  1.09248576e+05]
Reward: 100009  Episode Reward:  100059
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[32773.9446215  -8656.02923281 -7525.7277781    475.77835328]
------
Step:1, Action:North
State  288
Old Q Values:  [32773.9446215  -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [13313.30760113 -8656.02923281 -7525.7277781    475.77835328]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  13.39926208  661.09917509 -180.6           3.52184257]
------
Step:2, Action:South
State  208
Old Q Values:  [61491.91296027  3847.53937784 -2651.70614553 -2227.14232413]
New Q values:  [61491.91296027  5532.40803147 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13313.30760113 -8656.02923281 -7525.7277781    475.77835328]
------
Step:3, Action:North
State  288
Old Q Values:  [13313.30760113 -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [ 5523.05279298 -8656.02923281 -7525.7277781    475.77835328]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  13.39926208  661.09917509 -180.6           3.52184257]
------
Step:4, Action:South
State  210
Old Q Values:  [  13.39926208  661.09917509 -180.6           3.52184257]
New Q values:  [  13.39926208 1920.75550793 -180.6           3.52184257]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5523.05279298 -8656.02923281 -7525.7277781    475.77835328]
------
Step:5, Action:North
State  288
Old Q Values:  [ 5523.05279298 -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [ 2784.84776957 -8656.02923281 -7525.7277781    475.77835328]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  13.39926208 1920.75550793 -180.6           3.52184257]
------
Step:6, Action:South
State  208
Old Q Values:  [61491.91296027  5532.40803147 -2651.70614553 -2227.14232413]
New Q values:  [61491.91296027  3047.81754346 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2784.84776957 -8656.02923281 -7525.7277781    475.77835328]
------
Step:7, Action:North
State  288
Old Q Values:  [ 2784.84776957 -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [19560.91299591 -8656.02923281 -7525.7277781    475.77835328]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[61491.91296027  3047.81754346 -2651.70614553 -2227.14232413]
------
Step:8, Action:North
State  208
Old Q Values:  [61491.91296027  3047.81754346 -2651.70614553 -2227.14232413]
New Q values:  [24602.46040361  3047.81754346 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  12
xxxxx
x.gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  9.84065000e-01 -6.24561866e+03 -2.87645685e+01]
------
Step:9, Action:South
State  136
Old Q Values:  [-6.18060000e+03  9.84065000e-01 -6.24561866e+03 -2.87645685e+01]
New Q values:  [-6180.6         7380.53174708 -6245.61866138   -28.76456845]
Reward: -1  Episode Reward:  11
xxxxx
x..gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24602.46040361  3047.81754346 -2651.70614553 -2227.14232413]
------
Step:10, Action:South
State  208
Old Q Values:  [24602.46040361  3047.81754346 -2651.70614553 -2227.14232413]
New Q values:  [24602.46040361  7086.80091616 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  10
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19560.91299591 -8656.02923281 -7525.7277781    475.77835328]
------
Step:11, Action:North
State  288
Old Q Values:  [19560.91299591 -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [15204.50331945 -8656.02923281 -7525.7277781    475.77835328]
Reward: -1  Episode Reward:  9
xxxxx
x..gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24602.46040361  7086.80091616 -2651.70614553 -2227.14232413]
------
Step:12, Action:South
State  208
Old Q Values:  [24602.46040361  7086.80091616 -2651.70614553 -2227.14232413]
New Q values:  [24602.46040361  7395.4713623  -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15204.50331945 -8656.02923281 -7525.7277781    475.77835328]
------
Step:13, Action:West
State  288
Old Q Values:  [15204.50331945 -8656.02923281 -7525.7277781    475.77835328]
New Q values:  [15204.50331945 -8656.02923281 -7525.7277781   5806.10177226]
Reward: 9  Episode Reward:  17
xxxxx
x.. x
x . x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18701.30143649   337.80317566]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   437.36881615  1725.55214525]
New Q values:  [-2527.46239811 -6212.61234477   437.36881615   699.68750722]
Reward: 9  Episode Reward:  26
xxxxx
x.. x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  13.55549707 -289.59534477 -813.89717213 -251.53897752]
------
Step:15, Action:North
State  260
Old Q Values:  [  148.34292794 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [  129.8994445  -6457.4598       303.59541934 -2702.17995449]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   237.20757775     0.        ]
------
Step:16, Action:East
State  176
Old Q Values:  [    0.             0.         10073.73626031     0.        ]
New Q values:  [   0.            0.         4358.22986303    0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x.. x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.39726206e+02 1.07778453e+03 0.00000000e+00]
------
Step:17, Action:East
State  194
Old Q Values:  [ -0.6        106.17652826 211.20423056   0.        ]
New Q values:  [-6.00000000e-01  1.06176528e+02  6.60108345e+02  0.00000000e+00]
Reward: -1  Episode Reward:  33
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  13.39926208 1920.75550793 -180.6           3.52184257]
------
Step:18, Action:South
State  210
Old Q Values:  [  13.39926208 1920.75550793 -180.6           3.52184257]
New Q values:  [ 1.33992621e+01  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  32
xxxxx
x.. x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15204.50331945 -8656.02923281 -7525.7277781   5806.10177226]
------
Step:19, Action:North
State  288
Old Q Values:  [15204.50331945 -8656.02923281 -7525.7277781   5806.10177226]
New Q values:  [ 7679.91728748 -8656.02923281 -7525.7277781   5806.10177226]
Reward: -1  Episode Reward:  31
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.33992621e+01  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:20, Action:South
State  208
Old Q Values:  [24602.46040361  7395.4713623  -2651.70614553 -2227.14232413]
New Q values:  [24602.46040361  5261.56373116 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  30
xxxxx
x.. x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7679.91728748 -8656.02923281 -7525.7277781   5806.10177226]
------
Step:21, Action:North
State  288
Old Q Values:  [ 7679.91728748 -8656.02923281 -7525.7277781   5806.10177226]
New Q values:  [10452.10503607 -8656.02923281 -7525.7277781   5806.10177226]
Reward: -1  Episode Reward:  29
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24602.46040361  5261.56373116 -2651.70614553 -2227.14232413]
------
Step:22, Action:North
State  208
Old Q Values:  [24602.46040361  5261.56373116 -2651.70614553 -2227.14232413]
New Q values:  [42614.95690513  5261.56373116 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  28
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  1.09248576e+05]
------
Step:23, Action:West
State  130
Old Q Values:  [-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  1.09248576e+05]
New Q values:  [-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  6.17315319e+04]
Reward: 9  Episode Reward:  37
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  6.00890052e+04]
------
Step:24, Action:West
State  112
Old Q Values:  [    0.          1519.70944144 13703.34638233 60005.4       ]
New Q values:  [    0.          1519.70944144 13703.34638233 84007.56      ]
Reward: 100009  Episode Reward:  100046
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:1, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  1.56264983e+01]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  2.44279372e+02]
Reward: 9  Episode Reward:  9
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  775.42924189   20.879358   -272.09726687]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  775.42924189   20.879358   -272.09726687]
New Q values:  [-177.44732869  345.75396766   20.879358   -272.09726687]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  5.02499993 100.60756969  33.82622604   0.        ]
------
Step:3, Action:South
State  183
Old Q Values:  [  5.02499993 100.60756969  33.82622604   0.        ]
New Q values:  [ 5.02499993 49.709677   33.82622604  0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  13.55549707 -289.59534477 -813.89717213 -251.53897752]
------
Step:4, Action:North
State  261
Old Q Values:  [  13.55549707 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  19.73510193 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 5.02499993 49.709677   33.82622604  0.        ]
------
Step:5, Action:South
State  189
Old Q Values:  [  14.51316842 2662.4412487    28.65006714 -244.98066897]
New Q values:  [  14.51316842 1070.29703006   28.65006714 -244.98066897]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  19.73510193 -289.59534477 -813.89717213 -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [  19.73510193 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 328.38314979 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842 1070.29703006   28.65006714 -244.98066897]
------
Step:7, Action:South
State  181
Old Q Values:  [   13.85659648    10.4214325  -2593.46256533  -180.6       ]
New Q values:  [   13.85659648   102.08351794 -2593.46256533  -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 328.38314979 -289.59534477 -813.89717213 -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [ 328.38314979 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 161.3783153  -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  22
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   102.08351794 -2593.46256533  -180.6       ]
------
Step:9, Action:South
State  189
Old Q Values:  [  14.51316842 1070.29703006   28.65006714 -244.98066897]
New Q values:  [  14.51316842  475.93230661   28.65006714 -244.98066897]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 161.3783153  -289.59534477 -813.89717213 -251.53897752]
------
Step:10, Action:North
State  261
Old Q Values:  [ 161.3783153  -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  78.86422922 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  20
xxxxx
x   x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 5.02499993 49.709677   33.82622604  0.        ]
------
Step:11, Action:South
State  183
Old Q Values:  [ 5.02499993 49.709677   33.82622604  0.        ]
New Q values:  [ 5.02499993 42.94313956 33.82622604  0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  78.86422922 -289.59534477 -813.89717213 -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [  78.86422922 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 173.72538367 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  18
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842  475.93230661   28.65006714 -244.98066897]
------
Step:13, Action:South
State  189
Old Q Values:  [  14.51316842  475.93230661   28.65006714 -244.98066897]
New Q values:  [  14.51316842  241.89053775   28.65006714 -244.98066897]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 173.72538367 -289.59534477 -813.89717213 -251.53897752]
------
Step:14, Action:North
State  261
Old Q Values:  [ 173.72538367 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  99.51520885 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  16
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   102.08351794 -2593.46256533  -180.6       ]
------
Step:15, Action:South
State  181
Old Q Values:  [   13.85659648   102.08351794 -2593.46256533  -180.6       ]
New Q values:  [   13.85659648    70.08796983 -2593.46256533  -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  99.51520885 -289.59534477 -813.89717213 -251.53897752]
------
Step:16, Action:North
State  261
Old Q Values:  [  99.51520885 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  60.23247449 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    70.08796983 -2593.46256533  -180.6       ]
------
Step:17, Action:South
State  189
Old Q Values:  [  14.51316842  241.89053775   28.65006714 -244.98066897]
New Q values:  [  14.51316842  114.22595744   28.65006714 -244.98066897]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  60.23247449 -289.59534477 -813.89717213 -251.53897752]
------
Step:18, Action:North
State  261
Old Q Values:  [  60.23247449 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  44.51938074 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  12
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    70.08796983 -2593.46256533  -180.6       ]
------
Step:19, Action:South
State  181
Old Q Values:  [   13.85659648    70.08796983 -2593.46256533  -180.6       ]
New Q values:  [   13.85659648    40.79100216 -2593.46256533  -180.6       ]
Reward: -1  Episode Reward:  11
xxxxx
x g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  44.51938074 -289.59534477 -813.89717213 -251.53897752]
------
Step:20, Action:North
State  261
Old Q Values:  [  44.51938074 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  51.47553953 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  10
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842  114.22595744   28.65006714 -244.98066897]
------
Step:21, Action:South
State  183
Old Q Values:  [ 5.02499993 42.94313956 33.82622604  0.        ]
New Q values:  [ 5.02499993 32.01991768 33.82622604  0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  51.47553953 -289.59534477 -813.89717213 -251.53897752]
------
Step:22, Action:North
State  261
Old Q Values:  [  51.47553953 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  54.25800305 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  14.51316842  114.22595744   28.65006714 -244.98066897]
------
Step:23, Action:South
State  189
Old Q Values:  [  14.51316842  114.22595744   28.65006714 -244.98066897]
New Q values:  [  14.51316842   61.36778389   28.65006714 -244.98066897]
Reward: -1  Episode Reward:  7
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  54.25800305 -289.59534477 -813.89717213 -251.53897752]
------
Step:24, Action:North
State  261
Old Q Values:  [  54.25800305 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  33.34050186 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  6
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    40.79100216 -2593.46256533  -180.6       ]
------
Step:25, Action:South
State  189
Old Q Values:  [  14.51316842   61.36778389   28.65006714 -244.98066897]
New Q values:  [  14.51316842   33.94926412   28.65006714 -244.98066897]
Reward: -1  Episode Reward:  5
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  33.34050186 -289.59534477 -813.89717213 -251.53897752]
------
Step:26, Action:North
State  261
Old Q Values:  [  33.34050186 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  22.88406856 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  4
xxxxx
x   x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 5.02499993 32.01991768 33.82622604  0.        ]
------
Step:27, Action:East
State  183
Old Q Values:  [ 5.02499993 32.01991768 33.82622604  0.        ]
New Q values:  [  5.02499993  32.01991768 137.56059564   0.        ]
Reward: 9  Episode Reward:  13
xxxxx
x   x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
------
Step:28, Action:East
State  203
Old Q Values:  [ 3.60604218  0.         12.26751246  0.        ]
New Q values:  [ 3.60604218  0.         14.17842544  0.        ]
Reward: 9  Episode Reward:  22
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           12.90473484]
------
Step:29, Action:West
State  216
Old Q Values:  [    7.33328866   -51.76838521 -6170.35693855  -340.15723533]
New Q values:  [    7.33328866   -51.76838521 -6170.35693855  -135.56261802]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 3.66758704e+00  0.00000000e+00 -9.35708462e+03  2.40000000e-02]
------
Step:30, Action:North
State  200
Old Q Values:  [ 6.79482295e-01  2.29467556e+03  1.12932070e+01 -8.40000000e-01]
New Q values:  [ 6.28218634e+01  2.29467556e+03  1.12932070e+01 -8.40000000e-01]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  2.10500235e+02]
------
Step:31, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  2.10500235e+02]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.03199591e+02]
Reward: -1  Episode Reward:  19
xxxxx
xa  x
xg  x
x ..x
xxxxx
Step:32, Action:North
State  108
Old Q Values:  [-6180.6          632.25076219    16.09371094     0.        ]
New Q values:  [-8463.16477134   632.25076219    16.09371094     0.        ]
Reward: -10301  Episode Reward:  -10282
xxxxx
xg  x
x   x
x ..x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6         159.03331436]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6         159.03331436]
New Q values:  [-180.6           2.88323468 -180.6          99.97320308]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.03199591e+02]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.03199591e+02]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.62793336e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6          65.33165717   10.25077275 -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869  345.75396766   20.879358   -272.09726687]
New Q values:  [-177.44732869  184.96976576   20.879358   -272.09726687]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  5.02499993  32.01991768 137.56059564   0.        ]
------
Step:4, Action:East
State  189
Old Q Values:  [  14.51316842   33.94926412   28.65006714 -244.98066897]
New Q values:  [   14.51316842    33.94926412 -5300.73730491  -244.98066897]
Reward: -10001  Episode Reward:  -9974
xxxxx
x   x
x g.x
x...x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10452.10503607 -8656.02923281 -7525.7277781   5806.10177226]
------
Step:1, Action:North
State  288
Old Q Values:  [10452.10503607 -8656.02923281 -7525.7277781   5806.10177226]
New Q values:  [16970.72908597 -8656.02923281 -7525.7277781   5806.10177226]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[42614.95690513  5261.56373116 -2651.70614553 -2227.14232413]
------
Step:2, Action:North
State  210
Old Q Values:  [ 1.33992621e+01  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.85302193e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  6.17315319e+04]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          99.97320308]
New Q values:  [-180.6           2.88323468 -180.6          65.27308131]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.62793336e+01]
------
Step:4, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01 -4.68464655e+00]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01  1.71384243e+01]
Reward: 9  Episode Reward:  36
xxxxx
xag x
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    45.37427641 -6443.21937065  -180.6       ]
------
Step:5, Action:South
State  107
Old Q Values:  [-252.35169558   20.33605192    2.45188789 -252.78192178]
New Q values:  [-252.35169558    7.53442077    2.45188789 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -9.57659638e+02 -1.78980000e+02]
------
Step:6, Action:South
State  181
Old Q Values:  [   13.85659648    40.79100216 -2593.46256533  -180.6       ]
New Q values:  [   13.85659648    28.58162143 -2593.46256533  -180.6       ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x .gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  22.88406856 -289.59534477 -813.89717213 -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [  22.88406856 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  18.73840666 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   14.51316842    33.94926412 -5300.73730491  -244.98066897]
------
Step:8, Action:South
State  181
Old Q Values:  [   13.85659648    28.58162143 -2593.46256533  -180.6       ]
New Q values:  [   13.85659648    16.45417057 -2593.46256533  -180.6       ]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x . x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  18.73840666 -289.59534477 -813.89717213 -251.53897752]
------
Step:9, Action:North
State  260
Old Q Values:  [  129.8994445  -6457.4598       303.59541934 -2702.17995449]
New Q values:  [  134.9749799  -6457.4598       303.59541934 -2702.17995449]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
xa. x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048   278.71734033     0.        ]
------
Step:10, Action:East
State  188
Old Q Values:  [-6523.78898263    32.78037903   978.60391939     0.        ]
New Q values:  [-6523.78898263    32.78037903  1085.24423599     0.        ]
Reward: 9  Episode Reward:  50
xxxxx
x   x
xga x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 6.28218634e+01  2.29467556e+03  1.12932070e+01 -8.40000000e-01]
------
Step:11, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.39726206e+02 1.07778453e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 6.04311967e+04 1.07778453e+03 0.00000000e+00]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
x g x
x a x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[42614.95690513  5261.56373116 -2651.70614553 -2227.14232413]
------
Step:1, Action:North
State  216
Old Q Values:  [    7.33328866   -51.76838521 -6170.35693855  -135.56261802]
New Q values:  [   27.91523986   -51.76838521 -6170.35693855  -135.56261802]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          65.27308131]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          65.27308131]
New Q values:  [-180.6           2.88323468 -180.6          51.39303261]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.62793336e+01]
------
Step:3, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.62793336e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  5.15112306e+01]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6          65.33165717   10.25077275 -180.6       ]
------
Step:4, Action:South
State  110
Old Q Values:  [-180.6          65.33165717   10.25077275 -180.6       ]
New Q values:  [ -180.6        -5648.89406634    10.25077275  -180.6       ]
Reward: -10001  Episode Reward:  -9974
xxxxx
x   x
xg. x
x...x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.85302193e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [ 1.85302193e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 7.43290562e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          51.39303261]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          51.39303261]
New Q values:  [-180.6           2.88323468 -180.6          41.41058222]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  5.15112306e+01]
------
Step:3, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01  1.71384243e+01]
New Q values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01  2.58676526e+01]
Reward: 9  Episode Reward:  27
xxxxx
xag x
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    45.37427641 -6443.21937065  -180.6       ]
------
Step:4, Action:South
State  109
Old Q Values:  [ -241.10880094    45.37427641 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    33.7344898  -6443.21937065  -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x  gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   14.51316842    33.94926412 -5300.73730491  -244.98066897]
------
Step:5, Action:South
State  189
Old Q Values:  [   14.51316842    33.94926412 -5300.73730491  -244.98066897]
New Q values:  [   14.51316842    18.60122764 -5300.73730491  -244.98066897]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  18.73840666 -289.59534477 -813.89717213 -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [  18.73840666 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  12.47573096 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   14.51316842    18.60122764 -5300.73730491  -244.98066897]
------
Step:7, Action:South
State  189
Old Q Values:  [   14.51316842    18.60122764 -5300.73730491  -244.98066897]
New Q values:  [   14.51316842    10.58321034 -5300.73730491  -244.98066897]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  12.47573096 -289.59534477 -813.89717213 -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [  12.47573096 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [   8.74424291 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   14.51316842    10.58321034 -5300.73730491  -244.98066897]
------
Step:9, Action:North
State  189
Old Q Values:  [   14.51316842    10.58321034 -5300.73730491  -244.98066897]
New Q values:  [   60.6961971     10.58321034 -5300.73730491  -244.98066897]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  184.96976576   20.879358   -272.09726687]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869  184.96976576   20.879358   -272.09726687]
New Q values:  [-177.44732869  114.65608499   20.879358   -272.09726687]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  5.02499993  32.01991768 137.56059564   0.        ]
------
Step:11, Action:East
State  191
Old Q Values:  [   3.06655861 1435.41687165    0.            0.        ]
New Q values:  [   3.06655861 1435.41687165   46.04536991    0.        ]
Reward: 9  Episode Reward:  39
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638   6.60224881   0.        ]
------
Step:12, Action:East
State  203
Old Q Values:  [ 3.60604218  0.         14.17842544  0.        ]
New Q values:  [3.60604218 0.         8.94279063 0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           12.90473484]
------
Step:13, Action:West
State  218
Old Q Values:  [   4.59845327 -610.30346672    0.           12.90473484]
New Q values:  [   4.59845327 -610.30346672    0.           45.20726385]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638   6.60224881   0.        ]
------
Step:14, Action:East
State  203
Old Q Values:  [3.60604218 0.         8.94279063 0.        ]
New Q values:  [ 3.60604218  0.         16.53929541  0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           45.20726385]
------
Step:15, Action:West
State  218
Old Q Values:  [   4.59845327 -610.30346672    0.           45.20726385]
New Q values:  [   4.59845327 -610.30346672    0.           58.12827546]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638   6.60224881   0.        ]
------
Step:16, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458    22.7133708      0.        ]
New Q values:  [    0.         -5884.35407458    25.92383096     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           58.12827546]
------
Step:17, Action:West
State  218
Old Q Values:  [   4.59845327 -610.30346672    0.           58.12827546]
New Q values:  [   4.59845327 -610.30346672    0.           63.2966801 ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638   6.60224881   0.        ]
------
Step:18, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458    25.92383096     0.        ]
New Q values:  [    0.         -5884.35407458    28.75853641     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           63.2966801 ]
------
Step:19, Action:West
State  216
Old Q Values:  [   27.91523986   -51.76838521 -6170.35693855  -135.56261802]
New Q values:  [   27.91523986   -51.76838521 -6170.35693855   633.57762103]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xga x
x ..x
xxxxx
Step:20, Action:East
State  200
Old Q Values:  [ 6.28218634e+01  2.29467556e+03  1.12932070e+01 -8.40000000e-01]
New Q values:  [ 6.28218634e+01  2.29467556e+03  1.93990569e+02 -8.40000000e-01]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   27.91523986   -51.76838521 -6170.35693855   633.57762103]
------
Step:21, Action:West
State  216
Old Q Values:  [   27.91523986   -51.76838521 -6170.35693855   633.57762103]
New Q values:  [   27.91523986   -51.76838521 -6170.35693855   941.23371665]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 6.28218634e+01  2.29467556e+03  1.93990569e+02 -8.40000000e-01]
------
Step:22, Action:South
State  204
Old Q Values:  [  0.          48.92723688  -0.32153244 199.6250014 ]
New Q values:  [ 0.00000000e+00  2.51216067e+03 -3.21532439e-01  1.99625001e+02]
Reward: 9  Episode Reward:  38
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:23, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   437.36881615   699.68750722]
New Q values:  [-2527.46239811 -6212.61234477 65271.56625225   699.68750722]
Reward: 100009  Episode Reward:  100047
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 65271.56625225   699.68750722]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 65271.56625225   699.68750722]
New Q values:  [-2527.46239811 -6212.61234477 31205.24522669   699.68750722]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x ..x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16970.72908597 -8656.02923281 -7525.7277781   5806.10177226]
------
Step:2, Action:North
State  288
Old Q Values:  [16970.72908597 -8656.02923281 -7525.7277781   5806.10177226]
New Q values:  [ 9023.56331979 -8656.02923281 -7525.7277781   5806.10177226]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x .ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 7.43290562e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [ 7.43290562e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.14980218e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x . x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  6.17315319e+04]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          41.41058222]
New Q values:  [-180.6           2.88323468 -180.6          95.24804445]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  2.44279372e+02]
------
Step:5, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  5.15112306e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  2.82648185e+01]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.53442077    2.45188789 -252.78192178]
------
Step:6, Action:South
State  107
Old Q Values:  [-252.35169558    7.53442077    2.45188789 -252.78192178]
New Q values:  [-252.35169558    7.35001948    2.45188789 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa.gx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    16.45417057 -2593.46256533  -180.6       ]
------
Step:7, Action:South
State  183
Old Q Values:  [  5.02499993  32.01991768 137.56059564   0.        ]
New Q values:  [  5.02499993  20.83123995 137.56059564   0.        ]
Reward: 9  Episode Reward:  53
xxxxx
x   x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   8.74424291 -289.59534477 -813.89717213 -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [   8.74424291 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  44.16587586 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  5.02499993  20.83123995 137.56059564   0.        ]
------
Step:9, Action:East
State  179
Old Q Values:  [    0.         63160.05960754 73459.24376105     0.        ]
New Q values:  [    0.         63160.05960754 89466.55483296     0.        ]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
x a x
x  gx
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          95.24804445]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          95.24804445]
New Q values:  [-180.6           2.88323468 -180.6          51.97866332]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  2.82648185e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  2.82648185e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.97811592e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634    10.25077275  -180.6       ]
------
Step:3, Action:East
State  110
Old Q Values:  [ -180.6        -5648.89406634    10.25077275  -180.6       ]
New Q values:  [ -180.6        -5648.89406634     9.43465686  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x a x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.97811592e+01]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.97811592e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.01428607e+01]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634     9.43465686  -180.6       ]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558    7.35001948    2.45188789 -252.78192178]
New Q values:  [-252.35169558    7.35001948    3.42361338 -252.78192178]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.01428607e+01]
------
Step:6, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  2.44279372e+02]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.93167546e+01]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.35001948    3.42361338 -252.78192178]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869  114.65608499   20.879358   -272.09726687]
New Q values:  [-177.44732869   69.47129313   20.879358   -272.09726687]
Reward: 9  Episode Reward:  23
xxxxx
x   x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   60.6961971     10.58321034 -5300.73730491  -244.98066897]
------
Step:8, Action:North
State  183
Old Q Values:  [  5.02499993  20.83123995 137.56059564   0.        ]
New Q values:  [ 22.25138791  20.83123995 137.56059564   0.        ]
Reward: -1  Episode Reward:  22
xxxxx
xa  x
x  .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   69.47129313   20.879358   -272.09726687]
------
Step:9, Action:South
State  111
Old Q Values:  [-177.44732869   69.47129313   20.879358   -272.09726687]
New Q values:  [-177.44732869   68.45669594   20.879358   -272.09726687]
Reward: -1  Episode Reward:  21
xxxxx
x   x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791  20.83123995 137.56059564   0.        ]
------
Step:10, Action:East
State  189
Old Q Values:  [   60.6961971     10.58321034 -5300.73730491  -244.98066897]
New Q values:  [   60.6961971     10.58321034 -7432.49225373  -244.98066897]
Reward: -10001  Episode Reward:  -9980
xxxxx
x   x
x g.x
x...x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   68.45669594   20.879358   -272.09726687]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6          98.88959793    0.            0.        ]
New Q values:  [-180.6          49.89209034    0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    16.45417057 -2593.46256533  -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   13.85659648    16.45417057 -2593.46256533  -180.6       ]
New Q values:  [   13.85659648    25.23143098 -2593.46256533  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x ..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  44.16587586 -289.59534477 -813.89717213 -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  44.16587586 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  24.63577964 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    25.23143098 -2593.46256533  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648    25.23143098 -2593.46256533  -180.6       ]
New Q values:  [   13.85659648    16.88330628 -2593.46256533  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x g.x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  24.63577964 -289.59534477 -813.89717213 -251.53897752]
------
Step:5, Action:North
State  260
Old Q Values:  [  134.9749799  -6457.4598       303.59541934 -2702.17995449]
New Q values:  [-5862.99480594 -6457.4598       303.59541934 -2702.17995449]
Reward: -10001  Episode Reward:  -9985
xxxxx
x ..x
xg..x
x . x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  24.63577964 -289.59534477 -813.89717213 -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [  24.63577964 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  56.52249055 -289.59534477 -813.89717213 -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791  20.83123995 137.56059564   0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 22.25138791  20.83123995 137.56059564   0.        ]
New Q values:  [ 22.25138791  20.83123995 137.8815668    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 38.85388605 258.19109515  15.26652123   0.        ]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  1928.34157347 -3909.58186816     0.        ]
New Q values:  [-5922.26708831  6387.12706034 -3909.58186816     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18701.30143649   337.80317566]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 31205.24522669   699.68750722]
New Q values:  [-2527.46239811 -6212.61234477 15188.56708661   699.68750722]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9023.56331979 -8656.02923281 -7525.7277781   5806.10177226]
------
Step:5, Action:North
State  288
Old Q Values:  [ 9023.56331979 -8656.02923281 -7525.7277781   5806.10177226]
New Q values:  [16399.31239945 -8656.02923281 -7525.7277781   5806.10177226]
Reward: 9  Episode Reward:  35
xxxxx
x.g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[42614.95690513  5261.56373116 -2651.70614553 -2227.14232413]
------
Step:6, Action:North
State  208
Old Q Values:  [42614.95690513  5261.56373116 -2651.70614553 -2227.14232413]
New Q values:  [12871.72894965  5261.56373116 -2651.70614553 -2227.14232413]
Reward: -9991  Episode Reward:  -9956
xxxxx
x..gx
x   x
x   x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18701.30143649   337.80317566]
------
Step:1, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 18701.30143649   337.80317566]
New Q values:  [  37.74111519 -168.92307549 6405.71429443  337.80317566]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x.. x
x. gx
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 15188.56708661   699.68750722]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 15188.56708661   699.68750722]
New Q values:  [-2527.46239811 -6212.61234477 11000.62055448   699.68750722]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16399.31239945 -8656.02923281 -7525.7277781   5806.10177226]
------
Step:2, Action:North
State  288
Old Q Values:  [16399.31239945 -8656.02923281 -7525.7277781   5806.10177226]
New Q values:  [10426.64364468 -8656.02923281 -7525.7277781   5806.10177226]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12871.72894965  5261.56373116 -2651.70614553 -2227.14232413]
------
Step:3, Action:North
State  208
Old Q Values:  [12871.72894965  5261.56373116 -2651.70614553 -2227.14232413]
New Q values:  [23673.55114558  5261.56373116 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  6.17315319e+04]
------
Step:4, Action:West
State  136
Old Q Values:  [-6180.6         7380.53174708 -6245.61866138   -28.76456845]
New Q values:  [-6.18060000e+03  7.38053175e+03 -6.24561866e+03  1.65446841e+00]
Reward: 9  Episode Reward:  36
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-1.01561177e+04 -5.99568600e+03  3.10217953e-01  2.58676526e+01]
------
Step:5, Action:East
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  3.10217953e-01  2.58676526e+01]
New Q values:  [-10156.11771313  -5995.686        2213.68361131     25.86765264]
Reward: -1  Episode Reward:  35
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  7.38053175e+03 -6.24561866e+03  1.65446841e+00]
------
Step:6, Action:South
State  136
Old Q Values:  [-6.18060000e+03  7.38053175e+03 -6.24561866e+03  1.65446841e+00]
New Q values:  [-6.18060000e+03  1.00536780e+04 -6.24561866e+03  1.65446841e+00]
Reward: -1  Episode Reward:  34
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23673.55114558  5261.56373116 -2651.70614553 -2227.14232413]
------
Step:7, Action:South
State  208
Old Q Values:  [23673.55114558  5261.56373116 -2651.70614553 -2227.14232413]
New Q values:  [23673.55114558  5232.01858587 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10426.64364468 -8656.02923281 -7525.7277781   5806.10177226]
------
Step:8, Action:West
State  288
Old Q Values:  [10426.64364468 -8656.02923281 -7525.7277781   5806.10177226]
New Q values:  [10426.64364468 -8656.02923281 -7525.7277781   4243.55499723]
Reward: -1  Episode Reward:  32
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 6405.71429443  337.80317566]
------
Step:9, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 6405.71429443  337.80317566]
New Q values:  [  37.74111519 -168.92307549 5689.67881118  337.80317566]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10426.64364468 -8656.02923281 -7525.7277781   4243.55499723]
------
Step:10, Action:West
State  288
Old Q Values:  [10426.64364468 -8656.02923281 -7525.7277781   4243.55499723]
New Q values:  [10426.64364468 -8656.02923281 -7525.7277781   3403.72564225]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5689.67881118  337.80317566]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 11000.62055448   699.68750722]
New Q values:  [-2527.46239811 -6212.61234477 11000.62055448  8902.51096278]
Reward: 9  Episode Reward:  39
xxxxx
x.  x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[28724.11986631  -180.6          500.30088291  4453.74493219]
------
Step:12, Action:North
State  256
Old Q Values:  [    0.             0.          1196.96403289 -2468.58126138]
New Q values:  [  205.63143496     0.          1196.96403289 -2468.58126138]
Reward: 9  Episode Reward:  48
xxxxx
x.  x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[  0.           0.         667.43811653   0.        ]
------
Step:13, Action:East
State  176
Old Q Values:  [   0.            0.         4358.22986303    0.        ]
New Q values:  [    0.             0.         19872.05096563     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.04311967e+04 1.07778453e+03 0.00000000e+00]
------
Step:14, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.04311967e+04 1.07778453e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 2.74720649e+04 1.07778453e+03 0.00000000e+00]
Reward: -1  Episode Reward:  46
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 11000.62055448  8902.51096278]
------
Step:15, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 5689.67881118  337.80317566]
New Q values:  [  37.74111519 -168.92307549 5403.26461787  337.80317566]
Reward: -1  Episode Reward:  45
xxxxx
x.  x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10426.64364468 -8656.02923281 -7525.7277781   3403.72564225]
------
Step:16, Action:West
State  288
Old Q Values:  [10426.64364468 -8656.02923281 -7525.7277781   3403.72564225]
New Q values:  [10426.64364468 -8656.02923281 -7525.7277781   2981.86964226]
Reward: -1  Episode Reward:  44
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5403.26461787  337.80317566]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 11000.62055448  8902.51096278]
New Q values:  [-2527.46239811 -6212.61234477 11000.62055448 12177.640345  ]
Reward: -1  Episode Reward:  43
xxxxx
x.  x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[28724.11986631  -180.6          500.30088291  4453.74493219]
------
Step:18, Action:North
State  257
Old Q Values:  [28724.11986631  -180.6          500.30088291  4453.74493219]
New Q values:  [12435.65641745  -180.6          500.30088291  4453.74493219]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         2203.98243167 3155.36156975    0.        ]
------
Step:19, Action:South
State  176
Old Q Values:  [    0.             0.         19872.05096563     0.        ]
New Q values:  [    0.           358.48920987 19872.05096563     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[  205.63143496     0.          1196.96403289 -2468.58126138]
------
Step:20, Action:East
State  257
Old Q Values:  [12435.65641745  -180.6          500.30088291  4453.74493219]
New Q values:  [12435.65641745  -180.6         3852.81245667  4453.74493219]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 11000.62055448 12177.640345  ]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 11000.62055448 12177.640345  ]
New Q values:  [-2527.46239811 -6212.61234477 11000.62055448  8601.15306324]
Reward: -1  Episode Reward:  39
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12435.65641745  -180.6         3852.81245667  4453.74493219]
------
Step:22, Action:North
State  257
Old Q Values:  [12435.65641745  -180.6         3852.81245667  4453.74493219]
New Q values:  [5920.2710379  -180.6        3852.81245667 4453.74493219]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         2203.98243167 3155.36156975    0.        ]
------
Step:23, Action:South
State  176
Old Q Values:  [    0.           358.48920987 19872.05096563     0.        ]
New Q values:  [    0.           501.88489382 19872.05096563     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[  205.63143496     0.          1196.96403289 -2468.58126138]
------
Step:24, Action:East
State  256
Old Q Values:  [  205.63143496     0.          1196.96403289 -2468.58126138]
New Q values:  [  205.63143496     0.          3778.3717795  -2468.58126138]
Reward: -1  Episode Reward:  36
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 11000.62055448  8601.15306324]
------
Step:25, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 11000.62055448  8601.15306324]
New Q values:  [-2527.46239811 -6212.61234477  7527.6413152   8601.15306324]
Reward: -1  Episode Reward:  35
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10426.64364468 -8656.02923281 -7525.7277781   2981.86964226]
------
Step:26, Action:North
State  288
Old Q Values:  [10426.64364468 -8656.02923281 -7525.7277781   2981.86964226]
New Q values:  [11272.12280154 -8656.02923281 -7525.7277781   2981.86964226]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23673.55114558  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:27, Action:North
State  208
Old Q Values:  [23673.55114558  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [11289.16664583  5232.01858587 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  33
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6067.82062533  1141.37341868 -8652.84        4547.03370611]
------
Step:28, Action:North
State  130
Old Q Values:  [-2.96121988e+02 -4.75325357e+01 -1.80008075e+02  6.17315319e+04]
New Q values:  [ 1.82204108e+04 -4.75325357e+01 -1.80008075e+02  6.17315319e+04]
Reward: -301  Episode Reward:  -268
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 1.82204108e+04 -4.75325357e+01 -1.80008075e+02  6.17315319e+04]
------
Step:29, Action:West
State  130
Old Q Values:  [ 1.82204108e+04 -4.75325357e+01 -1.80008075e+02  6.17315319e+04]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 42718.71431509]
Reward: -1  Episode Reward:  -269
xxxxx
x.a x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  6.00890052e+04]
------
Step:30, Action:West
State  114
Old Q Values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  6.00890052e+04]
New Q values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  8.41248473e+04]
Reward: 100009  Episode Reward:  99740
xxxxx
xa  x
x   x
xg  x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          51.97866332]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          51.97866332]
New Q values:  [-180.6           2.88323468 -180.6          29.23432355]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.01428607e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.01428607e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.16621501e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.35001948    3.42361338 -252.78192178]
------
Step:3, Action:South
State  109
Old Q Values:  [ -241.10880094    33.7344898  -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    23.9587878  -6443.21937065  -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x g x
xa..x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    16.88330628 -2593.46256533  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648    16.88330628 -2593.46256533  -180.6       ]
New Q values:  [   13.85659648    29.11006968 -2593.46256533  -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x g.x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  56.52249055 -289.59534477 -813.89717213 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [  56.52249055 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  30.74201712 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    29.11006968 -2593.46256533  -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [   13.85659648    29.11006968 -2593.46256533  -180.6       ]
New Q values:  [   13.85659648    20.26663301 -2593.46256533  -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x g.x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  30.74201712 -289.59534477 -813.89717213 -251.53897752]
------
Step:7, Action:North
State  260
Old Q Values:  [-5862.99480594 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [-8262.18272028 -6457.4598       303.59541934 -2702.17995449]
Reward: -10001  Episode Reward:  -9967
xxxxx
x   x
xg..x
x . x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[ 38.85388605 258.19109515  15.26652123   0.        ]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  6387.12706034 -3909.58186816     0.        ]
New Q values:  [-5922.26708831  4181.2302095  -3909.58186816     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5403.26461787  337.80317566]
------
Step:2, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 5403.26461787  337.80317566]
New Q values:  [  37.74111519 -168.92307549 5542.34268761  337.80317566]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11272.12280154 -8656.02923281 -7525.7277781   2981.86964226]
------
Step:3, Action:North
State  288
Old Q Values:  [11272.12280154 -8656.02923281 -7525.7277781   2981.86964226]
New Q values:  [ 7900.99911437 -8656.02923281 -7525.7277781   2981.86964226]
Reward: 9  Episode Reward:  17
xxxxx
x.g.x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11289.16664583  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:4, Action:North
State  208
Old Q Values:  [11289.16664583  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [17336.68095286  5232.01858587 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 42718.71431509]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          29.23432355]
New Q values:  [-180.6           2.88323468 -180.6          20.59237446]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.16621501e+01]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.16621501e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.22698659e+01]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.35001948    3.42361338 -252.78192178]
------
Step:7, Action:South
State  105
Old Q Values:  [-1.80600000e+02  3.15324882e+00 -3.89520980e+03  0.00000000e+00]
New Q values:  [ -180.6            6.66129953 -3895.20980426     0.        ]
Reward: 9  Episode Reward:  53
xxxxx
x g x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00 -9.57659638e+02 -1.78980000e+02]
------
Step:8, Action:South
State  177
Old Q Values:  [   0.         2203.98243167 3155.36156975    0.        ]
New Q values:  [    0.         62663.07428404  3155.36156975     0.        ]
Reward: 100009  Episode Reward:  100062
xxxxx
x  gx
x   x
xa  x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.74720649e+04 1.07778453e+03 0.00000000e+00]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  4181.2302095  -3909.58186816     0.        ]
New Q values:  [-5922.26708831  3340.59489008 -3909.58186816     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5542.34268761  337.80317566]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  7527.6413152   8601.15306324]
New Q values:  [-2527.46239811 -6212.61234477  5386.75626039  8601.15306324]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7900.99911437 -8656.02923281 -7525.7277781   2981.86964226]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7900.99911437 -8656.02923281 -7525.7277781   2981.86964226]
New Q values:  [ 8366.8039316  -8656.02923281 -7525.7277781   2981.86964226]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17336.68095286  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:4, Action:North
State  208
Old Q Values:  [17336.68095286  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [19755.68667567  5232.01858587 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  36
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 42718.71431509]
------
Step:5, Action:West
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 42718.71431509]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 42324.33991116]
Reward: -1  Episode Reward:  35
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  8.41248473e+04]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.22698659e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.25129522e+01]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.35001948    3.42361338 -252.78192178]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558    7.35001948    3.42361338 -252.78192178]
New Q values:  [-252.35169558   11.28108573    3.42361338 -252.78192178]
Reward: 9  Episode Reward:  53
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.          9.80359313  0.        ]
------
Step:8, Action:East
State  185
Old Q Values:  [-6.00000000e-01  0.00000000e+00 -9.57659638e+02 -1.78980000e+02]
New Q values:  [  -0.6           0.         -382.56357897 -178.98      ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 3.66758704e+00  0.00000000e+00 -9.35708462e+03  2.40000000e-02]
------
Step:9, Action:North
State  201
Old Q Values:  [ 3.66758704e+00  0.00000000e+00 -9.35708462e+03  2.40000000e-02]
New Q values:  [ 2.33354578e+00  0.00000000e+00 -9.35708462e+03  2.40000000e-02]
Reward: -1  Episode Reward:  51
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:10, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686        2213.68361131     25.86765264]
New Q values:  [-10156.11771313  -5995.686        2213.68361131     11.74545092]
Reward: -1  Episode Reward:  50
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            6.66129953 -3895.20980426     0.        ]
------
Step:11, Action:South
State  104
Old Q Values:  [-8.65284000e+03 -6.00000000e-01 -9.58483566e+02 -8.65284000e+03]
New Q values:  [-8.65284000e+03  4.32678599e-01 -9.58483566e+02 -8.65284000e+03]
Reward: -1  Episode Reward:  49
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[0.       0.       4.242262 0.      ]
------
Step:12, Action:East
State  185
Old Q Values:  [  -0.6           0.         -382.56357897 -178.98      ]
New Q values:  [  -0.6           0.          534.77723665 -178.98      ]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 6.28218634e+01  2.29467556e+03  1.93990569e+02 -8.40000000e-01]
------
Step:13, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  0.00000000e+00 -9.35708462e+03  2.40000000e-02]
New Q values:  [ 2.33354578e+00  1.66210281e+03 -9.35708462e+03  2.40000000e-02]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5542.34268761  337.80317566]
------
Step:14, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 5542.34268761  337.80317566]
New Q values:  [  37.74111519 -168.92307549 4726.37825453  337.80317566]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8366.8039316  -8656.02923281 -7525.7277781   2981.86964226]
------
Step:15, Action:West
State  288
Old Q Values:  [ 8366.8039316  -8656.02923281 -7525.7277781   2981.86964226]
New Q values:  [ 8366.8039316  -8656.02923281 -7525.7277781   2610.06133326]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4726.37825453  337.80317566]
------
Step:16, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4726.37825453  337.80317566]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  4.72637825e+03  6.19166026e+04]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  4.72637825e+03  6.19166026e+04]
------
Step:1, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  4.72637825e+03  6.19166026e+04]
New Q values:  [   37.74111519  -168.92307549  4726.37825453 24781.26363779]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  30.74201712 -289.59534477 -813.89717213 -251.53897752]
------
Step:2, Action:North
State  261
Old Q Values:  [  30.74201712 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  59.06127689 -289.59534477 -813.89717213 -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791  20.83123995 137.8815668    0.        ]
------
Step:3, Action:East
State  181
Old Q Values:  [   13.85659648    20.26663301 -2593.46256533  -180.6       ]
New Q values:  [  13.85659648   20.26663301  -29.80655911 -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3340.59489008 -3909.58186816     0.        ]
------
Step:4, Action:South
State  193
Old Q Values:  [-5922.26708831  3340.59489008 -3909.58186816     0.        ]
New Q values:  [-5922.26708831  8770.01704737 -3909.58186816     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4726.37825453 24781.26363779]
------
Step:5, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4726.37825453 24781.26363779]
New Q values:  [  37.74111519 -168.92307549 4726.37825453 9929.62383818]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  59.06127689 -289.59534477 -813.89717213 -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [  59.06127689 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  64.3889808  -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791  20.83123995 137.8815668    0.        ]
------
Step:7, Action:East
State  181
Old Q Values:  [  13.85659648   20.26663301  -29.80655911 -180.6       ]
New Q values:  [  13.85659648   20.26663301 2618.48249057 -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8770.01704737 -3909.58186816     0.        ]
------
Step:8, Action:South
State  195
Old Q Values:  [ 38.85388605 258.19109515  15.26652123   0.        ]
New Q values:  [  38.85388605 3081.56358951   15.26652123    0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4726.37825453 9929.62383818]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  5386.75626039  8601.15306324]
New Q values:  [-2527.46239811 -6212.61234477  5386.75626039  3459.17791953]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  64.3889808  -289.59534477 -813.89717213 -251.53897752]
------
Step:10, Action:North
State  260
Old Q Values:  [-8262.18272028 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [-3234.31081479 -6457.4598       303.59541934 -2702.17995449]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   237.20757775     0.        ]
------
Step:11, Action:East
State  180
Old Q Values:  [-1367.02476015   138.68990048   278.71734033     0.        ]
New Q values:  [-1367.02476015   138.68990048  8352.5063942      0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.74720649e+04 1.07778453e+03 0.00000000e+00]
------
Step:12, Action:South
State  196
Old Q Values:  [-2469.90645144   573.25701621    73.71095389     0.        ]
New Q values:  [-2469.90645144  2715.89258013    73.71095389     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  5386.75626039  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477  4664.14368364  3459.17791953]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8366.8039316  -8656.02923281 -7525.7277781   2610.06133326]
------
Step:14, Action:North
State  288
Old Q Values:  [ 8366.8039316  -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 9278.82757534 -8656.02923281 -7525.7277781   2610.06133326]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19755.68667567  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:15, Action:North
State  208
Old Q Values:  [19755.68667567  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [20604.97664362  5232.01858587 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  35
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 42324.33991116]
------
Step:16, Action:West
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 42324.33991116]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 42172.59014959]
Reward: 9  Episode Reward:  44
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  8.41248473e+04]
------
Step:17, Action:West
State  115
Old Q Values:  [-180.6          59.78986392    6.47656656    0.        ]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  7.05907948e+04]
Reward: 100009  Episode Reward:  100053
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  64.3889808  -289.59534477 -813.89717213 -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [  64.3889808  -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  72.52006236 -289.59534477 -813.89717213 -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791  20.83123995 137.8815668    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 22.25138791  20.83123995 137.8815668    0.        ]
New Q values:  [ 22.25138791  20.83123995 258.5851301    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  6.60108345e+02  0.00000000e+00]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  6.60108345e+02  0.00000000e+00]
New Q values:  [-6.00000000e-01  1.06176528e+02  6.71884988e+03  0.00000000e+00]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.14980218e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:4, Action:North
State  210
Old Q Values:  [ 2.14980218e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.12563858e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 42172.59014959]
------
Step:5, Action:West
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 42172.59014959]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 42111.89024496]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  8.41248473e+04]
------
Step:6, Action:West
State  126
Old Q Values:  [ 0.         65.95371601  5.9800666   8.15865135]
New Q values:  [ 0.         65.95371601  5.9800666  29.20046932]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   68.45669594   20.879358   -272.09726687]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869   68.45669594   20.879358   -272.09726687]
New Q values:  [-177.44732869   44.99153751   20.879358   -272.09726687]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   60.6961971     10.58321034 -7432.49225373  -244.98066897]
------
Step:8, Action:North
State  189
Old Q Values:  [   60.6961971     10.58321034 -7432.49225373  -244.98066897]
New Q values:  [   30.86611518    10.58321034 -7432.49225373  -244.98066897]
Reward: -1  Episode Reward:  52
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    23.9587878  -6443.21937065  -180.6       ]
------
Step:9, Action:South
State  109
Old Q Values:  [ -241.10880094    23.9587878  -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    18.24334968 -6443.21937065  -180.6       ]
Reward: -1  Episode Reward:  51
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   30.86611518    10.58321034 -7432.49225373  -244.98066897]
------
Step:10, Action:North
State  189
Old Q Values:  [   30.86611518    10.58321034 -7432.49225373  -244.98066897]
New Q values:  [   17.21945097    10.58321034 -7432.49225373  -244.98066897]
Reward: -1  Episode Reward:  50
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    18.24334968 -6443.21937065  -180.6       ]
------
Step:11, Action:South
State  109
Old Q Values:  [ -241.10880094    18.24334968 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    11.86317516 -6443.21937065  -180.6       ]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   17.21945097    10.58321034 -7432.49225373  -244.98066897]
------
Step:12, Action:North
State  189
Old Q Values:  [   17.21945097    10.58321034 -7432.49225373  -244.98066897]
New Q values:  [    9.84673294    10.58321034 -7432.49225373  -244.98066897]
Reward: -1  Episode Reward:  48
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    11.86317516 -6443.21937065  -180.6       ]
------
Step:13, Action:South
State  111
Old Q Values:  [-177.44732869   44.99153751   20.879358   -272.09726687]
New Q values:  [-177.44732869   20.57157811   20.879358   -272.09726687]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294    10.58321034 -7432.49225373  -244.98066897]
------
Step:14, Action:South
State  191
Old Q Values:  [   3.06655861 1435.41687165   46.04536991    0.        ]
New Q values:  [  3.06655861 595.32276737  46.04536991   0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  72.52006236 -289.59534477 -813.89717213 -251.53897752]
------
Step:15, Action:North
State  257
Old Q Values:  [5920.2710379  -180.6        3852.81245667 4453.74493219]
New Q values:  [29207.47486505  -180.6         3852.81245667  4453.74493219]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         63160.05960754 89466.55483296     0.        ]
------
Step:16, Action:East
State  189
Old Q Values:  [    9.84673294    10.58321034 -7432.49225373  -244.98066897]
New Q values:  [    9.84673294    10.58321034 -2971.32675163  -244.98066897]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         7.56716619]
------
Step:17, Action:West
State  195
Old Q Values:  [  38.85388605 3081.56358951   15.26652123    0.        ]
New Q values:  [3.88538861e+01 3.08156359e+03 1.52665212e+01 2.68393664e+04]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         63160.05960754 89466.55483296     0.        ]
------
Step:18, Action:East
State  189
Old Q Values:  [    9.84673294    10.58321034 -2971.32675163  -244.98066897]
New Q values:  [    9.84673294    10.58321034 -1186.8605508   -244.98066897]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         7.56716619]
------
Step:19, Action:West
State  205
Old Q Values:  [0.         0.         0.         7.56716619]
New Q values:  [0.         0.         0.         5.60182958]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294    10.58321034 -1186.8605508   -244.98066897]
------
Step:20, Action:South
State  189
Old Q Values:  [    9.84673294    10.58321034 -1186.8605508   -244.98066897]
New Q values:  [    9.84673294    25.38930285 -1186.8605508   -244.98066897]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  72.52006236 -289.59534477 -813.89717213 -251.53897752]
------
Step:21, Action:North
State  261
Old Q Values:  [  72.52006236 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [  36.0248158  -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294    25.38930285 -1186.8605508   -244.98066897]
------
Step:22, Action:South
State  189
Old Q Values:  [    9.84673294    25.38930285 -1186.8605508   -244.98066897]
New Q values:  [    9.84673294    20.36316588 -1186.8605508   -244.98066897]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  36.0248158  -289.59534477 -813.89717213 -251.53897752]
------
Step:23, Action:North
State  260
Old Q Values:  [-3234.31081479 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [ -968.75105512 -6457.4598       303.59541934 -2702.17995449]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263    32.78037903  1085.24423599     0.        ]
------
Step:24, Action:East
State  188
Old Q Values:  [-6523.78898263    32.78037903  1085.24423599     0.        ]
New Q values:  [-6523.78898263    32.78037903  1187.14589491     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[ 0.00000000e+00  2.51216067e+03 -3.21532439e-01  1.99625001e+02]
------
Step:25, Action:South
State  204
Old Q Values:  [ 0.00000000e+00  2.51216067e+03 -3.21532439e-01  1.99625001e+02]
New Q values:  [ 0.00000000e+00  3.49145404e+03 -3.21532439e-01  1.99625001e+02]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:26, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  4664.14368364  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477 64654.70574606  3459.17791953]
Reward: 100009  Episode Reward:  100044
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          20.59237446]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          20.59237446]
New Q values:  [-180.6           2.88323468 -180.6          17.39083545]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.25129522e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.25129522e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.66689883e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   20.57157811   20.879358   -272.09726687]
------
Step:3, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573    3.42361338 -252.78192178]
New Q values:  [-252.35169558   11.28108573   30.56447173 -252.78192178]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.93167546e+01]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.66689883e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.23314027e+01]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   20.57157811   20.879358   -272.09726687]
------
Step:5, Action:East
State  110
Old Q Values:  [ -180.6        -5648.89406634     9.43465686  -180.6       ]
New Q values:  [ -180.6        -5648.89406634     6.87328356  -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.23314027e+01]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.23314027e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.39454615e+00]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634     6.87328356  -180.6       ]
------
Step:7, Action:East
State  110
Old Q Values:  [ -180.6        -5648.89406634     6.87328356  -180.6       ]
New Q values:  [-1.80600000e+02 -5.64889407e+03  4.06767727e+00 -1.80600000e+02]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x...x
xg .x
xxxxx
Step:8, Action:South
State  126
Old Q Values:  [ 0.         65.95371601  5.9800666  29.20046932]
New Q values:  [   0.         2047.43645092    5.9800666    29.20046932]
Reward: 9  Episode Reward:  22
xxxxx
x   x
x.a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  6.71884988e+03  0.00000000e+00]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  6.71884988e+03  0.00000000e+00]
New Q values:  [-6.00000000e-01  1.06176528e+02  2.71192896e+03  0.00000000e+00]
Reward: 9  Episode Reward:  31
xxxxx
x   x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.           63.2966801 ]
------
Step:10, Action:West
State  216
Old Q Values:  [   27.91523986   -51.76838521 -6170.35693855   941.23371665]
New Q values:  [   27.91523986   -51.76838521 -6170.35693855  2617.51294473]
Reward: -10001  Episode Reward:  -9970
xxxxx
x   x
x.g x
x. .x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  36.0248158  -289.59534477 -813.89717213 -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [  36.0248158  -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 805.35467349 -289.59534477 -813.89717213 -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648   20.26663301 2618.48249057 -180.6       ]
------
Step:2, Action:East
State  181
Old Q Values:  [  13.85659648   20.26663301 2618.48249057 -180.6       ]
New Q values:  [  13.85659648   20.26663301 3683.79811044 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x a.x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8770.01704737 -3909.58186816     0.        ]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  8770.01704737 -3909.58186816     0.        ]
New Q values:  [-5922.26708831  6492.2939704  -3909.58186816     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4726.37825453 9929.62383818]
------
Step:4, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4726.37825453 9929.62383818]
New Q values:  [  37.74111519 -168.92307549 4726.37825453 4212.85593732]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 805.35467349 -289.59534477 -813.89717213 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [ 805.35467349 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 399.11740843 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791  20.83123995 258.5851301    0.        ]
------
Step:6, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534   237.20757775     0.        ]
New Q values:  [    0.         -5969.29177534   907.86171811     0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x a.x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  2.71192896e+03  0.00000000e+00]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  2.71192896e+03  0.00000000e+00]
New Q values:  [-6.00000000e-01  1.06176528e+02  7.46708731e+03  0.00000000e+00]
Reward: 9  Episode Reward:  33
xxxxx
x...x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.12563858e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [ 2.12563858e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.11415214e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  42
xxxxx
x..ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 42111.89024496]
------
Step:9, Action:West
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 42111.89024496]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 42087.61028311]
Reward: 9  Episode Reward:  51
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  8.41248473e+04]
------
Step:10, Action:West
State  114
Old Q Values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  8.41248473e+04]
New Q values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  9.37391841e+04]
Reward: 100009  Episode Reward:  100060
xxxxx
xa  x
x   x
xg  x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 399.11740843 -289.59534477 -813.89717213 -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [ 399.11740843 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [1270.1863965  -289.59534477 -813.89717213 -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648   20.26663301 3683.79811044 -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [  13.85659648   20.26663301 3683.79811044 -180.6       ]
New Q values:  [  13.85659648  388.56257215 3683.79811044 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1270.1863965  -289.59534477 -813.89717213 -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [1270.1863965  -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 585.05009763 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791  20.83123995 258.5851301    0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [ 22.25138791  20.83123995 258.5851301    0.        ]
New Q values:  [  22.25138791   20.83123995 2348.96024615    0.        ]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  7.46708731e+03  0.00000000e+00]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.74720649e+04 1.07778453e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 2.74720649e+04 6.61800680e+03 0.00000000e+00]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20604.97664362  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:6, Action:North
State  208
Old Q Values:  [20604.97664362  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [20873.67374238  5232.01858587 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  34
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 42087.61028311]
------
Step:7, Action:West
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 42087.61028311]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 44962.1993481 ]
Reward: 9  Episode Reward:  43
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  9.37391841e+04]
------
Step:8, Action:West
State  126
Old Q Values:  [   0.         2047.43645092    5.9800666    29.20046932]
New Q values:  [   0.         2047.43645092    5.9800666    18.30049091]
Reward: 9  Episode Reward:  52
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -5.64889407e+03  4.06767727e+00 -1.80600000e+02]
------
Step:9, Action:East
State  111
Old Q Values:  [-177.44732869   20.57157811   20.879358   -272.09726687]
New Q values:  [-177.44732869   20.57157811  621.98267847 -272.09726687]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.         2047.43645092    5.9800666    18.30049091]
------
Step:10, Action:West
State  127
Old Q Values:  [ 0.          1.67014986  5.38992087 15.32541077]
New Q values:  [  0.           1.67014986   5.38992087 192.12496785]
Reward: -1  Episode Reward:  50
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   20.57157811  621.98267847 -272.09726687]
------
Step:11, Action:East
State  99
Old Q Values:  [    0.         35284.64933898     0.             0.        ]
New Q values:  [    0.         35284.64933898 21176.63844051     0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  7.05907948e+04]
------
Step:12, Action:West
State  127
Old Q Values:  [  0.           1.67014986   5.38992087 192.12496785]
New Q values:  [  0.           1.67014986   5.38992087 262.84479068]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   20.57157811  621.98267847 -272.09726687]
------
Step:13, Action:East
State  111
Old Q Values:  [-177.44732869   20.57157811  621.98267847 -272.09726687]
New Q values:  [-177.44732869   20.57157811  862.42400667 -272.09726687]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.         2047.43645092    5.9800666    18.30049091]
------
Step:14, Action:West
State  127
Old Q Values:  [  0.           1.67014986   5.38992087 262.84479068]
New Q values:  [  0.           1.67014986   5.38992087 363.26511827]
Reward: -1  Episode Reward:  46
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   20.57157811  862.42400667 -272.09726687]
------
Step:15, Action:East
State  99
Old Q Values:  [    0.         35284.64933898 21176.63844051     0.        ]
New Q values:  [    0.         35284.64933898 29647.29381671     0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  7.05907948e+04]
------
Step:16, Action:West
State  127
Old Q Values:  [  0.           1.67014986   5.38992087 363.26511827]
New Q values:  [  0.           1.67014986   5.38992087 403.43324931]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   20.57157811  862.42400667 -272.09726687]
------
Step:17, Action:East
State  109
Old Q Values:  [ -241.10880094    11.86317516 -6443.21937065  -180.6       ]
New Q values:  [ -241.10880094    11.86317516 -2577.56330487  -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x agx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         1.08147795]
------
Step:18, Action:West
State  124
Old Q Values:  [ 0.00000000e+00  5.40000000e+00 -5.98640012e+03  9.10538211e-01]
New Q values:  [ 0.00000000e+00  5.40000000e+00 -5.98640012e+03  3.32316783e+00]
Reward: -1  Episode Reward:  42
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    11.86317516 -2577.56330487  -180.6       ]
------
Step:19, Action:South
State  109
Old Q Values:  [ -241.10880094    11.86317516 -2577.56330487  -180.6       ]
New Q values:  [ -241.10880094    10.25421983 -2577.56330487  -180.6       ]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294    20.36316588 -1186.8605508   -244.98066897]
------
Step:20, Action:South
State  189
Old Q Values:  [    9.84673294    20.36316588 -1186.8605508   -244.98066897]
New Q values:  [    9.84673294   183.06029564 -1186.8605508   -244.98066897]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 585.05009763 -289.59534477 -813.89717213 -251.53897752]
------
Step:21, Action:North
State  261
Old Q Values:  [ 585.05009763 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 288.33812774 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   183.06029564 -1186.8605508   -244.98066897]
------
Step:22, Action:South
State  189
Old Q Values:  [    9.84673294   183.06029564 -1186.8605508   -244.98066897]
New Q values:  [    9.84673294   159.12555658 -1186.8605508   -244.98066897]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 288.33812774 -289.59534477 -813.89717213 -251.53897752]
------
Step:23, Action:North
State  261
Old Q Values:  [ 288.33812774 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 162.47291807 -289.59534477 -813.89717213 -251.53897752]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   159.12555658 -1186.8605508   -244.98066897]
------
Step:24, Action:South
State  188
Old Q Values:  [-6523.78898263    32.78037903  1187.14589491     0.        ]
New Q values:  [-6523.78898263   103.59077741  1187.14589491     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -968.75105512 -6457.4598       303.59541934 -2702.17995449]
------
Step:25, Action:East
State  261
Old Q Values:  [ 162.47291807 -289.59534477 -813.89717213 -251.53897752]
New Q values:  [ 162.47291807 -289.59534477 2161.03090479 -251.53897752]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:26, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 64654.70574606  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477 88650.93057103  3459.17791953]
Reward: 100009  Episode Reward:  100044
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20873.67374238  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:1, Action:North
State  216
Old Q Values:  [   27.91523986   -51.76838521 -6170.35693855  2617.51294473]
New Q values:  [   21.78334658   -51.76838521 -6170.35693855  2617.51294473]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          17.39083545]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          17.39083545]
New Q values:  [-180.6           2.88323468 -180.6           8.27469802]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.39454615e+00]
------
Step:3, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.39454615e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.71271600e+01]
Reward: 9  Episode Reward:  17
xxxxx
xa  x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   30.56447173 -252.78192178]
------
Step:4, Action:East
State  109
Old Q Values:  [ -241.10880094    10.25421983 -2577.56330487  -180.6       ]
New Q values:  [ -241.10880094    10.25421983 -6367.52023856  -180.6       ]
Reward: -10001  Episode Reward:  -9984
xxxxx
x g x
x.. x
x...x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 162.47291807 -289.59534477 2161.03090479 -251.53897752]
------
Step:1, Action:North
State  260
Old Q Values:  [ -968.75105512 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [ -109.74190661 -6457.4598       303.59541934 -2702.17995449]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   907.86171811     0.        ]
------
Step:2, Action:East
State  180
Old Q Values:  [-1367.02476015   138.68990048  8352.5063942      0.        ]
New Q values:  [-1367.02476015   138.68990048 11588.02201575     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.74720649e+04 6.61800680e+03 0.00000000e+00]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  2715.89258013    73.71095389     0.        ]
New Q values:  [-2469.90645144  3572.9468057     73.71095389     0.        ]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 88650.93057103  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477 38249.42050101  3459.17791953]
Reward: 9  Episode Reward:  26
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9278.82757534 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:5, Action:North
State  288
Old Q Values:  [ 9278.82757534 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [10059.38744459 -8656.02923281 -7525.7277781   2610.06133326]
Reward: 9  Episode Reward:  35
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.11415214e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [ 2.11415214e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.19506684e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  44
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 44962.1993481 ]
------
Step:7, Action:West
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 44962.1993481 ]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 39167.51817975]
Reward: 9  Episode Reward:  53
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  7.05907948e+04]
------
Step:8, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  7.05907948e+04]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  9.88271127e+04]
Reward: 100009  Episode Reward:  100062
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6           8.27469802]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6           8.27469802]
New Q values:  [-180.6           2.88323468 -180.6          13.8480272 ]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.71271600e+01]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.71271600e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.34711672e+01]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -5.64889407e+03  4.06767727e+00 -1.80600000e+02]
------
Step:3, Action:East
State  110
Old Q Values:  [-1.80600000e+02 -5.64889407e+03  4.06767727e+00 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -5.64889407e+03  5.06842106e+00 -1.80600000e+02]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.34711672e+01]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  1.34711672e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.30899319e+00]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -5.64889407e+03  5.06842106e+00 -1.80600000e+02]
------
Step:5, Action:East
State  108
Old Q Values:  [-8463.16477134   632.25076219    16.09371094     0.        ]
New Q values:  [-8463.16477134   632.25076219   669.94256777     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
xga x
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        2213.68361131     11.74545092]
------
Step:6, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  4.77456255e+00  6.30899319e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  5.46423318e+00  6.30899319e+00]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          13.8480272 ]
------
Step:7, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          13.8480272 ]
New Q values:  [-180.6           2.88323468 -180.6           6.83190884]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  5.46423318e+00  6.30899319e+00]
------
Step:8, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  5.46423318e+00  6.30899319e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  5.46423318e+00  3.44412359e+00]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -5.64889407e+03  5.06842106e+00 -1.80600000e+02]
------
Step:9, Action:East
State  110
Old Q Values:  [-1.80600000e+02 -5.64889407e+03  5.06842106e+00 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -5.64889407e+03  3.06663838e+00 -1.80600000e+02]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  5.46423318e+00  3.44412359e+00]
------
Step:10, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  5.46423318e+00  3.44412359e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  3.63526592e+00  3.44412359e+00]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6           6.83190884]
------
Step:11, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6           6.83190884]
New Q values:  [-180.6           2.88323468 -180.6           3.22334331]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  3.63526592e+00  3.44412359e+00]
------
Step:12, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  3.63526592e+00  3.44412359e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.44412359e+00]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6           3.22334331]
------
Step:13, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6           3.22334331]
New Q values:  [-180.6           2.88323468 -180.6           1.7225744 ]
Reward: -1  Episode Reward:  7
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.44412359e+00]
------
Step:14, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.44412359e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  2.59504851e+02]
Reward: -1  Episode Reward:  6
xxxxx
xa  x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   20.57157811  862.42400667 -272.09726687]
------
Step:15, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   30.56447173 -252.78192178]
New Q values:  [-252.35169558   11.28108573   89.47724412 -252.78192178]
Reward: -1  Episode Reward:  5
xxxxx
x a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  2.59504851e+02]
------
Step:16, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686        2213.68361131     11.74545092]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.21368361e+03  7.17444631e+00]
Reward: -1  Episode Reward:  4
xxxxx
xag x
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094    10.25421983 -6367.52023856  -180.6       ]
------
Step:17, Action:South
State  111
Old Q Values:  [-177.44732869   20.57157811  862.42400667 -272.09726687]
New Q values:  [-177.44732869 1118.76806437  862.42400667 -272.09726687]
Reward: 9  Episode Reward:  13
xxxxx
x   x
xag.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648  388.56257215 3683.79811044 -180.6       ]
------
Step:18, Action:South
State  181
Old Q Values:  [  13.85659648  388.56257215 3683.79811044 -180.6       ]
New Q values:  [  13.85659648  809.1343003  3683.79811044 -180.6       ]
Reward: 9  Episode Reward:  22
xxxxx
x g x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 162.47291807 -289.59534477 2161.03090479 -251.53897752]
------
Step:19, Action:East
State  261
Old Q Values:  [ 162.47291807 -289.59534477 2161.03090479 -251.53897752]
New Q values:  [ 162.47291807 -289.59534477 3351.00213556 -251.53897752]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:20, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 38249.42050101  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477 18322.98443378  3459.17791953]
Reward: 9  Episode Reward:  30
xxxxx
x   x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10059.38744459 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:21, Action:North
State  288
Old Q Values:  [10059.38744459 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [10291.25710055 -8656.02923281 -7525.7277781   2610.06133326]
Reward: 9  Episode Reward:  39
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20873.67374238  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:22, Action:North
State  208
Old Q Values:  [20873.67374238  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [11364.9729097   5232.01858587 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  38
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  1.00536780e+04 -6.24561866e+03  1.65446841e+00]
------
Step:23, Action:South
State  136
Old Q Values:  [-6.18060000e+03  1.00536780e+04 -6.24561866e+03  1.65446841e+00]
New Q values:  [-6.18060000e+03  7.43036309e+03 -6.24561866e+03  1.65446841e+00]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11364.9729097   5232.01858587 -2651.70614553 -2227.14232413]
------
Step:24, Action:North
State  208
Old Q Values:  [11364.9729097   5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [  774.49809086  5232.01858587 -2651.70614553 -2227.14232413]
Reward: -10001  Episode Reward:  -9964
xxxxx
x  gx
x . x
x   x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1118.76806437  862.42400667 -272.09726687]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094    10.25421983 -6367.52023856  -180.6       ]
New Q values:  [ -241.10880094  1114.64112106 -6367.52023856  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x .gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648  809.1343003  3683.79811044 -180.6       ]
------
Step:2, Action:East
State  181
Old Q Values:  [  13.85659648  809.1343003  3683.79811044 -180.6       ]
New Q values:  [  13.85659648  809.1343003  1977.55008606 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.66210281e+03 -9.35708462e+03  2.40000000e-02]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.74720649e+04 6.61800680e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.64911213e+04 6.61800680e+03 0.00000000e+00]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 18322.98443378  3459.17791953]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 18322.98443378  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477 10421.97090368  3459.17791953]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10291.25710055 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:5, Action:North
State  288
Old Q Values:  [10291.25710055 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [10701.10334733 -8656.02923281 -7525.7277781   2610.06133326]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.19506684e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:6, Action:North
State  208
Old Q Values:  [  774.49809086  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [12065.45469027  5232.01858587 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  44
xxxxx
x .ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 39167.51817975]
------
Step:7, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6           1.7225744 ]
New Q values:  [-180.6           2.88323468 -180.6          35.88405614]
Reward: 9  Episode Reward:  53
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.93167546e+01]
------
Step:8, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  9.93167546e+01]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  6.59698751e+01]
Reward: -1  Episode Reward:  52
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   89.47724412 -252.78192178]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   89.47724412 -252.78192178]
New Q values:  [-252.35169558   11.28108573   54.98186017 -252.78192178]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[-2.53448863e+02 -6.00060000e+03  2.67620961e+00  6.59698751e+01]
------
Step:10, Action:West
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  6.59698751e+01]
New Q values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  4.22825081e+01]
Reward: -1  Episode Reward:  50
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   54.98186017 -252.78192178]
------
Step:11, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   54.98186017 -252.78192178]
New Q values:  [-252.35169558   11.28108573   34.07749649 -252.78192178]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x   x
x. gx
xxxxx
Step:12, Action:South
State  123
Old Q Values:  [-2.53448863e+02 -6.00060000e+03  2.67620961e+00  4.22825081e+01]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    42.28250808]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.66210281e+03 -9.35708462e+03  2.40000000e-02]
------
Step:13, Action:South
State  200
Old Q Values:  [ 6.28218634e+01  2.29467556e+03  1.93990569e+02 -8.40000000e-01]
New Q values:  [ 6.28218634e+01  4.04386150e+03  1.93990569e+02 -8.40000000e-01]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 10421.97090368  3459.17791953]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 10421.97090368  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477  7378.51936567  3459.17791953]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10701.10334733 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:15, Action:North
State  288
Old Q Values:  [10701.10334733 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 5065.09522235 -8656.02923281 -7525.7277781   2610.06133326]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   21.78334658   -51.76838521 -6170.35693855  2617.51294473]
------
Step:16, Action:North
State  216
Old Q Values:  [   21.78334658   -51.76838521 -6170.35693855  2617.51294473]
New Q values:  [   18.87855547   -51.76838521 -6170.35693855  2617.51294473]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           2.88323468 -180.6          35.88405614]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          35.88405614]
New Q values:  [-180.6           2.88323468 -180.6          91.60507789]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  2.59504851e+02]
------
Step:18, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  2.59504851e+02]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.13425190e+02]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   34.07749649 -252.78192178]
------
Step:19, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   34.07749649 -252.78192178]
New Q values:  [-252.35169558   11.28108573   47.05855545 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.13425190e+02]
------
Step:20, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    42.28250808]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    30.43056987]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   47.05855545 -252.78192178]
------
Step:21, Action:East
State  105
Old Q Values:  [ -180.6            6.66129953 -3895.20980426     0.        ]
New Q values:  [ -180.6            6.66129953 -1557.21741074     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:22, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.21368361e+03  7.17444631e+00]
New Q values:  [-1.01561177e+04 -5.99568600e+03  2.21368361e+03  4.26816838e+00]
Reward: -1  Episode Reward:  38
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            6.66129953 -1557.21741074     0.        ]
------
Step:23, Action:South
State  105
Old Q Values:  [ -180.6            6.66129953 -1557.21741074     0.        ]
New Q values:  [ -180.6          162.4976908  -1557.21741074     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  -0.6           0.          534.77723665 -178.98      ]
------
Step:24, Action:East
State  185
Old Q Values:  [  -0.6           0.          534.77723665 -178.98      ]
New Q values:  [-6.00000000e-01  0.00000000e+00  7.11941737e+02 -1.78980000e+02]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.66210281e+03 -9.35708462e+03  2.40000000e-02]
------
Step:25, Action:South
State  203
Old Q Values:  [ 3.60604218  0.         16.53929541  0.        ]
New Q values:  [   3.60604218 1417.31347636   16.53929541    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4726.37825453 4212.85593732]
------
Step:26, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4726.37825453 4212.85593732]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  4.72637825e+03  7.04527848e+04]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -109.74190661 -6457.4598       303.59541934 -2702.17995449]
------
Step:1, Action:East
State  260
Old Q Values:  [ -109.74190661 -6457.4598       303.59541934 -2702.17995449]
New Q values:  [ -109.74190661 -6457.4598      2614.02794138 -2702.17995449]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  7378.51936567  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477  4476.33631297  3459.17791953]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5065.09522235 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5065.09522235 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 5651.07449602 -8656.02923281 -7525.7277781   2610.06133326]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12065.45469027  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:4, Action:North
State  208
Old Q Values:  [12065.45469027  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [16581.83733003  5232.01858587 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 39167.51817975]
------
Step:5, Action:West
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 39167.51817975]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 43794.16250675]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  9.37391841e+04]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.13425190e+02]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.86400495e+02]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1118.76806437  862.42400667 -272.09726687]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 1118.76806437  862.42400667 -272.09726687]
New Q values:  [-177.44732869 1151.59529959  862.42400667 -272.09726687]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791   20.83123995 2348.96024615    0.        ]
------
Step:8, Action:East
State  177
Old Q Values:  [    0.         62663.07428404  3155.36156975     0.        ]
New Q values:  [    0.         62663.07428404 63215.23281902     0.        ]
Reward: 100009  Episode Reward:  100062
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 162.47291807 -289.59534477 3351.00213556 -251.53897752]
------
Step:1, Action:North
State  260
Old Q Values:  [ -109.74190661 -6457.4598      2614.02794138 -2702.17995449]
New Q values:  [  233.86175279 -6457.4598      2614.02794138 -2702.17995449]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   907.86171811     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [  22.25138791   20.83123995 2348.96024615    0.        ]
New Q values:  [  22.25138791   20.83123995 3185.11029257    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  7.46708731e+03  0.00000000e+00]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.64911213e+04 6.61800680e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.64911213e+04 7.62715392e+03 0.00000000e+00]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16581.83733003  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:4, Action:North
State  208
Old Q Values:  [16581.83733003  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [19776.38368404  5232.01858587 -2651.70614553 -2227.14232413]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038   -47.53253566  -180.00807518 43794.16250675]
------
Step:5, Action:West
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 43794.16250675]
New Q values:  [18220.41077038   -47.53253566  -180.00807518 45644.82023756]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  9.37391841e+04]
------
Step:6, Action:West
State  126
Old Q Values:  [   0.         2047.43645092    5.9800666    18.30049091]
New Q values:  [   0.         2047.43645092    5.9800666   358.19878624]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1151.59529959  862.42400667 -272.09726687]
------
Step:7, Action:South
State  99
Old Q Values:  [    0.         35284.64933898 29647.29381671     0.        ]
New Q values:  [    0.         40953.22618548 29647.29381671     0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         63160.05960754 89466.55483296     0.        ]
------
Step:8, Action:East
State  189
Old Q Values:  [    9.84673294   159.12555658 -1186.8605508   -244.98066897]
New Q values:  [   9.84673294  159.12555658 -473.66367145 -244.98066897]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         5.60182958]
------
Step:9, Action:West
State  205
Old Q Values:  [0.         0.         0.         5.60182958]
New Q values:  [ 0.          0.          0.         49.37839881]
Reward: -1  Episode Reward:  51
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  159.12555658 -473.66367145 -244.98066897]
------
Step:10, Action:South
State  189
Old Q Values:  [   9.84673294  159.12555658 -473.66367145 -244.98066897]
New Q values:  [   9.84673294 1068.3508633  -473.66367145 -244.98066897]
Reward: -1  Episode Reward:  50
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 162.47291807 -289.59534477 3351.00213556 -251.53897752]
------
Step:11, Action:East
State  261
Old Q Values:  [ 162.47291807 -289.59534477 3351.00213556 -251.53897752]
New Q values:  [ 162.47291807 -289.59534477 3826.99062787 -251.53897752]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  4476.33631297  3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477 63491.256874    3459.17791953]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  7.43036309e+03 -6.24561866e+03  1.65446841e+00]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6           2.88323468 -180.6          91.60507789]
New Q values:  [-180.6         791.80717729 -180.6          91.60507789]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   18.87855547   -51.76838521 -6170.35693855  2617.51294473]
------
Step:2, Action:West
State  216
Old Q Values:  [   18.87855547   -51.76838521 -6170.35693855  2617.51294473]
New Q values:  [   18.87855547   -51.76838521 -6170.35693855  5999.74156016]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.64911213e+04 7.62715392e+03 0.00000000e+00]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.64911213e+04 7.62715392e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 2.56492256e+04 7.62715392e+03 0.00000000e+00]
Reward: 9  Episode Reward:  27
xxxxx
x . x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 63491.256874    3459.17791953]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 63491.256874    3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477 27097.2250984   3459.17791953]
Reward: 9  Episode Reward:  36
xxxxx
x . x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5651.07449602 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:5, Action:North
State  288
Old Q Values:  [ 5651.07449602 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 8192.74490362 -8656.02923281 -7525.7277781   2610.06133326]
Reward: -1  Episode Reward:  35
xxxxx
x . x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19776.38368404  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:6, Action:North
State  210
Old Q Values:  [ 2.19506684e+04  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 9.01720950e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  34
xxxxx
x .ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         791.80717729 -180.6          91.60507789]
------
Step:7, Action:South
State  130
Old Q Values:  [18220.41077038   -47.53253566  -180.00807518 45644.82023756]
New Q values:  [18220.41077038  2685.54983454  -180.00807518 45644.82023756]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 9.01720950e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [ 9.01720950e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.84382595e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  32
xxxxx
x .ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         791.80717729 -180.6          91.60507789]
------
Step:9, Action:South
State  130
Old Q Values:  [18220.41077038  2685.54983454  -180.00807518 45644.82023756]
New Q values:  [18220.41077038  2672.33589352  -180.00807518 45644.82023756]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.84382595e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
------
Step:10, Action:South
State  210
Old Q Values:  [ 3.84382595e+03  5.32905320e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.84382595e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  30
xxxxx
x . x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8192.74490362 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:11, Action:North
State  288
Old Q Values:  [ 8192.74490362 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 9209.41306666 -8656.02923281 -7525.7277781   2610.06133326]
Reward: -1  Episode Reward:  29
xxxxx
x . x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19776.38368404  5232.01858587 -2651.70614553 -2227.14232413]
------
Step:12, Action:North
State  208
Old Q Values:  [19776.38368404  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [ 8147.4956268   5232.01858587 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  28
xxxxx
x .ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         791.80717729 -180.6          91.60507789]
------
Step:13, Action:South
State  130
Old Q Values:  [18220.41077038  2672.33589352  -180.00807518 45644.82023756]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 45644.82023756]
Reward: -1  Episode Reward:  27
xxxxx
x . x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8147.4956268   5232.01858587 -2651.70614553 -2227.14232413]
------
Step:14, Action:North
State  208
Old Q Values:  [ 8147.4956268   5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [ 3495.94040391  5232.01858587 -2651.70614553 -2227.14232413]
Reward: -1  Episode Reward:  26
xxxxx
x .ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         791.80717729 -180.6          91.60507789]
------
Step:15, Action:South
State  138
Old Q Values:  [-180.6         791.80717729 -180.6          91.60507789]
New Q values:  [ -180.6        -4114.27155332  -180.6           91.60507789]
Reward: -10001  Episode Reward:  -9975
xxxxx
x . x
x. gx
x.  x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[3.88538861e+01 3.08156359e+03 1.52665212e+01 2.68393664e+04]
------
Step:1, Action:West
State  193
Old Q Values:  [-5922.26708831  6492.2939704  -3909.58186816     0.        ]
New Q values:  [-5922.26708831  6492.2939704  -3909.58186816   598.66502582]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648  809.1343003  1977.55008606 -180.6       ]
------
Step:2, Action:East
State  183
Old Q Values:  [  22.25138791   20.83123995 3185.11029257    0.        ]
New Q values:  [  22.25138791   20.83123995 9325.25405199    0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[3.88538861e+01 3.08156359e+03 1.52665212e+01 2.68393664e+04]
------
Step:3, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  7.46708731e+03  0.00000000e+00]
New Q values:  [-6.00000000e-01  1.06176528e+02  7.46708731e+03  2.79697622e+03]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xa .x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791   20.83123995 9325.25405199    0.        ]
------
Step:4, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534   907.86171811     0.        ]
New Q values:  [    0.         -5969.29177534  2602.67088135     0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  7.46708731e+03  2.79697622e+03]
------
Step:5, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  7.46708731e+03  2.79697622e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  4.36888835e+03  2.79697622e+03]
Reward: 9  Episode Reward:  15
xxxxx
x...x
x  ax
x.g x
xxxxx
Step:6, Action:West
State  208
Old Q Values:  [ 3495.94040391  5232.01858587 -2651.70614553 -2227.14232413]
New Q values:  [ 3495.94040391  5232.01858587 -2651.70614553   803.31074191]
Reward: -10001  Episode Reward:  -9986
xxxxx
x...x
x g x
x.. x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  4.72637825e+03  7.04527848e+04]
------
Step:1, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  4.72637825e+03  7.04527848e+04]
New Q values:  [   37.74111519  -168.92307549  4726.37825453 29334.61112214]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 162.47291807 -289.59534477 3826.99062787 -251.53897752]
------
Step:2, Action:East
State  261
Old Q Values:  [ 162.47291807 -289.59534477 3826.99062787 -251.53897752]
New Q values:  [  162.47291807  -289.59534477 10330.57958779  -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x...x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4726.37825453 29334.61112214]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 27097.2250984   3459.17791953]
New Q values:  [-2527.46239811 -6212.61234477 27097.2250984   4482.24504415]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  162.47291807  -289.59534477 10330.57958779  -251.53897752]
------
Step:4, Action:North
State  261
Old Q Values:  [  162.47291807  -289.59534477 10330.57958779  -251.53897752]
New Q values:  [ 2867.96538283  -289.59534477 10330.57958779  -251.53897752]
Reward: 9  Episode Reward:  16
xxxxx
x.. x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791   20.83123995 9325.25405199    0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [  22.25138791   20.83123995 9325.25405199    0.        ]
New Q values:  [  22.25138791   20.83123995 5046.168126      0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  4.36888835e+03  2.79697622e+03]
------
Step:6, Action:East
State  195
Old Q Values:  [3.88538861e+01 3.08156359e+03 1.52665212e+01 2.68393664e+04]
New Q values:  [   38.85388605  3081.56358951  1388.1600337  26839.36644989]
Reward: 9  Episode Reward:  34
xxxxx
x.. x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.84382595e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:7, Action:North
State  210
Old Q Values:  [ 3.84382595e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.52303765e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  33
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 45644.82023756]
------
Step:8, Action:West
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 45644.82023756]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 46385.08332988]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  9.37391841e+04]
------
Step:9, Action:West
State  124
Old Q Values:  [ 0.00000000e+00  5.40000000e+00 -5.98640012e+03  3.32316783e+00]
New Q values:  [ 0.00000000e+00  5.40000000e+00 -5.98640012e+03  3.41121603e+02]
Reward: 9  Episode Reward:  51
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1114.64112106 -6367.52023856  -180.6       ]
------
Step:10, Action:South
State  108
Old Q Values:  [-8463.16477134   632.25076219   669.94256777     0.        ]
New Q values:  [-8463.16477134   608.44407335   669.94256777     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263   103.59077741  1187.14589491     0.        ]
------
Step:11, Action:East
State  189
Old Q Values:  [   9.84673294 1068.3508633  -473.66367145 -244.98066897]
New Q values:  [   9.84673294 1068.3508633   857.37074372 -244.98066897]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[ 0.00000000e+00  3.49145404e+03 -3.21532439e-01  1.99625001e+02]
------
Step:12, Action:South
State  204
Old Q Values:  [ 0.00000000e+00  3.49145404e+03 -3.21532439e-01  1.99625001e+02]
New Q values:  [ 0.00000000e+00  3.88317139e+03 -3.21532439e-01  1.99625001e+02]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 27097.2250984   4482.24504415]
New Q values:  [-2527.46239811 -6212.61234477 73607.11395936  4482.24504415]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.68990048 11588.02201575     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [-1367.02476015   138.68990048 11588.02201575     0.        ]
New Q values:  [-1367.02476015   138.68990048  5712.49284801     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3572.9468057     73.71095389     0.        ]
------
Step:2, Action:South
State  196
Old Q Values:  [-2469.90645144  3572.9468057     73.71095389     0.        ]
New Q values:  [-2469.90645144  3921.76849592    73.71095389     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 73607.11395936  4482.24504415]
New Q values:  [-2527.46239811 -6212.61234477 32211.06950374  4482.24504415]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9209.41306666 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:4, Action:North
State  288
Old Q Values:  [ 9209.41306666 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 5258.77080242 -8656.02923281 -7525.7277781   2610.06133326]
Reward: 9  Episode Reward:  36
xxxxx
xg..x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3495.94040391  5232.01858587 -2651.70614553   803.31074191]
------
Step:5, Action:South
State  208
Old Q Values:  [ 3495.94040391  5232.01858587 -2651.70614553   803.31074191]
New Q values:  [ 3495.94040391  3669.83867507 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5258.77080242 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:6, Action:North
State  288
Old Q Values:  [ 5258.77080242 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 3203.85992349 -8656.02923281 -7525.7277781   2610.06133326]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3495.94040391  3669.83867507 -2651.70614553   803.31074191]
------
Step:7, Action:South
State  208
Old Q Values:  [ 3495.94040391  3669.83867507 -2651.70614553   803.31074191]
New Q values:  [ 3495.94040391  2428.49344708 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3203.85992349 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:8, Action:North
State  288
Old Q Values:  [ 3203.85992349 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 2329.72609057 -8656.02923281 -7525.7277781   2610.06133326]
Reward: -1  Episode Reward:  32
xxxxx
xg..x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3495.94040391  2428.49344708 -2651.70614553   803.31074191]
------
Step:9, Action:North
State  216
Old Q Values:  [   18.87855547   -51.76838521 -6170.35693855  5999.74156016]
New Q values:  [ 2242.06034916   -51.76838521 -6170.35693855  5999.74156016]
Reward: 9  Episode Reward:  41
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6.18060000e+03  7.43036309e+03 -6.24561866e+03  1.65446841e+00]
------
Step:10, Action:South
State  128
Old Q Values:  [ 6067.82062533  1141.37341868 -8652.84        4547.03370611]
New Q values:  [ 6067.82062533  1504.73148864 -8652.84        4547.03370611]
Reward: -1  Episode Reward:  40
xxxxx
x .gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3495.94040391  2428.49344708 -2651.70614553   803.31074191]
------
Step:11, Action:South
State  208
Old Q Values:  [ 3495.94040391  2428.49344708 -2651.70614553   803.31074191]
New Q values:  [ 3495.94040391  1753.81577881 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2329.72609057 -8656.02923281 -7525.7277781   2610.06133326]
------
Step:12, Action:West
State  288
Old Q Values:  [ 2329.72609057 -8656.02923281 -7525.7277781   2610.06133326]
New Q values:  [ 2329.72609057 -8656.02923281 -7525.7277781   9843.80786995]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4726.37825453 29334.61112214]
------
Step:13, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4726.37825453 29334.61112214]
New Q values:  [   37.74111519  -168.92307549  4726.37825453 14838.41832519]
Reward: 9  Episode Reward:  47
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2867.96538283  -289.59534477 10330.57958779  -251.53897752]
------
Step:14, Action:East
State  261
Old Q Values:  [ 2867.96538283  -289.59534477 10330.57958779  -251.53897752]
New Q values:  [2867.96538283 -289.59534477 8583.15733267 -251.53897752]
Reward: -1  Episode Reward:  46
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4726.37825453 14838.41832519]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 32211.06950374  4482.24504415]
New Q values:  [-2527.46239811 -6212.61234477 32211.06950374 10554.54047718]
Reward: -1  Episode Reward:  45
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[29207.47486505  -180.6         3852.81245667  4453.74493219]
------
Step:16, Action:North
State  261
Old Q Values:  [2867.96538283 -289.59534477 8583.15733267 -251.53897752]
New Q values:  [1739.85117895 -289.59534477 8583.15733267 -251.53897752]
Reward: -1  Episode Reward:  44
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648  809.1343003  1977.55008606 -180.6       ]
------
Step:17, Action:East
State  177
Old Q Values:  [    0.         62663.07428404 63215.23281902     0.        ]
New Q values:  [    0.         62663.07428404 32980.26079918     0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.56492256e+04 7.62715392e+03 0.00000000e+00]
------
Step:18, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.56492256e+04 7.62715392e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.99224111e+04 7.62715392e+03 0.00000000e+00]
Reward: -1  Episode Reward:  42
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 32211.06950374 10554.54047718]
------
Step:19, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 32211.06950374 10554.54047718]
New Q values:  [-2527.46239811 -6212.61234477 15836.97016248 10554.54047718]
Reward: -1  Episode Reward:  41
xxxxx
x . x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2329.72609057 -8656.02923281 -7525.7277781   9843.80786995]
------
Step:20, Action:West
State  288
Old Q Values:  [ 2329.72609057 -8656.02923281 -7525.7277781   9843.80786995]
New Q values:  [ 2329.72609057 -8656.02923281 -7525.7277781   8688.01419672]
Reward: -1  Episode Reward:  40
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 15836.97016248 10554.54047718]
------
Step:21, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 15836.97016248 10554.54047718]
New Q values:  [-2527.46239811 -6212.61234477  8940.59232401 10554.54047718]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2329.72609057 -8656.02923281 -7525.7277781   8688.01419672]
------
Step:22, Action:North
State  288
Old Q Values:  [ 2329.72609057 -8656.02923281 -7525.7277781   8688.01419672]
New Q values:  [ 5500.4033718  -8656.02923281 -7525.7277781   8688.01419672]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.52303765e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:23, Action:North
State  208
Old Q Values:  [ 3495.94040391  1753.81577881 -2651.70614553   803.31074191]
New Q values:  [15313.30116053  1753.81577881 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  37
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 46385.08332988]
------
Step:24, Action:West
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 46385.08332988]
New Q values:  [ 18220.41077038   3512.58304545   -180.00807518 106681.18856681]
Reward: 100009  Episode Reward:  100046
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648  809.1343003  1977.55008606 -180.6       ]
------
Step:1, Action:East
State  189
Old Q Values:  [   9.84673294 1068.3508633   857.37074372 -244.98066897]
New Q values:  [   9.84673294 1068.3508633  2296.03648861 -244.98066897]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  6492.2939704  -3909.58186816   598.66502582]
------
Step:2, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.99224111e+04 7.62715392e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.11407266e+04 7.62715392e+03 0.00000000e+00]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  8940.59232401 10554.54047718]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  8940.59232401 10554.54047718]
New Q values:  [-2527.46239811 -6212.61234477  8940.59232401  6802.16339067]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1739.85117895 -289.59534477 8583.15733267 -251.53897752]
------
Step:4, Action:East
State  261
Old Q Values:  [1739.85117895 -289.59534477 8583.15733267 -251.53897752]
New Q values:  [1739.85117895 -289.59534477  114.84063027 -251.53897752]
Reward: -10001  Episode Reward:  -9974
xxxxx
x...x
x   x
x g.x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  6492.2939704  -3909.58186816   598.66502582]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  6492.2939704  -3909.58186816   598.66502582]
New Q values:  [-5922.26708831  7053.84308572 -3909.58186816   598.66502582]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4726.37825453 14838.41832519]
------
Step:2, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4726.37825453 14838.41832519]
New Q values:  [  37.74111519 -168.92307549 4726.37825453 6462.72268376]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1739.85117895 -289.59534477  114.84063027 -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [1739.85117895 -289.59534477  114.84063027 -251.53897752]
New Q values:  [19500.26275679  -289.59534477   114.84063027  -251.53897752]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         62663.07428404 32980.26079918     0.        ]
------
Step:4, Action:South
State  181
Old Q Values:  [  13.85659648  809.1343003  1977.55008606 -180.6       ]
New Q values:  [  13.85659648 6173.13254716 1977.55008606 -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[19500.26275679  -289.59534477   114.84063027  -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [19500.26275679  -289.59534477   114.84063027  -251.53897752]
New Q values:  [9313.35554052 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791   20.83123995 5046.168126      0.        ]
------
Step:6, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  2602.67088135     0.        ]
New Q values:  [    0.         -5969.29177534  2351.13485775     0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  4.36888835e+03  2.79697622e+03]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  4.36888835e+03  2.79697622e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  6.32206828e+03  2.79697622e+03]
Reward: 9  Episode Reward:  33
xxxxx
x.. x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.52303765e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [ 1.52303765e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.80959072e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  32
xxxxx
x..ax
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 18220.41077038   3512.58304545   -180.00807518 106681.18856681]
------
Step:9, Action:West
State  130
Old Q Values:  [ 18220.41077038   3512.58304545   -180.00807518 106681.18856681]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 70799.63066158]
Reward: 9  Episode Reward:  41
xxxxx
x.a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  9.37391841e+04]
------
Step:10, Action:West
State  126
Old Q Values:  [   0.         2047.43645092    5.9800666   358.19878624]
New Q values:  [   0.         2047.43645092    5.9800666   494.15810438]
Reward: 9  Episode Reward:  50
xxxxx
xa  x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1151.59529959  862.42400667 -272.09726687]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 1151.59529959  862.42400667 -272.09726687]
New Q values:  [-177.44732869  638.63495005  862.42400667 -272.09726687]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 595.32276737  46.04536991   0.        ]
------
Step:12, Action:South
State  179
Old Q Values:  [    0.         63160.05960754 89466.55483296     0.        ]
New Q values:  [    0.         34025.66630253 89466.55483296     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[29207.47486505  -180.6         3852.81245667  4453.74493219]
------
Step:13, Action:North
State  261
Old Q Values:  [9313.35554052 -289.59534477  114.84063027 -251.53897752]
New Q values:  [4413.55316279 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 1068.3508633  2296.03648861 -244.98066897]
------
Step:14, Action:East
State  189
Old Q Values:  [   9.84673294 1068.3508633  2296.03648861 -244.98066897]
New Q values:  [   9.84673294 1068.3508633   932.62811509 -244.98066897]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[ 0.          0.          0.         49.37839881]
------
Step:15, Action:West
State  204
Old Q Values:  [ 0.00000000e+00  3.88317139e+03 -3.21532439e-01  1.99625001e+02]
New Q values:  [ 0.00000000e+00  3.88317139e+03 -3.21532439e-01  3.99755260e+02]
Reward: -1  Episode Reward:  45
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 1068.3508633   932.62811509 -244.98066897]
------
Step:16, Action:South
State  189
Old Q Values:  [   9.84673294 1068.3508633   932.62811509 -244.98066897]
New Q values:  [   9.84673294 1750.80629416  932.62811509 -244.98066897]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4413.55316279 -289.59534477  114.84063027 -251.53897752]
------
Step:17, Action:North
State  261
Old Q Values:  [4413.55316279 -289.59534477  114.84063027 -251.53897752]
New Q values:  [2290.06315336 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 1750.80629416  932.62811509 -244.98066897]
------
Step:18, Action:South
State  189
Old Q Values:  [   9.84673294 1750.80629416  932.62811509 -244.98066897]
New Q values:  [   9.84673294 1386.74146367  932.62811509 -244.98066897]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2290.06315336 -289.59534477  114.84063027 -251.53897752]
------
Step:19, Action:North
State  261
Old Q Values:  [2290.06315336 -289.59534477  114.84063027 -251.53897752]
New Q values:  [1331.44770045 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 1386.74146367  932.62811509 -244.98066897]
------
Step:20, Action:South
State  189
Old Q Values:  [   9.84673294 1386.74146367  932.62811509 -244.98066897]
New Q values:  [   9.84673294  953.5308956   932.62811509 -244.98066897]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1331.44770045 -289.59534477  114.84063027 -251.53897752]
------
Step:21, Action:North
State  261
Old Q Values:  [1331.44770045 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 818.03834886 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  39
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  953.5308956   932.62811509 -244.98066897]
------
Step:22, Action:South
State  189
Old Q Values:  [   9.84673294  953.5308956   932.62811509 -244.98066897]
New Q values:  [   9.84673294  626.2238629   932.62811509 -244.98066897]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 818.03834886 -289.59534477  114.84063027 -251.53897752]
------
Step:23, Action:North
State  261
Old Q Values:  [ 818.03834886 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 606.40377407 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  37
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  626.2238629   932.62811509 -244.98066897]
------
Step:24, Action:East
State  189
Old Q Values:  [   9.84673294  626.2238629   932.62811509 -244.98066897]
New Q values:  [   9.84673294  626.2238629   387.26476568 -244.98066897]
Reward: -1  Episode Reward:  36
xxxxx
x  gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[ 0.          0.          0.         49.37839881]
------
Step:25, Action:West
State  205
Old Q Values:  [ 0.          0.          0.         49.37839881]
New Q values:  [  0.           0.           0.         207.01851839]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  626.2238629   387.26476568 -244.98066897]
------
Step:26, Action:South
State  189
Old Q Values:  [   9.84673294  626.2238629   387.26476568 -244.98066897]
New Q values:  [   9.84673294  431.81067738  387.26476568 -244.98066897]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 606.40377407 -289.59534477  114.84063027 -251.53897752]
------
Step:27, Action:North
State  261
Old Q Values:  [ 606.40377407 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 371.50471284 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  33
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  431.81067738  387.26476568 -244.98066897]
------
Step:28, Action:South
State  189
Old Q Values:  [   9.84673294  431.81067738  387.26476568 -244.98066897]
New Q values:  [   9.84673294  283.5756848   387.26476568 -244.98066897]
Reward: -1  Episode Reward:  32
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 371.50471284 -289.59534477  114.84063027 -251.53897752]
------
Step:29, Action:North
State  261
Old Q Values:  [ 371.50471284 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 264.18131484 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  283.5756848   387.26476568 -244.98066897]
------
Step:30, Action:East
State  179
Old Q Values:  [    0.         34025.66630253 89466.55483296     0.        ]
New Q values:  [    0.         34025.66630253 43837.83186815     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3081.56358951  1388.1600337  26839.36644989]
------
Step:31, Action:West
State  205
Old Q Values:  [  0.           0.           0.         207.01851839]
New Q values:  [  0.           0.           0.         198.38683706]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  283.5756848   387.26476568 -244.98066897]
------
Step:32, Action:East
State  189
Old Q Values:  [   9.84673294  283.5756848   387.26476568 -244.98066897]
New Q values:  [    9.84673294   283.5756848  -4680.74267672  -244.98066897]
Reward: -10001  Episode Reward:  -9972
xxxxx
x   x
x g x
x  .x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3081.56358951  1388.1600337  26839.36644989]
------
Step:1, Action:West
State  193
Old Q Values:  [-5922.26708831  7053.84308572 -3909.58186816   598.66502582]
New Q values:  [-5922.26708831  7053.84308572 -3909.58186816  2096.80577447]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648 6173.13254716 1977.55008606 -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [  13.85659648 6173.13254716 1977.55008606 -180.6       ]
New Q values:  [  13.85659648 2553.90741331 1977.55008606 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 264.18131484 -289.59534477  114.84063027 -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [ 264.18131484 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 871.24474993 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xa gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648 2553.90741331 1977.55008606 -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [  13.85659648 2553.90741331 1977.55008606 -180.6       ]
New Q values:  [  13.85659648 1282.3363903  1977.55008606 -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x  .x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 871.24474993 -289.59534477  114.84063027 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [ 871.24474993 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 941.16292579 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xa gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  13.85659648 1282.3363903  1977.55008606 -180.6       ]
------
Step:6, Action:East
State  181
Old Q Values:  [  13.85659648 1282.3363903  1977.55008606 -180.6       ]
New Q values:  [   13.85659648  1282.3363903  -1867.36199304  -180.6       ]
Reward: -10001  Episode Reward:  -9986
xxxxx
x...x
x g.x
x . x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15313.30116053  1753.81577881 -2651.70614553   803.31074191]
------
Step:1, Action:North
State  208
Old Q Values:  [15313.30116053  1753.81577881 -2651.70614553   803.31074191]
New Q values:  [ 6158.20198758  1753.81577881 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4114.27155332  -180.6           91.60507789]
------
Step:2, Action:West
State  136
Old Q Values:  [-6.18060000e+03  7.43036309e+03 -6.24561866e+03  1.65446841e+00]
New Q values:  [-6180.6         7430.36308991 -6245.61866138     7.52829833]
Reward: 9  Episode Reward:  18
xxxxx
x.agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    30.43056987]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    31.68979458]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   47.05855545 -252.78192178]
------
Step:4, Action:East
State  105
Old Q Values:  [ -180.6          162.4976908  -1557.21741074     0.        ]
New Q values:  [-180.6         162.4976908  -622.02045333    0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    31.68979458]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    26.19348447]
Reward: -1  Episode Reward:  25
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   47.05855545 -252.78192178]
------
Step:6, Action:East
State  105
Old Q Values:  [-180.6         162.4976908  -622.02045333    0.        ]
New Q values:  [-180.6         162.4976908  -247.94167037    0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    26.19348447]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    23.99496042]
Reward: -1  Episode Reward:  23
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   47.05855545 -252.78192178]
------
Step:8, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   47.05855545 -252.78192178]
New Q values:  [-252.35169558   11.28108573   25.42191031 -252.78192178]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x.  x
x..gx
xxxxx
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    23.99496042]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    16.62455726]
Reward: -1  Episode Reward:  21
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   25.42191031 -252.78192178]
------
Step:10, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   25.42191031 -252.78192178]
New Q values:  [-252.35169558   11.28108573  125.48891266 -252.78192178]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.86400495e+02]
------
Step:11, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    16.62455726]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    43.6964967 ]
Reward: -1  Episode Reward:  19
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  125.48891266 -252.78192178]
------
Step:12, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573  125.48891266 -252.78192178]
New Q values:  [-252.35169558   11.28108573  165.5157136  -252.78192178]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.86400495e+02]
------
Step:13, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    43.6964967 ]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    66.53331276]
Reward: -1  Episode Reward:  17
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  165.5157136  -252.78192178]
------
Step:14, Action:East
State  105
Old Q Values:  [-180.6         162.4976908  -247.94167037    0.        ]
New Q values:  [-180.6         162.4976908   -98.31015718    0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:15, Action:West
State  120
Old Q Values:  [-1.01561177e+04 -5.99568600e+03  2.21368361e+03  4.26816838e+00]
New Q values:  [-10156.11771313  -5995.686        2213.68361131     49.8565746 ]
Reward: -1  Episode Reward:  15
xxxxx
xag x
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         162.4976908   -98.31015718    0.        ]
------
Step:16, Action:South
State  109
Old Q Values:  [ -241.10880094  1114.64112106 -6367.52023856  -180.6       ]
New Q values:  [ -241.10880094   536.32915387 -6367.52023856  -180.6       ]
Reward: 9  Episode Reward:  24
xxxxx
x  gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   283.5756848  -4680.74267672  -244.98066897]
------
Step:17, Action:South
State  189
Old Q Values:  [    9.84673294   283.5756848  -4680.74267672  -244.98066897]
New Q values:  [    9.84673294   401.17915166 -4680.74267672  -244.98066897]
Reward: 9  Episode Reward:  33
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 941.16292579 -289.59534477  114.84063027 -251.53897752]
------
Step:18, Action:North
State  261
Old Q Values:  [ 941.16292579 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 496.21891581 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  32
xxxxx
x  gx
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   401.17915166 -4680.74267672  -244.98066897]
------
Step:19, Action:South
State  189
Old Q Values:  [    9.84673294   401.17915166 -4680.74267672  -244.98066897]
New Q values:  [    9.84673294   308.73733541 -4680.74267672  -244.98066897]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 496.21891581 -289.59534477  114.84063027 -251.53897752]
------
Step:20, Action:North
State  261
Old Q Values:  [ 496.21891581 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 290.50876695 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   308.73733541 -4680.74267672  -244.98066897]
------
Step:21, Action:South
State  189
Old Q Values:  [    9.84673294   308.73733541 -4680.74267672  -244.98066897]
New Q values:  [    9.84673294   210.04756425 -4680.74267672  -244.98066897]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 290.50876695 -289.59534477  114.84063027 -251.53897752]
------
Step:22, Action:North
State  261
Old Q Values:  [ 290.50876695 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 178.61777605 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   210.04756425 -4680.74267672  -244.98066897]
------
Step:23, Action:South
State  189
Old Q Values:  [    9.84673294   210.04756425 -4680.74267672  -244.98066897]
New Q values:  [    9.84673294   137.00435851 -4680.74267672  -244.98066897]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 178.61777605 -289.59534477  114.84063027 -251.53897752]
------
Step:24, Action:North
State  261
Old Q Values:  [ 178.61777605 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 111.94841798 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  26
xxxxx
x  gx
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   137.00435851 -4680.74267672  -244.98066897]
------
Step:25, Action:South
State  189
Old Q Values:  [    9.84673294   137.00435851 -4680.74267672  -244.98066897]
New Q values:  [    9.84673294    88.65393249 -4680.74267672  -244.98066897]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 111.94841798 -289.59534477  114.84063027 -251.53897752]
------
Step:26, Action:East
State  260
Old Q Values:  [  233.86175279 -6457.4598      2614.02794138 -2702.17995449]
New Q values:  [  233.86175279 -6457.4598      3538.2009502  -2702.17995449]
Reward: 9  Episode Reward:  34
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:27, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  8940.59232401  6802.16339067]
New Q values:  [-2527.46239811 -6212.61234477 66188.04118862  6802.16339067]
Reward: 100009  Episode Reward:  100043
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4114.27155332  -180.6           91.60507789]
------
Step:1, Action:West
State  138
Old Q Values:  [ -180.6        -4114.27155332  -180.6           91.60507789]
New Q values:  [ -180.6        -4114.27155332  -180.6          157.96217969]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.86400495e+02]
------
Step:2, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.86400495e+02]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.60880190e+02]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -5.64889407e+03  3.06663838e+00 -1.80600000e+02]
------
Step:3, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573  165.5157136  -252.78192178]
New Q values:  [-252.35169558   11.28108573  113.87034231 -252.78192178]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.60880190e+02]
------
Step:4, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.60880190e+02]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  9.79131785e+01]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  113.87034231 -252.78192178]
------
Step:5, Action:East
State  109
Old Q Values:  [ -241.10880094   536.32915387 -6367.52023856  -180.6       ]
New Q values:  [ -241.10880094   536.32915387 -7883.50301203  -180.6       ]
Reward: -10001  Episode Reward:  -9985
xxxxx
x g x
x...x
x.. x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5500.4033718  -8656.02923281 -7525.7277781   8688.01419672]
------
Step:1, Action:North
State  288
Old Q Values:  [ 5500.4033718  -8656.02923281 -7525.7277781   8688.01419672]
New Q values:  [13634.33349396 -8656.02923281 -7525.7277781   8688.01419672]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.80959072e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [ 6158.20198758  1753.81577881 -2651.70614553   803.31074191]
New Q values:  [ 2516.06944894  1753.81577881 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4114.27155332  -180.6          157.96217969]
------
Step:3, Action:West
State  138
Old Q Values:  [ -180.6        -4114.27155332  -180.6          157.96217969]
New Q values:  [ -180.6        -4114.27155332  -180.6           97.95882543]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  9.79131785e+01]
------
Step:4, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    66.53331276]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    66.1744278 ]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  113.87034231 -252.78192178]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573  113.87034231 -252.78192178]
New Q values:  [-252.35169558   11.28108573   74.32209048 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  9.79131785e+01]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  9.79131785e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.90872965e+01]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  1.74008365e+00 -1.80600000e+02]
------
Step:7, Action:East
State  104
Old Q Values:  [-8.65284000e+03  4.32678599e-01 -9.58483566e+02 -8.65284000e+03]
New Q values:  [-8.65284000e+03  4.32678599e-01  2.80111657e+02 -8.65284000e+03]
Reward: -1  Episode Reward:  33
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        2213.68361131     49.8565746 ]
------
Step:8, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        2213.68361131     49.8565746 ]
New Q values:  [-10156.11771313  -5995.686        3113.9823715      49.8565746 ]
Reward: -1  Episode Reward:  32
xxxxx
x gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         7430.36308991 -6245.61866138     7.52829833]
------
Step:9, Action:South
State  138
Old Q Values:  [ -180.6        -4114.27155332  -180.6           97.95882543]
New Q values:  [-180.6        -891.48778665 -180.6          97.95882543]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2516.06944894  1753.81577881 -2651.70614553   803.31074191]
------
Step:10, Action:North
State  208
Old Q Values:  [ 2516.06944894  1753.81577881 -2651.70614553   803.31074191]
New Q values:  [ 3234.93670655  1753.81577881 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  30
xxxxx
x gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         7430.36308991 -6245.61866138     7.52829833]
------
Step:11, Action:South
State  136
Old Q Values:  [-6180.6         7430.36308991 -6245.61866138     7.52829833]
New Q values:  [-6180.6         3942.02624793 -6245.61866138     7.52829833]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3234.93670655  1753.81577881 -2651.70614553   803.31074191]
------
Step:12, Action:North
State  208
Old Q Values:  [ 3234.93670655  1753.81577881 -2651.70614553   803.31074191]
New Q values:  [ 2475.982557    1753.81577881 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  28
xxxxx
x gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-6180.6         3942.02624793 -6245.61866138     7.52829833]
------
Step:13, Action:South
State  138
Old Q Values:  [-180.6        -891.48778665 -180.6          97.95882543]
New Q values:  [-180.6         385.59965244 -180.6          97.95882543]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x.gax
x.  x
xxxxx
Step:14, Action:South
State  208
Old Q Values:  [ 2475.982557    1753.81577881 -2651.70614553   803.31074191]
New Q values:  [ 2475.982557    4791.22635971 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13634.33349396 -8656.02923281 -7525.7277781   8688.01419672]
------
Step:15, Action:West
State  288
Old Q Values:  [13634.33349396 -8656.02923281 -7525.7277781   8688.01419672]
New Q values:  [13634.33349396 -8656.02923281 -7525.7277781   5413.42248382]
Reward: -1  Episode Reward:  25
xxxxx
x  gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4726.37825453 6462.72268376]
------
Step:16, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4726.37825453 6462.72268376]
New Q values:  [   37.74111519  -168.92307549  4726.37825453 11352.73153302]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[29207.47486505  -180.6         3852.81245667  4453.74493219]
------
Step:17, Action:North
State  261
Old Q Values:  [ 111.94841798 -289.59534477  114.84063027 -251.53897752]
New Q values:  [1564.02980499 -289.59534477  114.84063027 -251.53897752]
Reward: 9  Episode Reward:  43
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791   20.83123995 5046.168126      0.        ]
------
Step:18, Action:East
State  177
Old Q Values:  [    0.         62663.07428404 32980.26079918     0.        ]
New Q values:  [    0.         62663.07428404 75313.65724539     0.        ]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  638.63495005  862.42400667 -272.09726687]
------
Step:1, Action:East
State  109
Old Q Values:  [ -241.10880094   536.32915387 -7883.50301203  -180.6       ]
New Q values:  [ -241.10880094   536.32915387 -8213.80649336  -180.6       ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x g.x
x. .x
x...x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   536.32915387 -8213.80649336  -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869  638.63495005  862.42400667 -272.09726687]
New Q values:  [-177.44732869  645.55489711  862.42400667 -272.09726687]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1282.3363903  -1867.36199304  -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   13.85659648  1282.3363903  -1867.36199304  -180.6       ]
New Q values:  [   13.85659648   987.54349762 -1867.36199304  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1564.02980499 -289.59534477  114.84063027 -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [1564.02980499 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 921.27497128 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   987.54349762 -1867.36199304  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648   987.54349762 -1867.36199304  -180.6       ]
New Q values:  [   13.85659648   670.79989043 -1867.36199304  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 921.27497128 -289.59534477  114.84063027 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [ 921.27497128 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 569.14995564 -289.59534477  114.84063027 -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   670.79989043 -1867.36199304  -180.6       ]
------
Step:6, Action:South
State  180
Old Q Values:  [-1367.02476015   138.68990048  5712.49284801     0.        ]
New Q values:  [-1367.02476015  1116.33624525  5712.49284801     0.        ]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  233.86175279 -6457.4598      3538.2009502  -2702.17995449]
------
Step:7, Action:East
State  261
Old Q Values:  [ 569.14995564 -289.59534477  114.84063027 -251.53897752]
New Q values:  [ 569.14995564 -289.59534477 2538.52602575 -251.53897752]
Reward: 9  Episode Reward:  23
xxxxx
x g x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881  233.33860649]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 66188.04118862  6802.16339067]
New Q values:  [-2527.46239811 -6212.61234477 30570.91652364  6802.16339067]
Reward: 9  Episode Reward:  32
xxxxx
xg. x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13634.33349396 -8656.02923281 -7525.7277781   5413.42248382]
------
Step:9, Action:North
State  288
Old Q Values:  [13634.33349396 -8656.02923281 -7525.7277781   5413.42248382]
New Q values:  [ 6896.5013055  -8656.02923281 -7525.7277781   5413.42248382]
Reward: 9  Episode Reward:  41
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2475.982557    4791.22635971 -2651.70614553   803.31074191]
------
Step:10, Action:South
State  208
Old Q Values:  [ 2475.982557    4791.22635971 -2651.70614553   803.31074191]
New Q values:  [ 2475.982557    3984.84093553 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  40
xxxxx
x .gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6896.5013055  -8656.02923281 -7525.7277781   5413.42248382]
------
Step:11, Action:North
State  288
Old Q Values:  [ 6896.5013055  -8656.02923281 -7525.7277781   5413.42248382]
New Q values:  [ 3953.45280286 -8656.02923281 -7525.7277781   5413.42248382]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2475.982557    3984.84093553 -2651.70614553   803.31074191]
------
Step:12, Action:South
State  208
Old Q Values:  [ 2475.982557    3984.84093553 -2651.70614553   803.31074191]
New Q values:  [ 2475.982557    3217.36311936 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  38
xxxxx
x .gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3953.45280286 -8656.02923281 -7525.7277781   5413.42248382]
------
Step:13, Action:West
State  288
Old Q Values:  [ 3953.45280286 -8656.02923281 -7525.7277781   5413.42248382]
New Q values:  [ 3953.45280286 -8656.02923281 -7525.7277781  11336.04395062]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 30570.91652364  6802.16339067]
------
Step:14, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549  4726.37825453 11352.73153302]
New Q values:  [   37.74111519  -168.92307549  5290.764487   11352.73153302]
Reward: -1  Episode Reward:  36
xxxxx
x .gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3953.45280286 -8656.02923281 -7525.7277781  11336.04395062]
------
Step:15, Action:West
State  288
Old Q Values:  [ 3953.45280286 -8656.02923281 -7525.7277781  11336.04395062]
New Q values:  [ 3953.45280286 -8656.02923281 -7525.7277781   7939.63704015]
Reward: -1  Episode Reward:  35
xxxxx
x . x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5290.764487   11352.73153302]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 30570.91652364  6802.16339067]
New Q values:  [-2527.46239811 -6212.61234477 30570.91652364  3481.82316399]
Reward: -1  Episode Reward:  34
xxxxx
x . x
x g x
xa  x
xxxxx
Step:17, Action:East
State  261
Old Q Values:  [ 569.14995564 -289.59534477 2538.52602575 -251.53897752]
New Q values:  [ 569.14995564 -289.59534477 4420.62987021 -251.53897752]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5290.764487   11352.73153302]
------
Step:18, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  5290.764487   11352.73153302]
New Q values:  [  37.74111519 -168.92307549 5290.764487   5866.68157427]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 569.14995564 -289.59534477 4420.62987021 -251.53897752]
------
Step:19, Action:East
State  261
Old Q Values:  [ 569.14995564 -289.59534477 4420.62987021 -251.53897752]
New Q values:  [ 569.14995564 -289.59534477 3527.65642036 -251.53897752]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5290.764487   5866.68157427]
------
Step:20, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 5290.764487   5866.68157427]
New Q values:  [  37.74111519 -168.92307549 5290.764487   3404.36955582]
Reward: -1  Episode Reward:  30
xxxxx
x .gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 569.14995564 -289.59534477 3527.65642036 -251.53897752]
------
Step:21, Action:East
State  261
Old Q Values:  [ 569.14995564 -289.59534477 3527.65642036 -251.53897752]
New Q values:  [ 569.14995564 -289.59534477 2997.69191424 -251.53897752]
Reward: -1  Episode Reward:  29
xxxxx
x . x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5290.764487   3404.36955582]
------
Step:22, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 5290.764487   3404.36955582]
New Q values:  [   37.74111519  -168.92307549 -1502.40309316  3404.36955582]
Reward: -10001  Episode Reward:  -9972
xxxxx
x . x
x . x
x  gx
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 569.14995564 -289.59534477 2997.69191424 -251.53897752]
------
Step:1, Action:East
State  261
Old Q Values:  [ 569.14995564 -289.59534477 2997.69191424 -251.53897752]
New Q values:  [  569.14995564  -289.59534477 10375.75172279  -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 30570.91652364  3481.82316399]
------
Step:2, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 -1502.40309316  3404.36955582]
New Q values:  [  37.74111519 -168.92307549 1786.32987478 3404.36955582]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3953.45280286 -8656.02923281 -7525.7277781   7939.63704015]
------
Step:3, Action:West
State  288
Old Q Values:  [ 3953.45280286 -8656.02923281 -7525.7277781   7939.63704015]
New Q values:  [ 3953.45280286 -8656.02923281 -7525.7277781  12346.52977315]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 30570.91652364  3481.82316399]
------
Step:4, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1786.32987478 3404.36955582]
New Q values:  [  37.74111519 -168.92307549 4417.89088186 3404.36955582]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3953.45280286 -8656.02923281 -7525.7277781  12346.52977315]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3953.45280286 -8656.02923281 -7525.7277781  12346.52977315]
New Q values:  [ 3953.45280286 -8656.02923281 -7525.7277781   6263.37917382]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4417.89088186 3404.36955582]
------
Step:6, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4417.89088186 3404.36955582]
New Q values:  [   37.74111519  -168.92307549 -2354.42989511  3404.36955582]
Reward: -10001  Episode Reward:  -9986
xxxxx
x.. x
x...x
x  gx
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 30570.91652364  3481.82316399]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 30570.91652364  3481.82316399]
New Q values:  [-2527.46239811 -6212.61234477 14112.7803616   3481.82316399]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3953.45280286 -8656.02923281 -7525.7277781   6263.37917382]
------
Step:2, Action:North
State  288
Old Q Values:  [ 3953.45280286 -8656.02923281 -7525.7277781   6263.37917382]
New Q values:  [13015.55326638 -8656.02923281 -7525.7277781   6263.37917382]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.80959072e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [ 3.80959072e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.64836521e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 70799.63066158]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6         385.59965244 -180.6          97.95882543]
New Q values:  [-180.6         385.59965244 -180.6          64.43585851]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811     2.67620961    66.1744278 ]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    66.1744278 ]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    54.16639826]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   74.32209048 -252.78192178]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   74.32209048 -252.78192178]
New Q values:  [-252.35169558   11.28108573   40.85502514 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.90872965e+01]
------
Step:7, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  3.90872965e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.55569437e+01]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  1.74008365e+00 -1.80600000e+02]
------
Step:8, Action:East
State  106
Old Q Values:  [-1.80600000e+02 -6.00060000e+03  1.74008365e+00 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -6.00060000e+03  4.76311657e+00 -1.80600000e+02]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.55569437e+01]
------
Step:9, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.55569437e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.78792850e+01]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   40.85502514 -252.78192178]
------
Step:10, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   40.85502514 -252.78192178]
New Q values:  [-252.35169558   11.28108573   21.10579556 -252.78192178]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.78792850e+01]
------
Step:11, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  1.78792850e+01]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  7.98064898e+00]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  4.76311657e+00 -1.80600000e+02]
------
Step:12, Action:East
State  106
Old Q Values:  [-1.80600000e+02 -6.00060000e+03  4.76311657e+00 -1.80600000e+02]
New Q values:  [-1.80600000e+02 -6.00060000e+03  3.69944132e+00 -1.80600000e+02]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  7.98064898e+00]
------
Step:13, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  7.98064898e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  8.92399826e+00]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   21.10579556 -252.78192178]
------
Step:14, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   21.10579556 -252.78192178]
New Q values:  [-252.35169558   11.28108573   24.0922377  -252.78192178]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811     2.67620961    54.16639826]
------
Step:15, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    54.16639826]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    28.29423062]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   24.0922377  -252.78192178]
------
Step:16, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   24.0922377  -252.78192178]
New Q values:  [-252.35169558   11.28108573   11.71409456 -252.78192178]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.82110936e+00  8.92399826e+00]
------
Step:17, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  8.92399826e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  4.07943170e+00]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[-1.80600000e+02 -6.00060000e+03  3.69944132e+00 -1.80600000e+02]
------
Step:18, Action:East
State  104
Old Q Values:  [-8.65284000e+03  4.32678599e-01  2.80111657e+02 -8.65284000e+03]
New Q values:  [-8.65284000e+03  4.32678599e-01  1.04563937e+03 -8.65284000e+03]
Reward: -1  Episode Reward:  32
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        3113.9823715      49.8565746 ]
------
Step:19, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.82110936e+00  4.07943170e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.15808339e+02  4.07943170e+00]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         385.59965244 -180.6          64.43585851]
------
Step:20, Action:South
State  136
Old Q Values:  [-6180.6         3942.02624793 -6245.61866138     7.52829833]
New Q values:  [-6180.6         2541.41943498 -6245.61866138     7.52829833]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2475.982557    3217.36311936 -2651.70614553   803.31074191]
------
Step:21, Action:South
State  216
Old Q Values:  [ 2242.06034916   -51.76838521 -6170.35693855  5999.74156016]
New Q values:  [ 2242.06034916  3883.35862583 -6170.35693855  5999.74156016]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13015.55326638 -8656.02923281 -7525.7277781   6263.37917382]
------
Step:22, Action:North
State  288
Old Q Values:  [13015.55326638 -8656.02923281 -7525.7277781   6263.37917382]
New Q values:  [ 6170.83024236 -8656.02923281 -7525.7277781   6263.37917382]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2475.982557    3217.36311936 -2651.70614553   803.31074191]
------
Step:23, Action:South
State  216
Old Q Values:  [ 2242.06034916  3883.35862583 -6170.35693855  5999.74156016]
New Q values:  [ 2242.06034916  3431.75720248 -6170.35693855  5999.74156016]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6170.83024236 -8656.02923281 -7525.7277781   6263.37917382]
------
Step:24, Action:West
State  288
Old Q Values:  [ 6170.83024236 -8656.02923281 -7525.7277781   6263.37917382]
New Q values:  [ 6170.83024236 -8656.02923281 -7525.7277781   6738.58577801]
Reward: -1  Episode Reward:  26
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 14112.7803616   3481.82316399]
------
Step:25, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 14112.7803616   3481.82316399]
New Q values:  [-2527.46239811 -6212.61234477  7666.08787804  3481.82316399]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6170.83024236 -8656.02923281 -7525.7277781   6738.58577801]
------
Step:26, Action:West
State  288
Old Q Values:  [ 6170.83024236 -8656.02923281 -7525.7277781   6738.58577801]
New Q values:  [ 6170.83024236 -8656.02923281 -7525.7277781   4994.66067462]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  7666.08787804  3481.82316399]
------
Step:27, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  7666.08787804  3481.82316399]
New Q values:  [-2527.46239811 -6212.61234477  4917.08422393  3481.82316399]
Reward: -1  Episode Reward:  23
xxxxx
x g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6170.83024236 -8656.02923281 -7525.7277781   4994.66067462]
------
Step:28, Action:North
State  288
Old Q Values:  [ 6170.83024236 -8656.02923281 -7525.7277781   4994.66067462]
New Q values:  [ 3432.94103275 -8656.02923281 -7525.7277781   4994.66067462]
Reward: -1  Episode Reward:  22
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2475.982557    3217.36311936 -2651.70614553   803.31074191]
------
Step:29, Action:South
State  208
Old Q Values:  [ 2475.982557    3217.36311936 -2651.70614553   803.31074191]
New Q values:  [ 2475.982557    2784.74345013 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  21
xxxxx
x g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3432.94103275 -8656.02923281 -7525.7277781   4994.66067462]
------
Step:30, Action:West
State  288
Old Q Values:  [ 3432.94103275 -8656.02923281 -7525.7277781   4994.66067462]
New Q values:  [ 3432.94103275 -8656.02923281 -7525.7277781   3472.38953702]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  4917.08422393  3481.82316399]
------
Step:31, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  4917.08422393  3481.82316399]
New Q values:  [-2527.46239811 -6212.61234477  3007.95055068  3481.82316399]
Reward: -1  Episode Reward:  19
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3432.94103275 -8656.02923281 -7525.7277781   3472.38953702]
------
Step:32, Action:West
State  288
Old Q Values:  [ 3432.94103275 -8656.02923281 -7525.7277781   3472.38953702]
New Q values:  [ 3432.94103275 -8656.02923281 -7525.7277781   2432.90276401]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  3007.95055068  3481.82316399]
------
Step:33, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  3007.95055068  3481.82316399]
New Q values:  [-2527.46239811 -6212.61234477  2232.4625301   3481.82316399]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3432.94103275 -8656.02923281 -7525.7277781   2432.90276401]
------
Step:34, Action:North
State  288
Old Q Values:  [ 3432.94103275 -8656.02923281 -7525.7277781   2432.90276401]
New Q values:  [ 2207.99944814 -8656.02923281 -7525.7277781   2432.90276401]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2475.982557    2784.74345013 -2651.70614553   803.31074191]
------
Step:35, Action:South
State  208
Old Q Values:  [ 2475.982557    2784.74345013 -2651.70614553   803.31074191]
New Q values:  [ 2475.982557    1843.16820925 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2207.99944814 -8656.02923281 -7525.7277781   2432.90276401]
------
Step:36, Action:West
State  288
Old Q Values:  [ 2207.99944814 -8656.02923281 -7525.7277781   2432.90276401]
New Q values:  [ 2207.99944814 -8656.02923281 -7525.7277781   1993.87197235]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -2354.42989511  3404.36955582]
------
Step:37, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -2354.42989511  3404.36955582]
New Q values:  [   37.74111519  -168.92307549 -2354.42989511 10129.39028184]
Reward: 9  Episode Reward:  23
xxxxx
x   x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[29207.47486505  -180.6         3852.81245667  4453.74493219]
------
Step:38, Action:North
State  257
Old Q Values:  [29207.47486505  -180.6         3852.81245667  4453.74493219]
New Q values:  [84839.73950647  -180.6         3852.81245667  4453.74493219]
Reward: 100009  Episode Reward:  100032
xxxxx
x   x
xa  x
x  gx
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2232.4625301   3481.82316399]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2232.4625301   3481.82316399]
New Q values:  [-2527.46239811 -6212.61234477  2232.4625301  -3540.41044934]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x ..x
xg .x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  645.55489711  862.42400667 -272.09726687]
------
Step:1, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   11.71409456 -252.78192178]
New Q values:  [-252.35169558   11.28108573   18.57390701 -252.78192178]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811     2.67620961    28.29423062]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    28.29423062]
New Q values:  [ -253.44886264 -1902.20915811     2.67620961    16.28986435]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   18.57390701 -252.78192178]
------
Step:3, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   18.57390701 -252.78192178]
New Q values:  [-252.35169558   11.28108573   41.57206465 -252.78192178]
Reward: -1  Episode Reward:  7
xxxxx
x a.x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.15808339e+02  4.07943170e+00]
------
Step:4, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.15808339e+02  4.07943170e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  1.67403232e+02  4.07943170e+00]
Reward: 9  Episode Reward:  16
xxxxx
x  ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         385.59965244 -180.6          64.43585851]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6         385.59965244 -180.6          64.43585851]
New Q values:  [ -180.6        11098.73547861  -180.6           64.43585851]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.64836521e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [ 3.64836521e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.79224815e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        11098.73547861  -180.6           64.43585851]
------
Step:7, Action:South
State  138
Old Q Values:  [ -180.6        11098.73547861  -180.6           64.43585851]
New Q values:  [-180.6        6238.81665949 -180.6          64.43585851]
Reward: -1  Episode Reward:  13
xxxxx
x   x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2242.06034916  3431.75720248 -6170.35693855  5999.74156016]
------
Step:8, Action:West
State  218
Old Q Values:  [   4.59845327 -610.30346672    0.           63.2966801 ]
New Q values:  [   4.59845327 -610.30346672    0.         1927.33915479]
Reward: 9  Episode Reward:  22
xxxxx
x   x
x.a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  6.32206828e+03  2.79697622e+03]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  6.32206828e+03  2.79697622e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  3.10642906e+03  2.79697622e+03]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.         1927.33915479]
------
Step:10, Action:West
State  216
Old Q Values:  [ 2242.06034916  3431.75720248 -6170.35693855  5999.74156016]
New Q values:  [ 2242.06034916  3431.75720248 -6170.35693855 -2387.54492731]
Reward: -10001  Episode Reward:  -9980
xxxxx
x   x
x.g x
x...x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2207.99944814 -8656.02923281 -7525.7277781   1993.87197235]
------
Step:1, Action:North
State  288
Old Q Values:  [ 2207.99944814 -8656.02923281 -7525.7277781   1993.87197235]
New Q values:  [ 1631.39454636 -8656.02923281 -7525.7277781   1993.87197235]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2475.982557    1843.16820925 -2651.70614553   803.31074191]
------
Step:2, Action:South
State  208
Old Q Values:  [ 2475.982557    1843.16820925 -2651.70614553   803.31074191]
New Q values:  [ 2475.982557    1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1631.39454636 -8656.02923281 -7525.7277781   1993.87197235]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1631.39454636 -8656.02923281 -7525.7277781   1993.87197235]
New Q values:  [ 1631.39454636 -8656.02923281 -7525.7277781   1472.68754797]
Reward: 9  Episode Reward:  17
xxxxx
x. .x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2232.4625301  -3540.41044934]
------
Step:4, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 -2354.42989511 10129.39028184]
New Q values:  [   37.74111519  -168.92307549  -452.95359414 10129.39028184]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1631.39454636 -8656.02923281 -7525.7277781   1472.68754797]
------
Step:5, Action:West
State  288
Old Q Values:  [ 1631.39454636 -8656.02923281 -7525.7277781   1472.68754797]
New Q values:  [ 1631.39454636 -8656.02923281 -7525.7277781   1258.21377822]
Reward: -1  Episode Reward:  15
xxxxx
x. .x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2232.4625301  -3540.41044934]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2232.4625301  -3540.41044934]
New Q values:  [-2527.46239811 -6212.61234477  1381.80337595 -3540.41044934]
Reward: -1  Episode Reward:  14
xxxxx
x. .x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1631.39454636 -8656.02923281 -7525.7277781   1258.21377822]
------
Step:7, Action:North
State  288
Old Q Values:  [ 1631.39454636 -8656.02923281 -7525.7277781   1258.21377822]
New Q values:  [ 1394.75258564 -8656.02923281 -7525.7277781   1258.21377822]
Reward: -1  Episode Reward:  13
xxxxx
xg .x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2475.982557    1334.82887541 -2651.70614553   803.31074191]
------
Step:8, Action:North
State  208
Old Q Values:  [ 2475.982557    1334.82887541 -2651.70614553   803.31074191]
New Q values:  [ 2867.43802065  1334.82887541 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  22
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        6238.81665949 -180.6          64.43585851]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6        6238.81665949 -180.6          64.43585851]
New Q values:  [-180.6        7871.67110393 -180.6          64.43585851]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.79224815e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:10, Action:North
State  210
Old Q Values:  [ 1.79224815e+04  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 9.52989392e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        7871.67110393 -180.6          64.43585851]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6        7871.67110393 -180.6          64.43585851]
New Q values:  [-180.6        6007.03661698 -180.6          64.43585851]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 9.52989392e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:12, Action:North
State  210
Old Q Values:  [ 9.52989392e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 5.61346855e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        6007.03661698 -180.6          64.43585851]
------
Step:13, Action:South
State  138
Old Q Values:  [-180.6        6007.03661698 -180.6          64.43585851]
New Q values:  [-180.6        4086.25521248 -180.6          64.43585851]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.61346855e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:14, Action:North
State  208
Old Q Values:  [ 2867.43802065  1334.82887541 -2651.70614553   803.31074191]
New Q values:  [ 2372.251772    1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4086.25521248 -180.6          64.43585851]
------
Step:15, Action:South
State  136
Old Q Values:  [-6180.6         2541.41943498 -6245.61866138     7.52829833]
New Q values:  [-6180.6         1727.64330559 -6245.61866138     7.52829833]
Reward: -1  Episode Reward:  15
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2372.251772    1334.82887541 -2651.70614553   803.31074191]
------
Step:16, Action:North
State  208
Old Q Values:  [ 2372.251772    1334.82887541 -2651.70614553   803.31074191]
New Q values:  [ 2174.17727255  1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4086.25521248 -180.6          64.43585851]
------
Step:17, Action:South
State  136
Old Q Values:  [-6180.6         1727.64330559 -6245.61866138     7.52829833]
New Q values:  [-6180.6         1342.710504   -6245.61866138     7.52829833]
Reward: -1  Episode Reward:  13
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2174.17727255  1334.82887541 -2651.70614553   803.31074191]
------
Step:18, Action:North
State  208
Old Q Values:  [ 2174.17727255  1334.82887541 -2651.70614553   803.31074191]
New Q values:  [ 2094.94747276  1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4086.25521248 -180.6          64.43585851]
------
Step:19, Action:South
State  138
Old Q Values:  [-180.6        4086.25521248 -180.6          64.43585851]
New Q values:  [-180.6        2262.38632682 -180.6          64.43585851]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2094.94747276  1334.82887541 -2651.70614553   803.31074191]
------
Step:20, Action:North
State  208
Old Q Values:  [ 2094.94747276  1334.82887541 -2651.70614553   803.31074191]
New Q values:  [ 1516.09488715  1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  10
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2262.38632682 -180.6          64.43585851]
------
Step:21, Action:West
State  138
Old Q Values:  [-180.6        2262.38632682 -180.6          64.43585851]
New Q values:  [-180.6        2262.38632682 -180.6          75.39531286]
Reward: -1  Episode Reward:  9
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  1.67403232e+02  4.07943170e+00]
------
Step:22, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     2.67620961    16.28986435]
New Q values:  [ -253.44886264 -1902.20915811   679.18638189    16.28986435]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2262.38632682 -180.6          75.39531286]
------
Step:23, Action:West
State  138
Old Q Values:  [-180.6        2262.38632682 -180.6          75.39531286]
New Q values:  [-180.6        2262.38632682 -180.6         233.31403971]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   679.18638189    16.28986435]
------
Step:24, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  1.67403232e+02  4.07943170e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  7.45077191e+02  4.07943170e+00]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2262.38632682 -180.6         233.31403971]
------
Step:25, Action:South
State  138
Old Q Values:  [-180.6        2262.38632682 -180.6         233.31403971]
New Q values:  [-180.6        2588.39509642 -180.6         233.31403971]
Reward: -1  Episode Reward:  5
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.61346855e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:26, Action:North
State  208
Old Q Values:  [ 1516.09488715  1334.82887541 -2651.70614553   803.31074191]
New Q values:  [ 1382.35648379  1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2588.39509642 -180.6         233.31403971]
------
Step:27, Action:South
State  138
Old Q Values:  [-180.6        2588.39509642 -180.6         233.31403971]
New Q values:  [-180.6        2718.79860426 -180.6         233.31403971]
Reward: -1  Episode Reward:  3
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.61346855e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:28, Action:North
State  208
Old Q Values:  [ 1382.35648379  1334.82887541 -2651.70614553   803.31074191]
New Q values:  [ 1367.98217479  1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  2
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2718.79860426 -180.6         233.31403971]
------
Step:29, Action:South
State  136
Old Q Values:  [-6180.6         1342.710504   -6245.61866138     7.52829833]
New Q values:  [-6180.6          946.87885404 -6245.61866138     7.52829833]
Reward: -1  Episode Reward:  1
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1367.98217479  1334.82887541 -2651.70614553   803.31074191]
------
Step:30, Action:North
State  208
Old Q Values:  [ 1367.98217479  1334.82887541 -2651.70614553   803.31074191]
New Q values:  [ 1362.23245119  1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  0
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2718.79860426 -180.6         233.31403971]
------
Step:31, Action:South
State  138
Old Q Values:  [-180.6        2718.79860426 -180.6         233.31403971]
New Q values:  [-180.6        1495.58917706 -180.6         233.31403971]
Reward: -1  Episode Reward:  -1
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1362.23245119  1334.82887541 -2651.70614553   803.31074191]
------
Step:32, Action:North
State  208
Old Q Values:  [ 1362.23245119  1334.82887541 -2651.70614553   803.31074191]
New Q values:  [  992.9697336   1334.82887541 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  -2
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1495.58917706 -180.6         233.31403971]
------
Step:33, Action:South
State  136
Old Q Values:  [-6180.6          946.87885404 -6245.61866138     7.52829833]
New Q values:  [-6180.6          778.60020424 -6245.61866138     7.52829833]
Reward: -1  Episode Reward:  -3
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  992.9697336   1334.82887541 -2651.70614553   803.31074191]
------
Step:34, Action:South
State  208
Old Q Values:  [  992.9697336   1334.82887541 -2651.70614553   803.31074191]
New Q values:  [  992.9697336    951.75732585 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  -4
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1394.75258564 -8656.02923281 -7525.7277781   1258.21377822]
------
Step:35, Action:North
State  288
Old Q Values:  [ 1394.75258564 -8656.02923281 -7525.7277781   1258.21377822]
New Q values:  [  855.19195434 -8656.02923281 -7525.7277781   1258.21377822]
Reward: -1  Episode Reward:  -5
xxxxx
x. gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  992.9697336    951.75732585 -2651.70614553   803.31074191]
------
Step:36, Action:South
State  208
Old Q Values:  [  992.9697336    951.75732585 -2651.70614553   803.31074191]
New Q values:  [  992.9697336    757.56706381 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  -6
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  855.19195434 -8656.02923281 -7525.7277781   1258.21377822]
------
Step:37, Action:West
State  288
Old Q Values:  [  855.19195434 -8656.02923281 -7525.7277781   1258.21377822]
New Q values:  [  855.19195434 -8656.02923281 -7525.7277781    917.22652407]
Reward: -1  Episode Reward:  -7
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1381.80337595 -3540.41044934]
------
Step:38, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1381.80337595 -3540.41044934]
New Q values:  [-2527.46239811 -6212.61234477   827.2893076  -3540.41044934]
Reward: -1  Episode Reward:  -8
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  855.19195434 -8656.02923281 -7525.7277781    917.22652407]
------
Step:39, Action:West
State  288
Old Q Values:  [  855.19195434 -8656.02923281 -7525.7277781    917.22652407]
New Q values:  [  855.19195434 -8656.02923281 -7525.7277781   3405.10769418]
Reward: -1  Episode Reward:  -9
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  -452.95359414 10129.39028184]
------
Step:40, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   827.2893076  -3540.41044934]
New Q values:  [-2527.46239811 -6212.61234477   827.2893076  24041.1576722 ]
Reward: 9  Episode Reward:  0
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[84839.73950647  -180.6         3852.81245667  4453.74493219]
------
Step:41, Action:North
State  257
Old Q Values:  [84839.73950647  -180.6         3852.81245667  4453.74493219]
New Q values:  [56535.3929762   -180.6         3852.81245667  4453.74493219]
Reward: 9  Episode Reward:  9
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         62663.07428404 75313.65724539     0.        ]
------
Step:42, Action:South
State  177
Old Q Values:  [    0.         62663.07428404 75313.65724539     0.        ]
New Q values:  [    0.         42025.24760648 75313.65724539     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[56535.3929762   -180.6         3852.81245667  4453.74493219]
------
Step:43, Action:North
State  257
Old Q Values:  [56535.3929762   -180.6         3852.81245667  4453.74493219]
New Q values:  [45207.6543641   -180.6         3852.81245667  4453.74493219]
Reward: -1  Episode Reward:  7
xxxxx
x. gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         42025.24760648 75313.65724539     0.        ]
------
Step:44, Action:East
State  177
Old Q Values:  [    0.         42025.24760648 75313.65724539     0.        ]
New Q values:  [    0.         42025.24760648 33473.08087069     0.        ]
Reward: 9  Episode Reward:  16
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.11407266e+04 7.62715392e+03 0.00000000e+00]
------
Step:45, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.11407266e+04 7.62715392e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.16680379e+04 7.62715392e+03 0.00000000e+00]
Reward: -1  Episode Reward:  15
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   827.2893076  24041.1576722 ]
------
Step:46, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   827.2893076  24041.1576722 ]
New Q values:  [-2527.46239811 -6212.61234477   827.2893076  23178.15937811]
Reward: -1  Episode Reward:  14
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[45207.6543641   -180.6         3852.81245667  4453.74493219]
------
Step:47, Action:North
State  257
Old Q Values:  [45207.6543641   -180.6         3852.81245667  4453.74493219]
New Q values:  [30690.03602758  -180.6         3852.81245667  4453.74493219]
Reward: -1  Episode Reward:  13
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         42025.24760648 33473.08087069     0.        ]
------
Step:48, Action:South
State  177
Old Q Values:  [    0.         42025.24760648 33473.08087069     0.        ]
New Q values:  [    0.         26016.50985086 33473.08087069     0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[30690.03602758  -180.6         3852.81245667  4453.74493219]
------
Step:49, Action:North
State  257
Old Q Values:  [30690.03602758  -180.6         3852.81245667  4453.74493219]
New Q values:  [22317.33867224  -180.6         3852.81245667  4453.74493219]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         26016.50985086 33473.08087069     0.        ]
------
Step:50, Action:South
State  177
Old Q Values:  [    0.         26016.50985086 33473.08087069     0.        ]
New Q values:  [    0.         17101.20554202 33473.08087069     0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[22317.33867224  -180.6         3852.81245667  4453.74493219]
------
Step:51, Action:North
State  257
Old Q Values:  [22317.33867224  -180.6         3852.81245667  4453.74493219]
New Q values:  [22077.68502934  -180.6         3852.81245667  4453.74493219]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         34025.66630253 43837.83186815     0.        ]
------
Step:52, Action:East
State  177
Old Q Values:  [    0.         17101.20554202 33473.08087069     0.        ]
New Q values:  [    0.         17101.20554202 15504.78527399     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7053.84308572 -3909.58186816  2096.80577447]
------
Step:53, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.16680379e+04 7.62715392e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 1.16200630e+04 7.62715392e+03 0.00000000e+00]
Reward: -1  Episode Reward:  7
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   827.2893076  23178.15937811]
------
Step:54, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -452.95359414 10129.39028184]
New Q values:  [   37.74111519  -168.92307549  -452.95359414 10674.46162154]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[22077.68502934  -180.6         3852.81245667  4453.74493219]
------
Step:55, Action:North
State  257
Old Q Values:  [22077.68502934  -180.6         3852.81245667  4453.74493219]
New Q values:  [21981.82357218  -180.6         3852.81245667  4453.74493219]
Reward: -1  Episode Reward:  5
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         34025.66630253 43837.83186815     0.        ]
------
Step:56, Action:East
State  179
Old Q Values:  [    0.         34025.66630253 43837.83186815     0.        ]
New Q values:  [    0.         34025.66630253 18466.46146429     0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  3.10642906e+03  2.79697622e+03]
------
Step:57, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  3.10642906e+03  2.79697622e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  2.92601219e+03  2.79697622e+03]
Reward: -1  Episode Reward:  3
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.61346855e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:58, Action:North
State  208
Old Q Values:  [  992.9697336    757.56706381 -2651.70614553   803.31074191]
New Q values:  [21636.47709191   757.56706381 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  2
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 70799.63066158]
------
Step:59, Action:West
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 70799.63066158]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 56441.00749949]
Reward: -1  Episode Reward:  1
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  9.37391841e+04]
------
Step:60, Action:West
State  114
Old Q Values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  9.37391841e+04]
New Q values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  1.09787042e+05]
Reward: 100009  Episode Reward:  100010
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   827.2893076  23178.15937811]
------
Step:1, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        8290.63257881  233.33860649]
New Q values:  [  16.82637525 -180.6        8290.63257881 1160.19572766]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  233.86175279 -6457.4598      3538.2009502  -2702.17995449]
------
Step:2, Action:East
State  260
Old Q Values:  [  233.86175279 -6457.4598      3538.2009502  -2702.17995449]
New Q values:  [  233.86175279 -6457.4598      8368.12819351 -2702.17995449]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   827.2893076  23178.15937811]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   827.2893076  23178.15937811]
New Q values:  [-2527.46239811 -6212.61234477  1357.84803129 23178.15937811]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x. .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  855.19195434 -8656.02923281 -7525.7277781   3405.10769418]
------
Step:4, Action:North
State  288
Old Q Values:  [  855.19195434 -8656.02923281 -7525.7277781   3405.10769418]
New Q values:  [ 2031.51734742 -8656.02923281 -7525.7277781   3405.10769418]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.61346855e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:5, Action:North
State  208
Old Q Values:  [21636.47709191   757.56706381 -2651.70614553   803.31074191]
New Q values:  [25592.29308661   757.56706381 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  35
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 56441.00749949]
------
Step:6, Action:West
State  136
Old Q Values:  [-6180.6          778.60020424 -6245.61866138     7.52829833]
New Q values:  [-6180.6          778.60020424 -6245.61866138   942.60603078]
Reward: 9  Episode Reward:  44
xxxxx
xga x
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        3113.9823715      49.8565746 ]
------
Step:7, Action:East
State  112
Old Q Values:  [    0.          1519.70944144 13703.34638233 84007.56      ]
New Q values:  [    0.          1519.70944144  7301.08474053 84007.56      ]
Reward: -1  Episode Reward:  43
xxxxx
x.gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6067.82062533  1504.73148864 -8652.84        4547.03370611]
------
Step:8, Action:North
State  136
Old Q Values:  [-6180.6          778.60020424 -6245.61866138   942.60603078]
New Q values:  [-2370.05819077   778.60020424 -6245.61866138   942.60603078]
Reward: -301  Episode Reward:  -258
xxxxx
xg ax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2370.05819077   778.60020424 -6245.61866138   942.60603078]
------
Step:9, Action:West
State  128
Old Q Values:  [ 6067.82062533  1504.73148864 -8652.84        4547.03370611]
New Q values:  [ 6067.82062533  1504.73148864 -8652.84       21020.48148244]
Reward: -10001  Episode Reward:  -10259
xxxxx
x.g x
x.  x
x   x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2031.51734742 -8656.02923281 -7525.7277781   3405.10769418]
------
Step:1, Action:North
State  288
Old Q Values:  [ 2031.51734742 -8656.02923281 -7525.7277781   3405.10769418]
New Q values:  [ 2502.04750466 -8656.02923281 -7525.7277781   3405.10769418]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.61346855e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [25592.29308661   757.56706381 -2651.70614553   803.31074191]
New Q values:  [10690.99398776   757.56706381 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1495.58917706 -180.6         233.31403971]
------
Step:3, Action:West
State  136
Old Q Values:  [-2370.05819077   778.60020424 -6245.61866138   942.60603078]
New Q values:  [-2370.05819077   778.60020424 -6245.61866138   383.90892328]
Reward: 9  Episode Reward:  27
xxxxx
x.agx
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:4, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   679.18638189    16.28986435]
New Q values:  [ -253.44886264 -1902.20915811   679.18638189    24.38756513]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   41.57206465 -252.78192178]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   41.57206465 -252.78192178]
New Q values:  [-252.35169558   11.28108573  219.78474043 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   679.18638189    24.38756513]
------
Step:6, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  7.45077191e+02  4.07943170e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  7.46107629e+02  4.07943170e+00]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1495.58917706 -180.6         233.31403971]
------
Step:7, Action:South
State  138
Old Q Values:  [-180.6        1495.58917706 -180.6         233.31403971]
New Q values:  [-180.6        3804.93386715 -180.6         233.31403971]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10690.99398776   757.56706381 -2651.70614553   803.31074191]
------
Step:8, Action:North
State  210
Old Q Values:  [ 5.61346855e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.38626758e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        3804.93386715 -180.6         233.31403971]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6        3804.93386715 -180.6         233.31403971]
New Q values:  [-180.6        2898.02697207 -180.6         233.31403971]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.38626758e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
------
Step:10, Action:South
State  210
Old Q Values:  [ 3.38626758e+03  4.58884475e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [3386.26758107 2856.47020853 -180.6           3.52184257]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2502.04750466 -8656.02923281 -7525.7277781   3405.10769418]
------
Step:11, Action:West
State  288
Old Q Values:  [ 2502.04750466 -8656.02923281 -7525.7277781   3405.10769418]
New Q values:  [ 2502.04750466 -8656.02923281 -7525.7277781   8314.89089111]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1357.84803129 23178.15937811]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1357.84803129 23178.15937811]
New Q values:  [-2527.46239811 -6212.61234477  1357.84803129 15871.2108229 ]
Reward: 9  Episode Reward:  38
xxxxx
x   x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21981.82357218  -180.6         3852.81245667  4453.74493219]
------
Step:13, Action:North
State  260
Old Q Values:  [  233.86175279 -6457.4598      8368.12819351 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598      8368.12819351 -2702.17995449]
Reward: -9991  Episode Reward:  -9953
xxxxx
x   x
xg. x
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   679.18638189    24.38756513]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   679.18638189    24.38756513]
New Q values:  [ -253.44886264 -1902.20915811  1146.48264438    24.38756513]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2898.02697207 -180.6         233.31403971]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        2898.02697207 -180.6         233.31403971]
New Q values:  [-180.6        2174.49106315 -180.6         233.31403971]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3386.26758107 2856.47020853 -180.6           3.52184257]
------
Step:3, Action:North
State  210
Old Q Values:  [3386.26758107 2856.47020853 -180.6           3.52184257]
New Q values:  [2006.25435137 2856.47020853 -180.6           3.52184257]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2174.49106315 -180.6         233.31403971]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6        2174.49106315 -180.6         233.31403971]
New Q values:  [-180.6        1726.13748782 -180.6         233.31403971]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2006.25435137 2856.47020853 -180.6           3.52184257]
------
Step:5, Action:South
State  210
Old Q Values:  [2006.25435137 2856.47020853 -180.6           3.52184257]
New Q values:  [ 2.00625435e+03  3.64245535e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  15
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2502.04750466 -8656.02923281 -7525.7277781   8314.89089111]
------
Step:6, Action:West
State  288
Old Q Values:  [ 2502.04750466 -8656.02923281 -7525.7277781   8314.89089111]
New Q values:  [ 2502.04750466 -8656.02923281 -7525.7277781   2092.71960331]
Reward: -9991  Episode Reward:  -9976
xxxxx
x.  x
x.. x
x.g x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  569.14995564  -289.59534477 10375.75172279  -251.53897752]
------
Step:1, Action:East
State  261
Old Q Values:  [  569.14995564  -289.59534477 10375.75172279  -251.53897752]
New Q values:  [ 569.14995564 -289.59534477 2917.06393599 -251.53897752]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x g.x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-1.80600000e+02 -5.64889407e+03  3.06663838e+00 -1.80600000e+02]
------
Step:1, Action:East
State  110
Old Q Values:  [-1.80600000e+02 -5.64889407e+03  3.06663838e+00 -1.80600000e+02]
New Q values:  [ -180.6        -5648.89406634   230.45894417  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[-2.81736000e+02 -8.87787327e+03  7.46107629e+02  4.07943170e+00]
------
Step:2, Action:East
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  7.46107629e+02  4.07943170e+00]
New Q values:  [-2.81736000e+02 -8.87787327e+03  8.21684298e+02  4.07943170e+00]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1726.13748782 -180.6         233.31403971]
------
Step:3, Action:South
State  136
Old Q Values:  [-2370.05819077   778.60020424 -6245.61866138   383.90892328]
New Q values:  [-2370.05819077  1346.36724244 -6245.61866138   383.90892328]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2242.06034916  3431.75720248 -6170.35693855 -2387.54492731]
------
Step:4, Action:South
State  216
Old Q Values:  [ 2242.06034916  3431.75720248 -6170.35693855 -2387.54492731]
New Q values:  [ 2242.06034916  2128.71713239 -6170.35693855 -2387.54492731]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2502.04750466 -8656.02923281 -7525.7277781   2092.71960331]
------
Step:5, Action:North
State  288
Old Q Values:  [ 2502.04750466 -8656.02923281 -7525.7277781   2092.71960331]
New Q values:  [ 1672.83710661 -8656.02923281 -7525.7277781   2092.71960331]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2242.06034916  2128.71713239 -6170.35693855 -2387.54492731]
------
Step:6, Action:North
State  216
Old Q Values:  [ 2242.06034916  2128.71713239 -6170.35693855 -2387.54492731]
New Q values:  [ 1300.1343124   2128.71713239 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  34
xxxxx
x gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2370.05819077  1346.36724244 -6245.61866138   383.90892328]
------
Step:7, Action:South
State  138
Old Q Values:  [-180.6        1726.13748782 -180.6         233.31403971]
New Q values:  [-180.6        1328.47013484 -180.6         233.31403971]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1300.1343124   2128.71713239 -6170.35693855 -2387.54492731]
------
Step:8, Action:South
State  210
Old Q Values:  [ 2.00625435e+03  3.64245535e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [2006.25435137 2084.19802129 -180.6           3.52184257]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.83710661 -8656.02923281 -7525.7277781   2092.71960331]
------
Step:9, Action:North
State  288
Old Q Values:  [ 1672.83710661 -8656.02923281 -7525.7277781   2092.71960331]
New Q values:  [ 1246.73658908 -8656.02923281 -7525.7277781   2092.71960331]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x .ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[   4.59845327 -610.30346672    0.         1927.33915479]
------
Step:10, Action:West
State  218
Old Q Values:  [   4.59845327 -610.30346672    0.         1927.33915479]
New Q values:  [   4.59845327 -610.30346672    0.          784.96322284]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    28.75853641     0.        ]
------
Step:11, Action:East
State  200
Old Q Values:  [ 6.28218634e+01  4.04386150e+03  1.93990569e+02 -8.40000000e-01]
New Q values:  [ 6.28218634e+01  4.04386150e+03  7.15611367e+02 -8.40000000e-01]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1300.1343124   2128.71713239 -6170.35693855 -2387.54492731]
------
Step:12, Action:South
State  216
Old Q Values:  [ 1300.1343124   2128.71713239 -6170.35693855 -2387.54492731]
New Q values:  [ 1300.1343124   1478.70273395 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1246.73658908 -8656.02923281 -7525.7277781   2092.71960331]
------
Step:13, Action:West
State  288
Old Q Values:  [ 1246.73658908 -8656.02923281 -7525.7277781   2092.71960331]
New Q values:  [ 1246.73658908 -8656.02923281 -7525.7277781   5603.85108819]
Reward: 9  Episode Reward:  47
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1357.84803129 15871.2108229 ]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1357.84803129 15871.2108229 ]
New Q values:  [-2527.46239811 -6212.61234477  1357.84803129 67487.39586301]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
xg  x
xa  x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4187.30744448 -6457.4598      8368.12819351 -2702.17995449]
------
Step:1, Action:East
State  261
Old Q Values:  [ 569.14995564 -289.59534477 2917.06393599 -251.53897752]
New Q values:  [ 569.14995564 -289.59534477 3659.41534804 -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881 1160.19572766]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1357.84803129 67487.39586301]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898 67487.39586301]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1246.73658908 -8656.02923281 -7525.7277781   5603.85108819]
------
Step:3, Action:North
State  288
Old Q Values:  [ 1246.73658908 -8656.02923281 -7525.7277781   5603.85108819]
New Q values:  [ 1129.35404202 -8656.02923281 -7525.7277781   5603.85108819]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2006.25435137 2084.19802129 -180.6           3.52184257]
------
Step:4, Action:North
State  208
Old Q Values:  [10690.99398776   757.56706381 -2651.70614553   803.31074191]
New Q values:  [21214.09984495   757.56706381 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 56441.00749949]
------
Step:5, Action:West
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 56441.00749949]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 52229.93681651]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  9.88271127e+04]
------
Step:6, Action:West
State  122
Old Q Values:  [-2.81736000e+02 -8.87787327e+03  8.21684298e+02  4.07943170e+00]
New Q values:  [ -281.736      -8877.87327254   821.6842981    265.75897468]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  645.55489711  862.42400667 -272.09726687]
------
Step:7, Action:East
State  110
Old Q Values:  [ -180.6        -5648.89406634   230.45894417  -180.6       ]
New Q values:  [ -180.6        -5648.89406634   338.0888671   -180.6       ]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   821.6842981    265.75897468]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   821.6842981    265.75897468]
New Q values:  [ -281.736      -8877.87327254   726.61475969   265.75897468]
Reward: -1  Episode Reward:  52
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1328.47013484 -180.6         233.31403971]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6        1328.47013484 -180.6         233.31403971]
New Q values:  [-180.6        1156.04746032 -180.6         233.31403971]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2006.25435137 2084.19802129 -180.6           3.52184257]
------
Step:10, Action:South
State  208
Old Q Values:  [21214.09984495   757.56706381 -2651.70614553   803.31074191]
New Q values:  [21214.09984495  1983.58215198 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1129.35404202 -8656.02923281 -7525.7277781   5603.85108819]
------
Step:11, Action:West
State  288
Old Q Values:  [ 1129.35404202 -8656.02923281 -7525.7277781   5603.85108819]
New Q values:  [ 1129.35404202 -8656.02923281 -7525.7277781  22487.15919418]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2229.69453898 67487.39586301]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898 67487.39586301]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898 29504.79680326]
Reward: -1  Episode Reward:  48
xxxxx
xg  x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4187.30744448 -6457.4598      8368.12819351 -2702.17995449]
------
Step:13, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598      8368.12819351 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598     12198.09031838 -2702.17995449]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2229.69453898 29504.79680326]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898 29504.79680326]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898 18395.86579296]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21981.82357218  -180.6         3852.81245667  4453.74493219]
------
Step:15, Action:North
State  261
Old Q Values:  [ 569.14995564 -289.59534477 3659.41534804 -251.53897752]
New Q values:  [1740.91042006 -289.59534477 3659.41534804 -251.53897752]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791   20.83123995 5046.168126      0.        ]
------
Step:16, Action:East
State  178
Old Q Values:  [  0.           0.         667.43811653   0.        ]
New Q values:  [    0.             0.         61150.17890313     0.        ]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x a x
xg  x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        3113.9823715      49.8565746 ]
------
Step:1, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        3113.9823715      49.8565746 ]
New Q values:  [-10156.11771313  -5995.686        1654.90312133     49.8565746 ]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
Step:2, Action:North
State  136
Old Q Values:  [-2370.05819077  1346.36724244 -6245.61866138   383.90892328]
New Q values:  [ -724.71310357  1346.36724244 -6245.61866138   383.90892328]
Reward: -301  Episode Reward:  -292
xxxxx
xg ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1346.36724244 -6245.61866138   383.90892328]
------
Step:3, Action:South
State  136
Old Q Values:  [ -724.71310357  1346.36724244 -6245.61866138   383.90892328]
New Q values:  [ -724.71310357   987.55771716 -6245.61866138   383.90892328]
Reward: 9  Episode Reward:  -283
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1300.1343124   1478.70273395 -6170.35693855 -2387.54492731]
------
Step:4, Action:South
State  208
Old Q Values:  [21214.09984495  1983.58215198 -2651.70614553   803.31074191]
New Q values:  [21214.09984495  7544.98061905 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  -274
xxxxx
x  gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1129.35404202 -8656.02923281 -7525.7277781  22487.15919418]
------
Step:5, Action:West
State  288
Old Q Values:  [ 1129.35404202 -8656.02923281 -7525.7277781  22487.15919418]
New Q values:  [ 1129.35404202 -8656.02923281 -7525.7277781  14519.02341556]
Reward: 9  Episode Reward:  -265
xxxxx
x g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2229.69453898 18395.86579296]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898 18395.86579296]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898  8497.25785103]
Reward: 9  Episode Reward:  -256
xxxxx
xg  x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[  205.63143496     0.          3778.3717795  -2468.58126138]
------
Step:7, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598     12198.09031838 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598      7427.81348266 -2702.17995449]
Reward: -1  Episode Reward:  -257
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2229.69453898  8497.25785103]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898  8497.25785103]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898  9992.85021207]
Reward: -1  Episode Reward:  -258
xxxxx
x   x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21981.82357218  -180.6         3852.81245667  4453.74493219]
------
Step:9, Action:North
State  261
Old Q Values:  [1740.91042006 -289.59534477 3659.41534804 -251.53897752]
New Q values:  [ 903.00413515 -289.59534477 3659.41534804 -251.53897752]
Reward: 9  Episode Reward:  -249
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   670.79989043 -1867.36199304  -180.6       ]
------
Step:10, Action:South
State  183
Old Q Values:  [  22.25138791   20.83123995 5046.168126      0.        ]
New Q values:  [  22.25138791 1105.55710039 5046.168126      0.        ]
Reward: -1  Episode Reward:  -250
xxxxx
x   x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 903.00413515 -289.59534477 3659.41534804 -251.53897752]
------
Step:11, Action:East
State  261
Old Q Values:  [ 903.00413515 -289.59534477 3659.41534804 -251.53897752]
New Q values:  [  903.00413515  -289.59534477 -1538.97879716  -251.53897752]
Reward: -10001  Episode Reward:  -10251
xxxxx
x   x
x . x
x g x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21214.09984495  7544.98061905 -2651.70614553   803.31074191]
------
Step:1, Action:North
State  216
Old Q Values:  [ 1300.1343124   1478.70273395 -6170.35693855 -2387.54492731]
New Q values:  [  872.26796306  1478.70273395 -6170.35693855 -2387.54492731]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1156.04746032 -180.6         233.31403971]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        1156.04746032 -180.6         233.31403971]
New Q values:  [-180.6        1087.07839052 -180.6         233.31403971]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2006.25435137 2084.19802129 -180.6           3.52184257]
------
Step:3, Action:South
State  210
Old Q Values:  [2006.25435137 2084.19802129 -180.6           3.52184257]
New Q values:  [2006.25435137 -805.21376682 -180.6           3.52184257]
Reward: -9991  Episode Reward:  -9983
xxxxx
x.. x
x . x
x..gx
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1129.35404202 -8656.02923281 -7525.7277781  14519.02341556]
------
Step:1, Action:West
State  288
Old Q Values:  [ 1129.35404202 -8656.02923281 -7525.7277781  14519.02341556]
New Q values:  [ 1129.35404202 -8656.02923281 -7525.7277781   9015.34785269]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  -452.95359414 10674.46162154]
------
Step:2, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -452.95359414 10674.46162154]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 4546.08588916]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  903.00413515  -289.59534477 -1538.97879716  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  903.00413515  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  567.84162119  -289.59534477 -1538.97879716  -251.53897752]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   670.79989043 -1867.36199304  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648   670.79989043 -1867.36199304  -180.6       ]
New Q values:  [   13.85659648   438.07244253 -1867.36199304  -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  567.84162119  -289.59534477 -1538.97879716  -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [  567.84162119  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  357.95838124  -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   438.07244253 -1867.36199304  -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [   13.85659648   438.07244253 -1867.36199304  -180.6       ]
New Q values:  [   13.85659648   282.01649138 -1867.36199304  -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  357.95838124  -289.59534477 -1538.97879716  -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [  357.95838124  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  227.18829991  -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   282.01649138 -1867.36199304  -180.6       ]
------
Step:8, Action:South
State  183
Old Q Values:  [  22.25138791 1105.55710039 5046.168126      0.        ]
New Q values:  [  22.25138791  509.77933013 5046.168126      0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  227.18829991  -289.59534477 -1538.97879716  -251.53897752]
------
Step:9, Action:North
State  261
Old Q Values:  [  227.18829991  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  174.88026738  -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   282.01649138 -1867.36199304  -180.6       ]
------
Step:10, Action:South
State  183
Old Q Values:  [  22.25138791  509.77933013 5046.168126      0.        ]
New Q values:  [  22.25138791  255.77581227 5046.168126      0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  174.88026738  -289.59534477 -1538.97879716  -251.53897752]
------
Step:11, Action:North
State  261
Old Q Values:  [  174.88026738  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [ 1583.20254475  -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  255.77581227 5046.168126      0.        ]
------
Step:12, Action:East
State  181
Old Q Values:  [   13.85659648   282.01649138 -1867.36199304  -180.6       ]
New Q values:  [   13.85659648   282.01649138 -3255.52590138  -180.6       ]
Reward: -9991  Episode Reward:  -9972
xxxxx
x.. x
x g.x
x   x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   987.55771716 -6245.61866138   383.90892328]
------
Step:1, Action:South
State  136
Old Q Values:  [ -724.71310357   987.55771716 -6245.61866138   383.90892328]
New Q values:  [ -724.71310357   844.03390705 -6245.61866138   383.90892328]
Reward: 9  Episode Reward:  9
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  872.26796306  1478.70273395 -6170.35693855 -2387.54492731]
------
Step:2, Action:South
State  208
Old Q Values:  [21214.09984495  7544.98061905 -2651.70614553   803.31074191]
New Q values:  [21214.09984495  5727.99660342 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1129.35404202 -8656.02923281 -7525.7277781   9015.34785269]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1129.35404202 -8656.02923281 -7525.7277781   9015.34785269]
New Q values:  [ 1129.35404202 -8656.02923281 -7525.7277781    609.39420469]
Reward: -9991  Episode Reward:  -9973
xxxxx
x . x
x.. x
x.g x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2006.25435137 -805.21376682 -180.6           3.52184257]
------
Step:1, Action:North
State  210
Old Q Values:  [2006.25435137 -805.21376682 -180.6           3.52184257]
New Q values:  [1134.0252577  -805.21376682 -180.6           3.52184257]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1087.07839052 -180.6         233.31403971]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        1087.07839052 -180.6         233.31403971]
New Q values:  [-180.6         774.43893352 -180.6         233.31403971]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1134.0252577  -805.21376682 -180.6           3.52184257]
------
Step:3, Action:North
State  210
Old Q Values:  [1134.0252577  -805.21376682 -180.6           3.52184257]
New Q values:  [ 685.34178314 -805.21376682 -180.6           3.52184257]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         774.43893352 -180.6         233.31403971]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6         774.43893352 -180.6         233.31403971]
New Q values:  [-180.6        6673.40552689 -180.6         233.31403971]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21214.09984495  5727.99660342 -2651.70614553   803.31074191]
------
Step:5, Action:North
State  208
Old Q Values:  [21214.09984495  5727.99660342 -2651.70614553   803.31074191]
New Q values:  [10487.06159605  5727.99660342 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        6673.40552689 -180.6         233.31403971]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6        6673.40552689 -180.6         233.31403971]
New Q values:  [-180.6        6673.40552689 -180.6         442.6704092 ]
Reward: 9  Episode Reward:  14
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1146.48264438    24.38756513]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   726.61475969   265.75897468]
New Q values:  [ -281.736      -8877.87327254  2292.06756194   265.75897468]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        6673.40552689 -180.6         442.6704092 ]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6        6673.40552689 -180.6         442.6704092 ]
New Q values:  [-180.6       2874.3647457 -180.6        442.6704092]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 685.34178314 -805.21376682 -180.6           3.52184257]
------
Step:9, Action:North
State  208
Old Q Values:  [10487.06159605  5727.99660342 -2651.70614553   803.31074191]
New Q values:  [ 5056.53406213  5727.99660342 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       2874.3647457 -180.6        442.6704092]
------
Step:10, Action:South
State  138
Old Q Values:  [-180.6       2874.3647457 -180.6        442.6704092]
New Q values:  [-180.6        1354.74843322 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 685.34178314 -805.21376682 -180.6           3.52184257]
------
Step:11, Action:North
State  208
Old Q Values:  [ 5056.53406213  5727.99660342 -2651.70614553   803.31074191]
New Q values:  [ 2428.43815482  5727.99660342 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1354.74843322 -180.6         442.6704092 ]
------
Step:12, Action:South
State  138
Old Q Values:  [-180.6        1354.74843322 -180.6         442.6704092 ]
New Q values:  [-180.6        2259.69835432 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2428.43815482  5727.99660342 -2651.70614553   803.31074191]
------
Step:13, Action:South
State  208
Old Q Values:  [ 2428.43815482  5727.99660342 -2651.70614553   803.31074191]
New Q values:  [ 2428.43815482  2629.40485398 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1129.35404202 -8656.02923281 -7525.7277781    609.39420469]
------
Step:14, Action:North
State  288
Old Q Values:  [ 1129.35404202 -8656.02923281 -7525.7277781    609.39420469]
New Q values:  [ 1239.963073   -8656.02923281 -7525.7277781    609.39420469]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2428.43815482  2629.40485398 -2651.70614553   803.31074191]
------
Step:15, Action:South
State  208
Old Q Values:  [ 2428.43815482  2629.40485398 -2651.70614553   803.31074191]
New Q values:  [ 2428.43815482  1423.15086349 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  5
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1239.963073   -8656.02923281 -7525.7277781    609.39420469]
------
Step:16, Action:North
State  288
Old Q Values:  [ 1239.963073   -8656.02923281 -7525.7277781    609.39420469]
New Q values:  [ 1223.91667565 -8656.02923281 -7525.7277781    609.39420469]
Reward: -1  Episode Reward:  4
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2428.43815482  1423.15086349 -2651.70614553   803.31074191]
------
Step:17, Action:North
State  208
Old Q Values:  [ 2428.43815482  1423.15086349 -2651.70614553   803.31074191]
New Q values:  [ 1648.68476822  1423.15086349 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2259.69835432 -180.6         442.6704092 ]
------
Step:18, Action:South
State  138
Old Q Values:  [-180.6        2259.69835432 -180.6         442.6704092 ]
New Q values:  [-180.6        1108.88187667 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  2
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 685.34178314 -805.21376682 -180.6           3.52184257]
------
Step:19, Action:North
State  208
Old Q Values:  [ 1648.68476822  1423.15086349 -2651.70614553   803.31074191]
New Q values:  [  991.53847029  1423.15086349 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  1
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1108.88187667 -180.6         442.6704092 ]
------
Step:20, Action:South
State  138
Old Q Values:  [-180.6        1108.88187667 -180.6         442.6704092 ]
New Q values:  [-180.6         869.89800971 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  0
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  991.53847029  1423.15086349 -2651.70614553   803.31074191]
------
Step:21, Action:South
State  208
Old Q Values:  [  991.53847029  1423.15086349 -2651.70614553   803.31074191]
New Q values:  [  991.53847029   935.83534809 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  -1
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1223.91667565 -8656.02923281 -7525.7277781    609.39420469]
------
Step:22, Action:North
State  288
Old Q Values:  [ 1223.91667565 -8656.02923281 -7525.7277781    609.39420469]
New Q values:  [  786.42821134 -8656.02923281 -7525.7277781    609.39420469]
Reward: -1  Episode Reward:  -2
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  991.53847029   935.83534809 -2651.70614553   803.31074191]
------
Step:23, Action:South
State  208
Old Q Values:  [  991.53847029   935.83534809 -2651.70614553   803.31074191]
New Q values:  [  991.53847029   609.66260264 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  -3
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  786.42821134 -8656.02923281 -7525.7277781    609.39420469]
------
Step:24, Action:West
State  288
Old Q Values:  [  786.42821134 -8656.02923281 -7525.7277781    609.39420469]
New Q values:  [  786.42821134 -8656.02923281 -7525.7277781   1612.98344863]
Reward: 9  Episode Reward:  6
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 4546.08588916]
------
Step:25, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 4546.08588916]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 8418.38142732]
Reward: 9  Episode Reward:  15
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21981.82357218  -180.6         3852.81245667  4453.74493219]
------
Step:26, Action:North
State  257
Old Q Values:  [21981.82357218  -180.6         3852.81245667  4453.74493219]
New Q values:  [19005.82931963  -180.6         3852.81245667  4453.74493219]
Reward: 9  Episode Reward:  24
xxxxx
x.  x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         34025.66630253 18466.46146429     0.        ]
------
Step:27, Action:South
State  177
Old Q Values:  [    0.         17101.20554202 15504.78527399     0.        ]
New Q values:  [    0.         12541.6310127  15504.78527399     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[19005.82931963  -180.6         3852.81245667  4453.74493219]
------
Step:28, Action:North
State  257
Old Q Values:  [19005.82931963  -180.6         3852.81245667  4453.74493219]
New Q values:  [12253.16731005  -180.6         3852.81245667  4453.74493219]
Reward: -1  Episode Reward:  22
xxxxx
x. gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         12541.6310127  15504.78527399     0.        ]
------
Step:29, Action:East
State  177
Old Q Values:  [    0.         12541.6310127  15504.78527399     0.        ]
New Q values:  [    0.         12541.6310127   9693.33300543     0.        ]
Reward: 9  Episode Reward:  31
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.16200630e+04 7.62715392e+03 0.00000000e+00]
------
Step:30, Action:South
State  193
Old Q Values:  [-5922.26708831  7053.84308572 -3909.58186816  2096.80577447]
New Q values:  [-5922.26708831  5346.45166248 -3909.58186816  2096.80577447]
Reward: -1  Episode Reward:  30
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 8418.38142732]
------
Step:31, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898  9992.85021207]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898  7672.49027784]
Reward: -1  Episode Reward:  29
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12253.16731005  -180.6         3852.81245667  4453.74493219]
------
Step:32, Action:North
State  256
Old Q Values:  [  205.63143496     0.          3778.3717795  -2468.58126138]
New Q values:  [ 6043.26786367     0.          3778.3717795  -2468.58126138]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NW
[    0.           501.88489382 19872.05096563     0.        ]
------
Step:33, Action:East
State  176
Old Q Values:  [    0.           501.88489382 19872.05096563     0.        ]
New Q values:  [    0.           501.88489382 11434.23928209     0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.16200630e+04 7.62715392e+03 0.00000000e+00]
------
Step:34, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.16200630e+04 7.62715392e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 6.94917228e+03 7.62715392e+03 0.00000000e+00]
Reward: -1  Episode Reward:  26
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2229.69453898  7672.49027784]
------
Step:35, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898  7672.49027784]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898  6744.34630415]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12253.16731005  -180.6         3852.81245667  4453.74493219]
------
Step:36, Action:North
State  257
Old Q Values:  [12253.16731005  -180.6         3852.81245667  4453.74493219]
New Q values:  [8663.15622783 -180.6        3852.81245667 4453.74493219]
Reward: -1  Episode Reward:  24
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         12541.6310127   9693.33300543     0.        ]
------
Step:37, Action:South
State  177
Old Q Values:  [    0.         12541.6310127   9693.33300543     0.        ]
New Q values:  [   0.         7614.99927343 9693.33300543    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x. gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[8663.15622783 -180.6        3852.81245667 4453.74493219]
------
Step:38, Action:North
State  257
Old Q Values:  [8663.15622783 -180.6        3852.81245667 4453.74493219]
New Q values:  [6372.66239276 -180.6        3852.81245667 4453.74493219]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         7614.99927343 9693.33300543    0.        ]
------
Step:39, Action:East
State  179
Old Q Values:  [    0.         34025.66630253 18466.46146429     0.        ]
New Q values:  [    0.         34025.66630253 15437.79452068     0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3081.56358951  1388.1600337  26839.36644989]
------
Step:40, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  2.92601219e+03  2.79697622e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  2.92601219e+03  1.13258904e+04]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         34025.66630253 15437.79452068     0.        ]
------
Step:41, Action:South
State  177
Old Q Values:  [   0.         7614.99927343 9693.33300543    0.        ]
New Q values:  [   0.         4957.1984272  9693.33300543    0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[6372.66239276 -180.6        3852.81245667 4453.74493219]
------
Step:42, Action:North
State  257
Old Q Values:  [6372.66239276 -180.6        3852.81245667 4453.74493219]
New Q values:  [5456.46485873 -180.6        3852.81245667 4453.74493219]
Reward: -1  Episode Reward:  18
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         4957.1984272  9693.33300543    0.        ]
------
Step:43, Action:East
State  177
Old Q Values:  [   0.         4957.1984272  9693.33300543    0.        ]
New Q values:  [   0.         4957.1984272  5480.66870092    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x. gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5346.45166248 -3909.58186816  2096.80577447]
------
Step:44, Action:South
State  193
Old Q Values:  [-5922.26708831  5346.45166248 -3909.58186816  2096.80577447]
New Q values:  [-5922.26708831  4663.49509319 -3909.58186816  2096.80577447]
Reward: -1  Episode Reward:  16
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 8418.38142732]
------
Step:45, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898  6744.34630415]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898  4334.07797928]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[5456.46485873 -180.6        3852.81245667 4453.74493219]
------
Step:46, Action:North
State  257
Old Q Values:  [5456.46485873 -180.6        3852.81245667 4453.74493219]
New Q values:  [3826.18655377 -180.6        3852.81245667 4453.74493219]
Reward: -1  Episode Reward:  14
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         4957.1984272  5480.66870092    0.        ]
------
Step:47, Action:East
State  177
Old Q Values:  [   0.         4957.1984272  5480.66870092    0.        ]
New Q values:  [   0.         4957.1984272  3590.71600832    0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4663.49509319 -3909.58186816  2096.80577447]
------
Step:48, Action:South
State  193
Old Q Values:  [-5922.26708831  4663.49509319 -3909.58186816  2096.80577447]
New Q values:  [-5922.26708831  4390.31246547 -3909.58186816  2096.80577447]
Reward: -1  Episode Reward:  12
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 8418.38142732]
------
Step:49, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898  4334.07797928]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898  3069.15467137]
Reward: -1  Episode Reward:  11
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[3826.18655377 -180.6        3852.81245667 4453.74493219]
------
Step:50, Action:West
State  256
Old Q Values:  [ 6043.26786367     0.          3778.3717795  -2468.58126138]
New Q values:  [6043.26786367    0.         3778.3717795   644.94785455]
Reward: -301  Episode Reward:  -290
xxxxx
xg  x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[6043.26786367    0.         3778.3717795   644.94785455]
------
Step:51, Action:North
State  257
Old Q Values:  [3826.18655377 -180.6        3852.81245667 4453.74493219]
New Q values:  [3017.03414967 -180.6        3852.81245667 4453.74493219]
Reward: -1  Episode Reward:  -291
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         4957.1984272  3590.71600832    0.        ]
------
Step:52, Action:South
State  177
Old Q Values:  [   0.         4957.1984272  3590.71600832    0.        ]
New Q values:  [   0.         3318.40285054 3590.71600832    0.        ]
Reward: -1  Episode Reward:  -292
xxxxx
x. gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[3017.03414967 -180.6        3852.81245667 4453.74493219]
------
Step:53, Action:West
State  257
Old Q Values:  [3017.03414967 -180.6        3852.81245667 4453.74493219]
New Q values:  [3017.03414967 -180.6        3852.81245667 2937.02145253]
Reward: -301  Episode Reward:  -593
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[3017.03414967 -180.6        3852.81245667 2937.02145253]
------
Step:54, Action:East
State  256
Old Q Values:  [6043.26786367    0.         3778.3717795   644.94785455]
New Q values:  [6043.26786367    0.         2431.49511321  644.94785455]
Reward: -1  Episode Reward:  -594
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2229.69453898  3069.15467137]
------
Step:55, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898  3069.15467137]
New Q values:  [-2527.46239811 -6212.61234477  2229.69453898  3040.04222765]
Reward: -1  Episode Reward:  -595
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[6043.26786367    0.         2431.49511321  644.94785455]
------
Step:56, Action:East
State  256
Old Q Values:  [6043.26786367    0.         2431.49511321  644.94785455]
New Q values:  [6043.26786367    0.         1884.01071358  644.94785455]
Reward: -1  Episode Reward:  -596
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2229.69453898  3040.04222765]
------
Step:57, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2229.69453898  3040.04222765]
New Q values:  [-2527.46239811 -6212.61234477  1375.17285018  3040.04222765]
Reward: -1  Episode Reward:  -597
xxxxx
x.  x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  786.42821134 -8656.02923281 -7525.7277781   1612.98344863]
------
Step:58, Action:West
State  288
Old Q Values:  [  786.42821134 -8656.02923281 -7525.7277781   1612.98344863]
New Q values:  [  786.42821134 -8656.02923281 -7525.7277781   1556.60604775]
Reward: -1  Episode Reward:  -598
xxxxx
x.  x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1375.17285018  3040.04222765]
------
Step:59, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1375.17285018  3040.04222765]
New Q values:  [-2527.46239811 -6212.61234477  1375.17285018  3028.39725016]
Reward: -1  Episode Reward:  -599
xxxxx
xg  x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[6043.26786367    0.         1884.01071358  644.94785455]
------
Step:60, Action:North
State  256
Old Q Values:  [6043.26786367    0.         1884.01071358  644.94785455]
New Q values:  [-153.0210699     0.         1884.01071358  644.94785455]
Reward: -10001  Episode Reward:  -10600
xxxxx
x.  x
xg  x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  2096.80577447]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.94917228e+03 7.62715392e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 3.69358809e+03 7.62715392e+03 0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1375.17285018  3028.39725016]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1375.17285018  3028.39725016]
New Q values:  [-2527.46239811 -6212.61234477  1375.17285018  3445.10294486]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4187.30744448 -6457.4598      7427.81348266 -2702.17995449]
------
Step:3, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598      7427.81348266 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598      4004.05627652 -2702.17995449]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1375.17285018  3445.10294486]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1375.17285018  3445.10294486]
New Q values:  [-2527.46239811 -6212.61234477  1375.17285018 -3421.3419391 ]
Reward: -10001  Episode Reward:  -9984
xxxxx
x.. x
x. .x
xg .x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1583.20254475  -289.59534477 -1538.97879716  -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1583.20254475  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  723.28596532  -289.59534477 -1538.97879716  -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   282.01649138 -3255.52590138  -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   13.85659648   282.01649138 -3255.52590138  -180.6       ]
New Q values:  [   13.85659648   329.19238615 -3255.52590138  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  723.28596532  -289.59534477 -1538.97879716  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  723.28596532  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  387.47210197  -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   329.19238615 -3255.52590138  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648   329.19238615 -3255.52590138  -180.6       ]
New Q values:  [   13.85659648   247.31858505 -3255.52590138  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  387.47210197  -289.59534477 -1538.97879716  -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [  387.47210197  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  228.5844163   -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   247.31858505 -3255.52590138  -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [   13.85659648   247.31858505 -3255.52590138  -180.6       ]
New Q values:  [   13.85659648   166.90275891 -3255.52590138  -180.6       ]
Reward: -1  Episode Reward:  4
xxxxx
x..gx
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  228.5844163   -289.59534477 -1538.97879716  -251.53897752]
------
Step:7, Action:North
State  261
Old Q Values:  [  228.5844163   -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  140.90459419  -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   166.90275891 -3255.52590138  -180.6       ]
------
Step:8, Action:South
State  181
Old Q Values:  [   13.85659648   166.90275891 -3255.52590138  -180.6       ]
New Q values:  [   13.85659648   108.43248182 -3255.52590138  -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x..gx
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  140.90459419  -289.59534477 -1538.97879716  -251.53897752]
------
Step:9, Action:North
State  261
Old Q Values:  [  140.90459419  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [   88.29158222  -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  1
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   108.43248182 -3255.52590138  -180.6       ]
------
Step:10, Action:South
State  181
Old Q Values:  [   13.85659648   108.43248182 -3255.52590138  -180.6       ]
New Q values:  [   13.85659648    69.2604674  -3255.52590138  -180.6       ]
Reward: -1  Episode Reward:  0
xxxxx
x..gx
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   88.29158222  -289.59534477 -1538.97879716  -251.53897752]
------
Step:11, Action:North
State  261
Old Q Values:  [   88.29158222  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [   55.49477311  -289.59534477 -1538.97879716  -251.53897752]
Reward: -1  Episode Reward:  -1
xxxxx
x.g.x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    69.2604674  -3255.52590138  -180.6       ]
------
Step:12, Action:South
State  180
Old Q Values:  [-1367.02476015  1116.33624525  5712.49284801     0.        ]
New Q values:  [-1367.02476015  1647.15138106  5712.49284801     0.        ]
Reward: -1  Episode Reward:  -2
xxxxx
xg..x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4187.30744448 -6457.4598      4004.05627652 -2702.17995449]
------
Step:13, Action:East
State  261
Old Q Values:  [   55.49477311  -289.59534477 -1538.97879716  -251.53897752]
New Q values:  [  55.49477311 -289.59534477 1876.99825478 -251.53897752]
Reward: 9  Episode Reward:  7
xxxxx
x.g.x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8290.63257881 1160.19572766]
------
Step:14, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        8290.63257881 1160.19572766]
New Q values:  [  16.82637525 -180.6        3788.63484585 1160.19572766]
Reward: 9  Episode Reward:  16
xxxxx
xg..x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  786.42821134 -8656.02923281 -7525.7277781   1556.60604775]
------
Step:15, Action:West
State  288
Old Q Values:  [  786.42821134 -8656.02923281 -7525.7277781   1556.60604775]
New Q values:  [  786.42821134 -8656.02923281 -7525.7277781   1034.59427415]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1375.17285018 -3421.3419391 ]
------
Step:16, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        3788.63484585 1160.19572766]
New Q values:  [  16.82637525 -180.6        1825.23222059 1160.19572766]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  786.42821134 -8656.02923281 -7525.7277781   1034.59427415]
------
Step:17, Action:West
State  288
Old Q Values:  [  786.42821134 -8656.02923281 -7525.7277781   1034.59427415]
New Q values:  [  786.42821134 -8656.02923281 -7525.7277781    825.78956471]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1375.17285018 -3421.3419391 ]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1375.17285018 -3421.3419391 ]
New Q values:  [-2527.46239811 -6212.61234477   797.20600949 -3421.3419391 ]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x  .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  786.42821134 -8656.02923281 -7525.7277781    825.78956471]
------
Step:19, Action:West
State  288
Old Q Values:  [  786.42821134 -8656.02923281 -7525.7277781    825.78956471]
New Q values:  [  786.42821134 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: -10001  Episode Reward:  -9989
xxxxx
x...x
x  .x
x g x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    69.2604674  -3255.52590138  -180.6       ]
------
Step:1, Action:South
State  183
Old Q Values:  [  22.25138791  255.77581227 5046.168126      0.        ]
New Q values:  [  22.25138791  670.80980134 5046.168126      0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  55.49477311 -289.59534477 1876.99825478 -251.53897752]
------
Step:2, Action:East
State  261
Old Q Values:  [  55.49477311 -289.59534477 1876.99825478 -251.53897752]
New Q values:  [  55.49477311 -289.59534477 3281.71373011 -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x .gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 8418.38142732]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   797.20600949 -3421.3419391 ]
New Q values:  [-2527.46239811 -6212.61234477   797.20600949  -384.62265661]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  55.49477311 -289.59534477 3281.71373011 -251.53897752]
------
Step:4, Action:East
State  261
Old Q Values:  [  55.49477311 -289.59534477 3281.71373011 -251.53897752]
New Q values:  [  55.49477311 -289.59534477 1551.24729489 -251.53897752]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   797.20600949  -384.62265661]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   797.20600949  -384.62265661]
New Q values:  [-2527.46239811 -6212.61234477   560.2108672   -384.62265661]
Reward: 9  Episode Reward:  25
xxxxx
xg..x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  786.42821134 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:6, Action:North
State  288
Old Q Values:  [  786.42821134 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [  611.43282562 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: -1  Episode Reward:  24
xxxxx
x.g.x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  991.53847029   609.66260264 -2651.70614553   803.31074191]
------
Step:7, Action:North
State  208
Old Q Values:  [  991.53847029   609.66260264 -2651.70614553   803.31074191]
New Q values:  [ 6708.15983285   609.66260264 -2651.70614553   803.31074191]
Reward: 9  Episode Reward:  33
xxxxx
xg.ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6067.82062533  1504.73148864 -8652.84       21020.48148244]
------
Step:8, Action:West
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 52229.93681651]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 53833.48717724]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  1.09787042e+05]
------
Step:9, Action:West
State  114
Old Q Values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  1.09787042e+05]
New Q values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  5.62061845e+04]
Reward: 9  Episode Reward:  51
xxxxx
xa  x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[    0.         40953.22618548 29647.29381671     0.        ]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869  645.55489711  862.42400667 -272.09726687]
New Q values:  [-177.44732869 1771.47239665  862.42400667 -272.09726687]
Reward: -1  Episode Reward:  50
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  670.80980134 5046.168126      0.        ]
------
Step:11, Action:East
State  178
Old Q Values:  [    0.             0.         61150.17890313     0.        ]
New Q values:  [    0.             0.         87863.23867435     0.        ]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
x a x
xg  x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6708.15983285   609.66260264 -2651.70614553   803.31074191]
------
Step:1, Action:North
State  216
Old Q Values:  [  872.26796306  1478.70273395 -6170.35693855 -2387.54492731]
New Q values:  [  607.51735734  1478.70273395 -6170.35693855 -2387.54492731]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   844.03390705 -6245.61866138   383.90892328]
------
Step:2, Action:South
State  136
Old Q Values:  [ -724.71310357   844.03390705 -6245.61866138   383.90892328]
New Q values:  [ -724.71310357  2349.46151267 -6245.61866138   383.90892328]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6708.15983285   609.66260264 -2651.70614553   803.31074191]
------
Step:3, Action:North
State  216
Old Q Values:  [  607.51735734  1478.70273395 -6170.35693855 -2387.54492731]
New Q values:  [  947.24539674  1478.70273395 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  7
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2349.46151267 -6245.61866138   383.90892328]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6         869.89800971 -180.6         442.6704092 ]
New Q values:  [-180.6         790.97002407 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  947.24539674  1478.70273395 -6170.35693855 -2387.54492731]
------
Step:5, Action:South
State  210
Old Q Values:  [ 685.34178314 -805.21376682 -180.6           3.52184257]
New Q values:  [ 685.34178314 -133.25565904 -180.6           3.52184257]
Reward: 9  Episode Reward:  15
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  611.43282562 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:6, Action:North
State  288
Old Q Values:  [  611.43282562 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [ 2256.4210801  -8656.02923281 -7525.7277781  -5431.12237127]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6708.15983285   609.66260264 -2651.70614553   803.31074191]
------
Step:7, Action:North
State  210
Old Q Values:  [ 685.34178314 -133.25565904 -180.6           3.52184257]
New Q values:  [ 510.82772048 -133.25565904 -180.6           3.52184257]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         790.97002407 -180.6         442.6704092 ]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6         790.97002407 -180.6         442.6704092 ]
New Q values:  [-180.6        2328.23595948 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6708.15983285   609.66260264 -2651.70614553   803.31074191]
------
Step:9, Action:North
State  208
Old Q Values:  [ 6708.15983285   609.66260264 -2651.70614553   803.31074191]
New Q values:  [ 3381.13472098   609.66260264 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2328.23595948 -180.6         442.6704092 ]
------
Step:10, Action:South
State  138
Old Q Values:  [-180.6        2328.23595948 -180.6         442.6704092 ]
New Q values:  [-180.6        1083.94269994 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 510.82772048 -133.25565904 -180.6           3.52184257]
------
Step:11, Action:North
State  208
Old Q Values:  [ 3381.13472098   609.66260264 -2651.70614553   803.31074191]
New Q values:  [ 1677.03669837   609.66260264 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1083.94269994 -180.6         442.6704092 ]
------
Step:12, Action:South
State  136
Old Q Values:  [ -724.71310357  2349.46151267 -6245.61866138   383.90892328]
New Q values:  [ -724.71310357  1442.29561458 -6245.61866138   383.90892328]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1677.03669837   609.66260264 -2651.70614553   803.31074191]
------
Step:13, Action:North
State  208
Old Q Values:  [ 1677.03669837   609.66260264 -2651.70614553   803.31074191]
New Q values:  [  995.39748933   609.66260264 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1083.94269994 -180.6         442.6704092 ]
------
Step:14, Action:South
State  138
Old Q Values:  [-180.6        1083.94269994 -180.6         442.6704092 ]
New Q values:  [-180.6         586.22539612 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 510.82772048 -133.25565904 -180.6           3.52184257]
------
Step:15, Action:North
State  210
Old Q Values:  [ 510.82772048 -133.25565904 -180.6           3.52184257]
New Q values:  [ 379.59870703 -133.25565904 -180.6           3.52184257]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         586.22539612 -180.6         442.6704092 ]
------
Step:16, Action:South
State  138
Old Q Values:  [-180.6         586.22539612 -180.6         442.6704092 ]
New Q values:  [-180.6         347.76977055 -180.6         442.6704092 ]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 379.59870703 -133.25565904 -180.6           3.52184257]
------
Step:17, Action:North
State  208
Old Q Values:  [  995.39748933   609.66260264 -2651.70614553   803.31074191]
New Q values:  [  530.36011849   609.66260264 -2651.70614553   803.31074191]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         347.76977055 -180.6         442.6704092 ]
------
Step:18, Action:West
State  138
Old Q Values:  [-180.6         347.76977055 -180.6         442.6704092 ]
New Q values:  [-180.6         347.76977055 -180.6         864.08843226]
Reward: -1  Episode Reward:  2
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254  2292.06756194   265.75897468]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  2292.06756194   265.75897468]
New Q values:  [ -281.736      -8877.87327254  1175.45355446   265.75897468]
Reward: -1  Episode Reward:  1
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         347.76977055 -180.6         864.08843226]
------
Step:20, Action:West
State  138
Old Q Values:  [-180.6         347.76977055 -180.6         864.08843226]
New Q values:  [-180.6         347.76977055 -180.6         688.98016622]
Reward: -1  Episode Reward:  0
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1146.48264438    24.38756513]
------
Step:21, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1146.48264438    24.38756513]
New Q values:  [ -253.44886264 -1902.20915811   664.68710762    24.38756513]
Reward: -1  Episode Reward:  -1
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         347.76977055 -180.6         688.98016622]
------
Step:22, Action:West
State  138
Old Q Values:  [-180.6         347.76977055 -180.6         688.98016622]
New Q values:  [-180.6         347.76977055 -180.6         474.39819877]
Reward: -1  Episode Reward:  -2
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   664.68710762    24.38756513]
------
Step:23, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   664.68710762    24.38756513]
New Q values:  [ -253.44886264 -1902.20915811   407.59430268    24.38756513]
Reward: -1  Episode Reward:  -3
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         347.76977055 -180.6         474.39819877]
------
Step:24, Action:West
State  138
Old Q Values:  [-180.6         347.76977055 -180.6         474.39819877]
New Q values:  [-180.6         347.76977055 -180.6         541.79534585]
Reward: -1  Episode Reward:  -4
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254  1175.45355446   265.75897468]
------
Step:25, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  1175.45355446   265.75897468]
New Q values:  [ -281.736      -8877.87327254   632.12002554   265.75897468]
Reward: -1  Episode Reward:  -5
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         347.76977055 -180.6         541.79534585]
------
Step:26, Action:West
State  138
Old Q Values:  [-180.6         347.76977055 -180.6         541.79534585]
New Q values:  [-180.6         347.76977055 -180.6         405.754146  ]
Reward: -1  Episode Reward:  -6
xxxxx
x.a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   632.12002554   265.75897468]
------
Step:27, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        1654.90312133     49.8565746 ]
New Q values:  [-10156.11771313  -5995.686        1094.04993291     49.8565746 ]
Reward: -1  Episode Reward:  -7
xxxxx
xg ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1442.29561458 -6245.61866138   383.90892328]
------
Step:28, Action:South
State  138
Old Q Values:  [-180.6         347.76977055 -180.6         405.754146  ]
New Q values:  [-180.6        379.5011308 -180.6        405.754146 ]
Reward: -1  Episode Reward:  -8
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  530.36011849   609.66260264 -2651.70614553   803.31074191]
------
Step:29, Action:West
State  208
Old Q Values:  [  530.36011849   609.66260264 -2651.70614553   803.31074191]
New Q values:  [  530.36011849   609.66260264 -2651.70614553 -3385.12952694]
Reward: -9991  Episode Reward:  -9999
xxxxx
x.  x
x.g x
x.. x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1442.29561458 -6245.61866138   383.90892328]
------
Step:1, Action:South
State  136
Old Q Values:  [ -724.71310357  1442.29561458 -6245.61866138   383.90892328]
New Q values:  [ -724.71310357  1025.92906602 -6245.61866138   383.90892328]
Reward: 9  Episode Reward:  9
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  947.24539674  1478.70273395 -6170.35693855 -2387.54492731]
------
Step:2, Action:South
State  208
Old Q Values:  [  530.36011849   609.66260264 -2651.70614553 -3385.12952694]
New Q values:  [  530.36011849   926.19136509 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2256.4210801  -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:3, Action:North
State  288
Old Q Values:  [ 2256.4210801  -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [-4820.17415843 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: -10001  Episode Reward:  -9983
xxxxx
x . x
x..gx
x.. x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    69.2604674  -3255.52590138  -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [   13.85659648    69.2604674  -3255.52590138  -180.6       ]
New Q values:  [   13.85659648   498.47837543 -3255.52590138  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  55.49477311 -289.59534477 1551.24729489 -251.53897752]
------
Step:2, Action:East
State  261
Old Q Values:  [  55.49477311 -289.59534477 1551.24729489 -251.53897752]
New Q values:  [  55.49477311 -289.59534477  793.96217811 -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   560.2108672   -384.62265661]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   560.2108672   -384.62265661]
New Q values:  [-2527.46239811 -6212.61234477 -1216.56790065  -384.62265661]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4820.17415843 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:4, Action:North
State  288
Old Q Values:  [-4820.17415843 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [-1808.79005127 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 379.59870703 -133.25565904 -180.6           3.52184257]
------
Step:5, Action:North
State  208
Old Q Values:  [  530.36011849   926.19136509 -2651.70614553 -3385.12952694]
New Q values:  [16361.59020057   926.19136509 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  35
xxxxx
x..ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 53833.48717724]
------
Step:6, Action:West
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 53833.48717724]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 51186.92868761]
Reward: 9  Episode Reward:  44
xxxxx
x.a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  9.88271127e+04]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   632.12002554   265.75897468]
New Q values:  [ -281.736      -8877.87327254   632.12002554   643.14530887]
Reward: 9  Episode Reward:  53
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1771.47239665  862.42400667 -272.09726687]
------
Step:8, Action:South
State  99
Old Q Values:  [    0.         40953.22618548 29647.29381671     0.        ]
New Q values:  [    0.         17457.90527669 29647.29381671     0.        ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         3318.40285054 3590.71600832    0.        ]
------
Step:9, Action:South
State  183
Old Q Values:  [  22.25138791  670.80980134 5046.168126      0.        ]
New Q values:  [  22.25138791  505.91257397 5046.168126      0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  55.49477311 -289.59534477  793.96217811 -251.53897752]
------
Step:10, Action:North
State  261
Old Q Values:  [  55.49477311 -289.59534477  793.96217811 -251.53897752]
New Q values:  [1535.44834704 -289.59534477  793.96217811 -251.53897752]
Reward: -1  Episode Reward:  50
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  505.91257397 5046.168126      0.        ]
------
Step:11, Action:East
State  179
Old Q Values:  [    0.         34025.66630253 15437.79452068     0.        ]
New Q values:  [    0.         34025.66630253 69578.28492137     0.        ]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 -1216.56790065  -384.62265661]
------
Step:1, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        1825.23222059 1160.19572766]
New Q values:  [  16.82637525 -180.6        1825.23222059  930.11279518]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1535.44834704 -289.59534477  793.96217811 -251.53897752]
------
Step:2, Action:North
State  261
Old Q Values:  [1535.44834704 -289.59534477  793.96217811 -251.53897752]
New Q values:  [ 769.12285145 -289.59534477  793.96217811 -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   498.47837543 -3255.52590138  -180.6       ]
------
Step:3, Action:South
State  180
Old Q Values:  [-1367.02476015  1647.15138106  5712.49284801     0.        ]
New Q values:  [-1367.02476015  1859.47743538  5712.49284801     0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4187.30744448 -6457.4598      4004.05627652 -2702.17995449]
------
Step:4, Action:East
State  261
Old Q Values:  [ 769.12285145 -289.59534477  793.96217811 -251.53897752]
New Q values:  [ 769.12285145 -289.59534477  864.55453742 -251.53897752]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1825.23222059  930.11279518]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 -1216.56790065  -384.62265661]
New Q values:  [-2527.46239811 -6212.61234477 -1023.86417564  -384.62265661]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1808.79005127 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:6, Action:North
State  288
Old Q Values:  [-1808.79005127 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [ -604.2364084  -8656.02923281 -7525.7277781  -5431.12237127]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 379.59870703 -133.25565904 -180.6           3.52184257]
------
Step:7, Action:North
State  210
Old Q Values:  [ 379.59870703 -133.25565904 -180.6           3.52184257]
New Q values:  [ 1.55133181e+04 -1.33255659e+02 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  43
xxxxx
x .ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 51186.92868761]
------
Step:8, Action:West
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 51186.92868761]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 37342.02681199]
Reward: 9  Episode Reward:  52
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  5.62061845e+04]
------
Step:9, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686        1094.04993291     49.8565746 ]
New Q values:  [-10156.11771313  -5995.686        1094.04993291    180.241376  ]
Reward: -1  Episode Reward:  51
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   536.32915387 -8213.80649336  -180.6       ]
------
Step:10, Action:South
State  99
Old Q Values:  [    0.         17457.90527669 29647.29381671     0.        ]
New Q values:  [    0.          8059.77691317 29647.29381671     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         3318.40285054 3590.71600832    0.        ]
------
Step:11, Action:South
State  183
Old Q Values:  [  22.25138791  505.91257397 5046.168126      0.        ]
New Q values:  [  22.25138791  461.13139081 5046.168126      0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 769.12285145 -289.59534477  864.55453742 -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [ 769.12285145 -289.59534477  864.55453742 -251.53897752]
New Q values:  [1820.89957838 -289.59534477  864.55453742 -251.53897752]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  461.13139081 5046.168126      0.        ]
------
Step:13, Action:East
State  177
Old Q Values:  [   0.         3318.40285054 3590.71600832    0.        ]
New Q values:  [    0.          3318.40285054 62758.78014297     0.        ]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -604.2364084  -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:1, Action:North
State  288
Old Q Values:  [ -604.2364084  -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [ 4672.18249681 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16361.59020057   926.19136509 -2651.70614553 -3385.12952694]
------
Step:2, Action:North
State  208
Old Q Values:  [16361.59020057   926.19136509 -2651.70614553 -3385.12952694]
New Q values:  [ 6671.76232403   926.19136509 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        379.5011308 -180.6        405.754146 ]
------
Step:3, Action:West
State  136
Old Q Values:  [ -724.71310357  1025.92906602 -6245.61866138   383.90892328]
New Q values:  [ -724.71310357  1025.92906602 -6245.61866138   487.17854918]
Reward: 9  Episode Reward:  27
xxxxx
xga x
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        1094.04993291    180.241376  ]
------
Step:4, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        1094.04993291    180.241376  ]
New Q values:  [-10156.11771313  -5995.686         744.79869297    180.241376  ]
Reward: -1  Episode Reward:  26
xxxxx
x.gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1025.92906602 -6245.61866138   487.17854918]
------
Step:5, Action:South
State  136
Old Q Values:  [ -724.71310357  1025.92906602 -6245.61866138   487.17854918]
New Q values:  [ -724.71310357   853.38244659 -6245.61866138   487.17854918]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  947.24539674  1478.70273395 -6170.35693855 -2387.54492731]
------
Step:6, Action:South
State  208
Old Q Values:  [ 6671.76232403   926.19136509 -2651.70614553 -3385.12952694]
New Q values:  [ 6671.76232403  1771.53129508 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  24
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4672.18249681 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:7, Action:North
State  288
Old Q Values:  [ 4672.18249681 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [ 2311.88381891 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  947.24539674  1478.70273395 -6170.35693855 -2387.54492731]
------
Step:8, Action:South
State  208
Old Q Values:  [ 6671.76232403  1771.53129508 -2651.70614553 -3385.12952694]
New Q values:  [ 6671.76232403  1401.5776637  -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2311.88381891 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2311.88381891 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [ 1367.76434775 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  947.24539674  1478.70273395 -6170.35693855 -2387.54492731]
------
Step:10, Action:South
State  208
Old Q Values:  [ 6671.76232403  1401.5776637  -2651.70614553 -3385.12952694]
New Q values:  [ 6671.76232403   970.36036981 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  20
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1367.76434775 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:11, Action:North
State  288
Old Q Values:  [ 1367.76434775 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [ 2548.03443631 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: -1  Episode Reward:  19
xxxxx
x. gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6671.76232403   970.36036981 -2651.70614553 -3385.12952694]
------
Step:12, Action:South
State  208
Old Q Values:  [ 6671.76232403   970.36036981 -2651.70614553 -3385.12952694]
New Q values:  [ 6671.76232403  1151.95447881 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  18
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2548.03443631 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:13, Action:North
State  288
Old Q Values:  [ 2548.03443631 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [ 1462.22459471 -8656.02923281 -7525.7277781  -5431.12237127]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  947.24539674  1478.70273395 -6170.35693855 -2387.54492731]
------
Step:14, Action:South
State  208
Old Q Values:  [ 6671.76232403  1151.95447881 -2651.70614553 -3385.12952694]
New Q values:  [ 6671.76232403   898.84916994 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1462.22459471 -8656.02923281 -7525.7277781  -5431.12237127]
------
Step:15, Action:West
State  288
Old Q Values:  [ 1462.22459471 -8656.02923281 -7525.7277781  -5431.12237127]
New Q values:  [ 1462.22459471 -8656.02923281 -7525.7277781    358.46547969]
Reward: 9  Episode Reward:  25
xxxxx
x. gx
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 8418.38142732]
------
Step:16, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 8418.38142732]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 4528.59630793]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[3017.03414967 -180.6        3852.81245667 2937.02145253]
------
Step:17, Action:East
State  257
Old Q Values:  [3017.03414967 -180.6        3852.81245667 2937.02145253]
New Q values:  [3017.03414967 -180.6        2899.10387504 2937.02145253]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 4528.59630793]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 -1023.86417564  -384.62265661]
New Q values:  [-2527.46239811 -6212.61234477 -1023.86417564   750.66118226]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[3017.03414967 -180.6        2899.10387504 2937.02145253]
------
Step:19, Action:North
State  257
Old Q Values:  [3017.03414967 -180.6        2899.10387504 2937.02145253]
New Q values:  [20033.84770276  -180.6         2899.10387504  2937.02145253]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.          3318.40285054 62758.78014297     0.        ]
------
Step:20, Action:South
State  176
Old Q Values:  [    0.           501.88489382 11434.23928209     0.        ]
New Q values:  [    0.           765.3571716  11434.23928209     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[-153.0210699     0.         1884.01071358  644.94785455]
------
Step:21, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598      4004.05627652 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598      1826.22086529 -2702.17995449]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 -1023.86417564   750.66118226]
------
Step:22, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 -1023.86417564   750.66118226]
New Q values:  [-2527.46239811 -6212.61234477 -1023.86417564   864.86768698]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[-153.0210699     0.         1884.01071358  644.94785455]
------
Step:23, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598      1826.22086529 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598       989.34865221 -2702.17995449]
Reward: -1  Episode Reward:  27
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 -1023.86417564   864.86768698]
------
Step:24, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 -1023.86417564   864.86768698]
New Q values:  [-2527.46239811 -6212.61234477 -1023.86417564  6355.50138562]
Reward: -1  Episode Reward:  26
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[20033.84770276  -180.6         2899.10387504  2937.02145253]
------
Step:25, Action:North
State  257
Old Q Values:  [20033.84770276  -180.6         2899.10387504  2937.02145253]
New Q values:  [26840.57312399  -180.6         2899.10387504  2937.02145253]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.          3318.40285054 62758.78014297     0.        ]
------
Step:26, Action:South
State  179
Old Q Values:  [    0.         34025.66630253 69578.28492137     0.        ]
New Q values:  [    0.         21661.83845821 69578.28492137     0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26840.57312399  -180.6         2899.10387504  2937.02145253]
------
Step:27, Action:North
State  257
Old Q Values:  [26840.57312399  -180.6         2899.10387504  2937.02145253]
New Q values:  [29563.26329249  -180.6         2899.10387504  2937.02145253]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.          3318.40285054 62758.78014297     0.        ]
------
Step:28, Action:South
State  177
Old Q Values:  [    0.          3318.40285054 62758.78014297     0.        ]
New Q values:  [    0.         10195.74012796 62758.78014297     0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[29563.26329249  -180.6         2899.10387504  2937.02145253]
------
Step:29, Action:North
State  257
Old Q Values:  [29563.26329249  -180.6         2899.10387504  2937.02145253]
New Q values:  [30652.33935989  -180.6         2899.10387504  2937.02145253]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         10195.74012796 62758.78014297     0.        ]
------
Step:30, Action:South
State  179
Old Q Values:  [    0.         21661.83845821 69578.28492137     0.        ]
New Q values:  [    0.         17859.83719125 69578.28492137     0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[30652.33935989  -180.6         2899.10387504  2937.02145253]
------
Step:31, Action:North
State  257
Old Q Values:  [30652.33935989  -180.6         2899.10387504  2937.02145253]
New Q values:  [31087.96978685  -180.6         2899.10387504  2937.02145253]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         10195.74012796 62758.78014297     0.        ]
------
Step:32, Action:South
State  176
Old Q Values:  [    0.           765.3571716  11434.23928209     0.        ]
New Q values:  [    0.           870.74608271 11434.23928209     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x.  x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[-153.0210699     0.         1884.01071358  644.94785455]
------
Step:33, Action:East
State  256
Old Q Values:  [-153.0210699     0.         1884.01071358  644.94785455]
New Q values:  [-153.0210699     0.         2659.65470112  644.94785455]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 -1023.86417564  6355.50138562]
------
Step:34, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 -1023.86417564  6355.50138562]
New Q values:  [-2527.46239811 -6212.61234477    28.52170816  6355.50138562]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1462.22459471 -8656.02923281 -7525.7277781    358.46547969]
------
Step:35, Action:North
State  288
Old Q Values:  [ 1462.22459471 -8656.02923281 -7525.7277781    358.46547969]
New Q values:  [ 5238.28526461 -8656.02923281 -7525.7277781    358.46547969]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.55133181e+04 -1.33255659e+02 -1.80600000e+02  3.52184257e+00]
------
Step:36, Action:North
State  208
Old Q Values:  [ 6671.76232403   898.84916994 -2651.70614553 -3385.12952694]
New Q values:  [13870.71297321   898.84916994 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3512.58304545  -180.00807518 37342.02681199]
------
Step:37, Action:West
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 37342.02681199]
New Q values:  [18220.41077038  3512.58304545  -180.00807518 31798.06606174]
Reward: -1  Episode Reward:  13
xxxxx
x.a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  5.62061845e+04]
------
Step:38, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   632.12002554   643.14530887]
New Q values:  [ -281.736      -8877.87327254   632.12002554   794.09984254]
Reward: 9  Episode Reward:  22
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1771.47239665  862.42400667 -272.09726687]
------
Step:39, Action:South
State  111
Old Q Values:  [-177.44732869 1771.47239665  862.42400667 -272.09726687]
New Q values:  [-177.44732869 2221.83939646  862.42400667 -272.09726687]
Reward: -1  Episode Reward:  21
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  461.13139081 5046.168126      0.        ]
------
Step:40, Action:East
State  179
Old Q Values:  [    0.         17859.83719125 69578.28492137     0.        ]
New Q values:  [    0.         17859.83719125 95888.52390352     0.        ]
Reward: 100009  Episode Reward:  100030
xxxxx
x   x
x a x
x  gx
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.69358809e+03 7.62715392e+03 0.00000000e+00]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.69358809e+03 7.62715392e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 0.00000000e+00]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13870.71297321   898.84916994 -2651.70614553 -3385.12952694]
------
Step:2, Action:North
State  216
Old Q Values:  [  947.24539674  1478.70273395 -6170.35693855 -2387.54492731]
New Q values:  [  506.02440249  1478.70273395 -6170.35693855 -2387.54492731]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        379.5011308 -180.6        405.754146 ]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6        379.5011308 -180.6        405.754146 ]
New Q values:  [-180.6        379.5011308 -180.6        289.9799492]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   407.59430268    24.38756513]
------
Step:4, Action:East
State  114
Old Q Values:  [-1.80600000e+02 -8.28092007e+03 -9.91433515e-01  5.62061845e+04]
New Q values:  [ -180.6        -8280.92007422   112.85376583 56206.18445649]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        379.5011308 -180.6        289.9799492]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6        379.5011308 -180.6        289.9799492]
New Q values:  [-180.6        594.8112725 -180.6        289.9799492]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  506.02440249  1478.70273395 -6170.35693855 -2387.54492731]
------
Step:6, Action:South
State  216
Old Q Values:  [  506.02440249  1478.70273395 -6170.35693855 -2387.54492731]
New Q values:  [  506.02440249  2168.36667296 -6170.35693855 -2387.54492731]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5238.28526461 -8656.02923281 -7525.7277781    358.46547969]
------
Step:7, Action:West
State  288
Old Q Values:  [ 5238.28526461 -8656.02923281 -7525.7277781    358.46547969]
New Q values:  [ 5238.28526461 -8656.02923281 -7525.7277781   1507.36508425]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 4528.59630793]
------
Step:8, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 4528.59630793]
New Q values:  [   37.74111519  -168.92307549  -452.95359414 11143.22945922]
Reward: 9  Episode Reward:  52
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31087.96978685  -180.6         2899.10387504  2937.02145253]
------
Step:9, Action:North
State  257
Old Q Values:  [31087.96978685  -180.6         2899.10387504  2937.02145253]
New Q values:  [41201.14508579  -180.6         2899.10387504  2937.02145253]
Reward: -1  Episode Reward:  51
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17859.83719125 95888.52390352     0.        ]
------
Step:10, Action:East
State  179
Old Q Values:  [    0.         17859.83719125 95888.52390352     0.        ]
New Q values:  [    0.         17859.83719125 41752.57667451     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  2.92601219e+03  1.13258904e+04]
------
Step:11, Action:West
State  192
Old Q Values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 0.00000000e+00]
New Q values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 1.88270340e+04]
Reward: -1  Episode Reward:  49
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         10195.74012796 62758.78014297     0.        ]
------
Step:12, Action:South
State  177
Old Q Values:  [    0.         10195.74012796 62758.78014297     0.        ]
New Q values:  [    0.         16438.03957692 62758.78014297     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41201.14508579  -180.6         2899.10387504  2937.02145253]
------
Step:13, Action:North
State  256
Old Q Values:  [-153.0210699     0.         2659.65470112  644.94785455]
New Q values:  [3368.46335666    0.         2659.65470112  644.94785455]
Reward: -1  Episode Reward:  47
xxxxx
xg  x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NW
[    0.           870.74608271 11434.23928209     0.        ]
------
Step:14, Action:East
State  177
Old Q Values:  [    0.         16438.03957692 62758.78014297     0.        ]
New Q values:  [    0.         16438.03957692 30751.02227006     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.69358809e+03 7.21747546e+03 1.88270340e+04]
------
Step:15, Action:West
State  193
Old Q Values:  [-5922.26708831  4390.31246547 -3909.58186816  2096.80577447]
New Q values:  [-5922.26708831  4390.31246547 -3909.58186816 10063.42899081]
Reward: -1  Episode Reward:  45
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692 30751.02227006     0.        ]
------
Step:16, Action:East
State  177
Old Q Values:  [    0.         16438.03957692 30751.02227006     0.        ]
New Q values:  [    0.         16438.03957692 17947.91912089     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.69358809e+03 7.21747546e+03 1.88270340e+04]
------
Step:17, Action:West
State  192
Old Q Values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 1.88270340e+04]
New Q values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 1.29145894e+04]
Reward: -1  Episode Reward:  43
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692 17947.91912089     0.        ]
------
Step:18, Action:South
State  176
Old Q Values:  [    0.           870.74608271 11434.23928209     0.        ]
New Q values:  [    0.          1358.23744008 11434.23928209     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[3368.46335666    0.         2659.65470112  644.94785455]
------
Step:19, Action:East
State  256
Old Q Values:  [3368.46335666    0.         2659.65470112  644.94785455]
New Q values:  [3368.46335666    0.         2969.91229613  644.94785455]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  6355.50138562]
------
Step:20, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477    28.52170816  6355.50138562]
New Q values:  [-2527.46239811 -6212.61234477    28.52170816  3552.13956125]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[3368.46335666    0.         2969.91229613  644.94785455]
------
Step:21, Action:East
State  256
Old Q Values:  [3368.46335666    0.         2969.91229613  644.94785455]
New Q values:  [3368.46335666    0.         2253.00678683  644.94785455]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  3552.13956125]
------
Step:22, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477    28.52170816  3552.13956125]
New Q values:  [-2527.46239811 -6212.61234477    28.52170816  2430.7948315 ]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[3368.46335666    0.         2253.00678683  644.94785455]
------
Step:23, Action:East
State  257
Old Q Values:  [41201.14508579  -180.6         2899.10387504  2937.02145253]
New Q values:  [41201.14508579  -180.6         1888.27999947  2937.02145253]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  2430.7948315 ]
------
Step:24, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -452.95359414 11143.22945922]
New Q values:  [   37.74111519  -168.92307549  -452.95359414 16817.03530943]
Reward: -1  Episode Reward:  36
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41201.14508579  -180.6         1888.27999947  2937.02145253]
------
Step:25, Action:North
State  257
Old Q Values:  [41201.14508579  -180.6         1888.27999947  2937.02145253]
New Q values:  [21864.23377058  -180.6         1888.27999947  2937.02145253]
Reward: -1  Episode Reward:  35
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692 17947.91912089     0.        ]
------
Step:26, Action:East
State  177
Old Q Values:  [    0.         16438.03957692 17947.91912089     0.        ]
New Q values:  [    0.         16438.03957692 10197.5963456      0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816 10063.42899081]
------
Step:27, Action:West
State  195
Old Q Values:  [   38.85388605  3081.56358951  1388.1600337  26839.36644989]
New Q values:  [   38.85388605  3081.56358951  1388.1600337  23260.91958231]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17859.83719125 41752.57667451     0.        ]
------
Step:28, Action:East
State  177
Old Q Values:  [    0.         16438.03957692 10197.5963456      0.        ]
New Q values:  [    0.         16438.03957692  7097.46723548     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816 10063.42899081]
------
Step:29, Action:West
State  192
Old Q Values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 1.29145894e+04]
New Q values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 1.00966476e+04]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  7097.46723548     0.        ]
------
Step:30, Action:South
State  176
Old Q Values:  [    0.          1358.23744008 11434.23928209     0.        ]
New Q values:  [    0.          1553.23398303 11434.23928209     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[3368.46335666    0.         2253.00678683  644.94785455]
------
Step:31, Action:East
State  257
Old Q Values:  [21864.23377058  -180.6         1888.27999947  2937.02145253]
New Q values:  [21864.23377058  -180.6         1483.95044924  2937.02145253]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  2430.7948315 ]
------
Step:32, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -452.95359414 16817.03530943]
New Q values:  [   37.74111519  -168.92307549  -452.95359414 13285.48425495]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21864.23377058  -180.6         1483.95044924  2937.02145253]
------
Step:33, Action:North
State  257
Old Q Values:  [21864.23377058  -180.6         1483.95044924  2937.02145253]
New Q values:  [21270.86651059  -180.6         1483.95044924  2937.02145253]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17859.83719125 41752.57667451     0.        ]
------
Step:34, Action:East
State  177
Old Q Values:  [    0.         16438.03957692  7097.46723548     0.        ]
New Q values:  [    0.         16438.03957692  5857.41559143     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816 10063.42899081]
------
Step:35, Action:West
State  192
Old Q Values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 1.00966476e+04]
New Q values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 8.96947092e+03]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5857.41559143     0.        ]
------
Step:36, Action:South
State  176
Old Q Values:  [    0.          1553.23398303 11434.23928209     0.        ]
New Q values:  [    0.          1631.23260021 11434.23928209     0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[3368.46335666    0.         2253.00678683  644.94785455]
------
Step:37, Action:East
State  257
Old Q Values:  [21270.86651059  -180.6         1483.95044924  2937.02145253]
New Q values:  [21270.86651059  -180.6         1322.21862914  2937.02145253]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  2430.7948315 ]
------
Step:38, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -452.95359414 13285.48425495]
New Q values:  [   37.74111519  -168.92307549  -452.95359414 11694.85365515]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21270.86651059  -180.6         1322.21862914  2937.02145253]
------
Step:39, Action:North
State  257
Old Q Values:  [21270.86651059  -180.6         1322.21862914  2937.02145253]
New Q values:  [21033.51960659  -180.6         1322.21862914  2937.02145253]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17859.83719125 41752.57667451     0.        ]
------
Step:40, Action:East
State  179
Old Q Values:  [    0.         17859.83719125 41752.57667451     0.        ]
New Q values:  [    0.         17859.83719125 20098.1977829      0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  2.92601219e+03  1.13258904e+04]
------
Step:41, Action:West
State  195
Old Q Values:  [   38.85388605  3081.56358951  1388.1600337  23260.91958231]
New Q values:  [   38.85388605  3081.56358951  1388.1600337  15333.22716779]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17859.83719125 20098.1977829      0.        ]
------
Step:42, Action:East
State  177
Old Q Values:  [    0.         16438.03957692  5857.41559143     0.        ]
New Q values:  [    0.         16438.03957692  5361.39493382     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816 10063.42899081]
------
Step:43, Action:West
State  195
Old Q Values:  [   38.85388605  3081.56358951  1388.1600337  15333.22716779]
New Q values:  [   38.85388605  3081.56358951  1388.1600337  12162.15020199]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17859.83719125 20098.1977829      0.        ]
------
Step:44, Action:East
State  179
Old Q Values:  [    0.         17859.83719125 20098.1977829      0.        ]
New Q values:  [    0.         17859.83719125 11436.44622626     0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  2.92601219e+03  1.13258904e+04]
------
Step:45, Action:West
State  192
Old Q Values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 8.96947092e+03]
New Q values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 8.51860024e+03]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5361.39493382     0.        ]
------
Step:46, Action:South
State  179
Old Q Values:  [    0.         17859.83719125 11436.44622626     0.        ]
New Q values:  [    0.         13453.39075848 11436.44622626     0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21033.51960659  -180.6         1322.21862914  2937.02145253]
------
Step:47, Action:North
State  257
Old Q Values:  [21033.51960659  -180.6         1322.21862914  2937.02145253]
New Q values:  [12448.82507018  -180.6         1322.21862914  2937.02145253]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         13453.39075848 11436.44622626     0.        ]
------
Step:48, Action:South
State  179
Old Q Values:  [    0.         13453.39075848 11436.44622626     0.        ]
New Q values:  [    0.          9115.40382444 11436.44622626     0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x   x
xag x
xxxxx
Step:49, Action:South
State  257
Old Q Values:  [12448.82507018  -180.6         1322.21862914  2937.02145253]
New Q values:  [12448.82507018  3481.80752105  1322.21862914  2937.02145253]
Reward: -301  Episode Reward:  -289
xxxxx
x.  x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12448.82507018  3481.80752105  1322.21862914  2937.02145253]
------
Step:50, Action:North
State  256
Old Q Values:  [3368.46335666    0.         2253.00678683  644.94785455]
New Q values:  [-1222.94287271     0.          2253.00678683   644.94785455]
Reward: -10001  Episode Reward:  -10290
xxxxx
x.  x
xg  x
x   x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2221.83939646  862.42400667 -272.09726687]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6          49.89209034    0.            0.        ]
New Q values:  [-180.6        1539.20727394    0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  461.13139081 5046.168126      0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [   13.85659648   498.47837543 -3255.52590138  -180.6       ]
New Q values:  [   13.85659648   498.47837543 -6126.27981178  -180.6       ]
Reward: -10001  Episode Reward:  -9992
xxxxx
x ..x
x g.x
x...x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13870.71297321   898.84916994 -2651.70614553 -3385.12952694]
------
Step:1, Action:North
State  208
Old Q Values:  [13870.71297321   898.84916994 -2651.70614553 -3385.12952694]
New Q values:  [ 5732.12857103   898.84916994 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        594.8112725 -180.6        289.9799492]
------
Step:2, Action:West
State  136
Old Q Values:  [ -724.71310357   853.38244659 -6245.61866138   487.17854918]
New Q values:  [ -724.71310357   853.38244659 -6245.61866138   201.73793064]
Reward: 9  Episode Reward:  18
xxxxx
x.agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
------
Step:3, Action:West
State  121
Old Q Values:  [ 0.00000000e+00  0.00000000e+00 -9.95420583e+03  4.88836988e+00]
New Q values:  [    0.             0.         -9954.20583251    56.10465519]
Reward: 9  Episode Reward:  27
xxxxx
xa gx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         162.4976908   -98.31015718    0.        ]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 2221.83939646  862.42400667 -272.09726687]
New Q values:  [-177.44732869  920.73193833  862.42400667 -272.09726687]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294    88.65393249 -4680.74267672  -244.98066897]
------
Step:5, Action:South
State  189
Old Q Values:  [    9.84673294    88.65393249 -4680.74267672  -244.98066897]
New Q values:  [    9.84673294   587.13144651 -4680.74267672  -244.98066897]
Reward: 9  Episode Reward:  45
xxxxx
x  gx
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1820.89957838 -289.59534477  864.55453742 -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [1820.89957838 -289.59534477  864.55453742 -251.53897752]
New Q values:  [ 903.8992653  -289.59534477  864.55453742 -251.53897752]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   587.13144651 -4680.74267672  -244.98066897]
------
Step:7, Action:South
State  191
Old Q Values:  [  3.06655861 595.32276737  46.04536991   0.        ]
New Q values:  [  3.06655861 508.69888654  46.04536991   0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x   x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 903.8992653  -289.59534477  864.55453742 -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [ 903.8992653  -289.59534477  864.55453742 -251.53897752]
New Q values:  [ 513.56937208 -289.59534477  864.55453742 -251.53897752]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 508.69888654  46.04536991   0.        ]
------
Step:9, Action:South
State  190
Old Q Values:  [ 1.04129094  0.         30.35921172  0.        ]
New Q values:  [ 1.04129094e+00 -5.70379540e+03  3.03592117e+01  0.00000000e+00]
Reward: -10001  Episode Reward:  -9959
xxxxx
x   x
x   x
xg..x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.55133181e+04 -1.33255659e+02 -1.80600000e+02  3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [ 1.55133181e+04 -1.33255659e+02 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 6.38917062e+03 -1.33255659e+02 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        594.8112725 -180.6        289.9799492]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        594.8112725 -180.6        289.9799492]
New Q values:  [-180.6        2154.07569422 -180.6         289.9799492 ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6.38917062e+03 -1.33255659e+02 -1.80600000e+02  3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [ 6.38917062e+03 -1.33255659e+02 -1.80600000e+02  3.52184257e+00]
New Q values:  [3201.29095522 -133.25565904 -180.6           3.52184257]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2154.07569422 -180.6         289.9799492 ]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6        2154.07569422 -180.6         289.9799492 ]
New Q values:  [ -180.6       -3419.331151   -180.6         289.9799492]
Reward: -10001  Episode Reward:  -9994
xxxxx
x.. x
x..gx
x. .x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5732.12857103   898.84916994 -2651.70614553 -3385.12952694]
------
Step:1, Action:North
State  216
Old Q Values:  [  506.02440249  2168.36667296 -6170.35693855 -2387.54492731]
New Q values:  [  294.80374576  2168.36667296 -6170.35693855 -2387.54492731]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6       -3419.331151   -180.6         289.9799492]
------
Step:2, Action:West
State  138
Old Q Values:  [ -180.6       -3419.331151   -180.6         289.9799492]
New Q values:  [ -180.6        -3419.331151    -180.6          237.67027048]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   407.59430268    24.38756513]
------
Step:3, Action:East
State  121
Old Q Values:  [    0.             0.         -9954.20583251    56.10465519]
New Q values:  [    0.             0.         -9726.26759903    56.10465519]
Reward: -10001  Episode Reward:  -9993
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5732.12857103   898.84916994 -2651.70614553 -3385.12952694]
------
Step:1, Action:South
State  208
Old Q Values:  [ 5732.12857103   898.84916994 -2651.70614553 -3385.12952694]
New Q values:  [ 5732.12857103  1936.42524736 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5238.28526461 -8656.02923281 -7525.7277781   1507.36508425]
------
Step:2, Action:North
State  288
Old Q Values:  [ 5238.28526461 -8656.02923281 -7525.7277781   1507.36508425]
New Q values:  [ 3814.35267715 -8656.02923281 -7525.7277781   1507.36508425]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5732.12857103  1936.42524736 -2651.70614553 -3385.12952694]
------
Step:3, Action:North
State  208
Old Q Values:  [ 5732.12857103  1936.42524736 -2651.70614553 -3385.12952694]
New Q values:  [ 2548.26616239  1936.42524736 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  7
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   853.38244659 -6245.61866138   201.73793064]
------
Step:4, Action:South
State  136
Old Q Values:  [ -724.71310357   853.38244659 -6245.61866138   201.73793064]
New Q values:  [ -724.71310357  1105.23282735 -6245.61866138   201.73793064]
Reward: -1  Episode Reward:  6
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2548.26616239  1936.42524736 -2651.70614553 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [ 2548.26616239  1936.42524736 -2651.70614553 -3385.12952694]
New Q values:  [ 1090.0075461   1936.42524736 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          237.67027048]
------
Step:6, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          237.67027048]
New Q values:  [ -180.6      -3419.331151  -180.6        222.746399]
Reward: 9  Episode Reward:  14
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   407.59430268    24.38756513]
------
Step:7, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   407.59430268    24.38756513]
New Q values:  [ -253.44886264 -1902.20915811   229.26164077    24.38756513]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6      -3419.331151  -180.6        222.746399]
------
Step:8, Action:West
State  138
Old Q Values:  [ -180.6      -3419.331151  -180.6        222.746399]
New Q values:  [ -180.6        -3419.331151    -180.6          326.72851236]
Reward: -1  Episode Reward:  12
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   632.12002554   794.09984254]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   632.12002554   794.09984254]
New Q values:  [ -281.736      -8877.87327254   632.12002554   388.97535914]
Reward: 9  Episode Reward:  21
xxxxx
xa  x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  219.78474043 -252.78192178]
------
Step:10, Action:East
State  110
Old Q Values:  [ -180.6        -5648.89406634   338.0888671   -180.6       ]
New Q values:  [ -180.6        -5648.89406634   324.2715545   -180.6       ]
Reward: -1  Episode Reward:  20
xxxxx
x a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   632.12002554   388.97535914]
------
Step:11, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         744.79869297    180.241376  ]
New Q values:  [-10156.11771313  -5995.686         628.88932539    180.241376  ]
Reward: -1  Episode Reward:  19
xxxxx
xg ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1105.23282735 -6245.61866138   201.73793064]
------
Step:12, Action:South
State  136
Old Q Values:  [ -724.71310357  1105.23282735 -6245.61866138   201.73793064]
New Q values:  [ -724.71310357  1022.42070515 -6245.61866138   201.73793064]
Reward: -1  Episode Reward:  18
xxxxx
x g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1090.0075461   1936.42524736 -2651.70614553 -3385.12952694]
------
Step:13, Action:South
State  208
Old Q Values:  [ 1090.0075461   1936.42524736 -2651.70614553 -3385.12952694]
New Q values:  [ 1090.0075461   1918.27590209 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3814.35267715 -8656.02923281 -7525.7277781   1507.36508425]
------
Step:14, Action:North
State  288
Old Q Values:  [ 3814.35267715 -8656.02923281 -7525.7277781   1507.36508425]
New Q values:  [ 2175.65107275 -8656.02923281 -7525.7277781   1507.36508425]
Reward: -1  Episode Reward:  16
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  294.80374576  2168.36667296 -6170.35693855 -2387.54492731]
------
Step:15, Action:South
State  208
Old Q Values:  [ 1090.0075461   1918.27590209 -2651.70614553 -3385.12952694]
New Q values:  [ 1090.0075461   1419.40568266 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  15
xxxxx
xg  x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2175.65107275 -8656.02923281 -7525.7277781   1507.36508425]
------
Step:16, Action:North
State  288
Old Q Values:  [ 2175.65107275 -8656.02923281 -7525.7277781   1507.36508425]
New Q values:  [ 1295.4821339  -8656.02923281 -7525.7277781   1507.36508425]
Reward: -1  Episode Reward:  14
xxxxx
x g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1090.0075461   1419.40568266 -2651.70614553 -3385.12952694]
------
Step:17, Action:South
State  208
Old Q Values:  [ 1090.0075461   1419.40568266 -2651.70614553 -3385.12952694]
New Q values:  [ 1090.0075461   1019.37179834 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  13
xxxxx
x  gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1295.4821339  -8656.02923281 -7525.7277781   1507.36508425]
------
Step:18, Action:West
State  288
Old Q Values:  [ 1295.4821339  -8656.02923281 -7525.7277781   1507.36508425]
New Q values:  [ 1295.4821339  -8656.02923281 -7525.7277781   4116.80213025]
Reward: 9  Episode Reward:  22
xxxxx
x   x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  -452.95359414 11694.85365515]
------
Step:19, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -452.95359414 11694.85365515]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 8417.98898311]
Reward: 9  Episode Reward:  31
xxxxx
x  gx
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12448.82507018  3481.80752105  1322.21862914  2937.02145253]
------
Step:20, Action:North
State  261
Old Q Values:  [ 513.56937208 -289.59534477  864.55453742 -251.53897752]
New Q values:  [ 360.37126146 -289.59534477  864.55453742 -251.53897752]
Reward: 9  Episode Reward:  40
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   498.47837543 -6126.27981178  -180.6       ]
------
Step:21, Action:South
State  181
Old Q Values:  [   13.85659648   498.47837543 -6126.27981178  -180.6       ]
New Q values:  [   13.85659648   458.1577114  -6126.27981178  -180.6       ]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 360.37126146 -289.59534477  864.55453742 -251.53897752]
------
Step:22, Action:East
State  261
Old Q Values:  [ 360.37126146 -289.59534477  864.55453742 -251.53897752]
New Q values:  [ 360.37126146 -289.59534477 1074.46026442 -251.53897752]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  2430.7948315 ]
------
Step:23, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477    28.52170816  2430.7948315 ]
New Q values:  [-2527.46239811 -6212.61234477    28.52170816  4706.36545365]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12448.82507018  3481.80752105  1322.21862914  2937.02145253]
------
Step:24, Action:North
State  261
Old Q Values:  [ 360.37126146 -289.59534477 1074.46026442 -251.53897752]
New Q values:  [ 280.995818   -289.59534477 1074.46026442 -251.53897752]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   458.1577114  -6126.27981178  -180.6       ]
------
Step:25, Action:South
State  183
Old Q Values:  [  22.25138791  461.13139081 5046.168126      0.        ]
New Q values:  [  22.25138791  506.19063565 5046.168126      0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 280.995818   -289.59534477 1074.46026442 -251.53897752]
------
Step:26, Action:East
State  261
Old Q Values:  [ 280.995818   -289.59534477 1074.46026442 -251.53897752]
New Q values:  [ 280.995818   -289.59534477 2954.5808007  -251.53897752]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 8417.98898311]
------
Step:27, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 8417.98898311]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 4252.96983346]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 280.995818   -289.59534477 2954.5808007  -251.53897752]
------
Step:28, Action:East
State  261
Old Q Values:  [ 280.995818   -289.59534477 2954.5808007  -251.53897752]
New Q values:  [ 280.995818   -289.59534477 2593.14195638 -251.53897752]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  4706.36545365]
------
Step:29, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 4252.96983346]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 2478.5305203 ]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 280.995818   -289.59534477 2593.14195638 -251.53897752]
------
Step:30, Action:East
State  261
Old Q Values:  [ 280.995818   -289.59534477 2593.14195638 -251.53897752]
New Q values:  [ 280.995818   -289.59534477 2448.56641865 -251.53897752]
Reward: -1  Episode Reward:  30
xxxxx
x g x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  4706.36545365]
------
Step:31, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 2478.5305203 ]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 1725.38213371]
Reward: -1  Episode Reward:  29
xxxxx
x  gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 280.995818   -289.59534477 2448.56641865 -251.53897752]
------
Step:32, Action:East
State  261
Old Q Values:  [ 280.995818   -289.59534477 2448.56641865 -251.53897752]
New Q values:  [ 280.995818   -289.59534477 1496.44120757 -251.53897752]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 1725.38213371]
------
Step:33, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477    28.52170816  4706.36545365]
New Q values:  [-2527.46239811 -6212.61234477    28.52170816  2330.87854373]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 280.995818   -289.59534477 1496.44120757 -251.53897752]
------
Step:34, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598       989.34865221 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598      1094.403024   -2702.17995449]
Reward: -1  Episode Reward:  26
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  2330.87854373]
------
Step:35, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477    28.52170816  2330.87854373]
New Q values:  [-2527.46239811 -6212.61234477    28.52170816  1260.07232469]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4187.30744448 -6457.4598      1094.403024   -2702.17995449]
------
Step:36, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598      1094.403024   -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598       815.18290701 -2702.17995449]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477    28.52170816  1260.07232469]
------
Step:37, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477    28.52170816  1260.07232469]
New Q values:  [-2527.46239811 -6212.61234477  1245.84932234  1260.07232469]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1295.4821339  -8656.02923281 -7525.7277781   4116.80213025]
------
Step:38, Action:North
State  288
Old Q Values:  [ 1295.4821339  -8656.02923281 -7525.7277781   4116.80213025]
New Q values:  [ 1477.98014013 -8656.02923281 -7525.7277781   4116.80213025]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3201.29095522 -133.25565904 -180.6           3.52184257]
------
Step:39, Action:North
State  210
Old Q Values:  [3201.29095522 -133.25565904 -180.6           3.52184257]
New Q values:  [1377.9349358  -133.25565904 -180.6           3.52184257]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          326.72851236]
------
Step:40, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          326.72851236]
New Q values:  [ -180.6        -3419.331151    -180.6          319.72741261]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   632.12002554   388.97535914]
------
Step:41, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   632.12002554   388.97535914]
New Q values:  [ -281.736      -8877.87327254   348.166234     388.97535914]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          319.72741261]
------
Step:42, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          319.72741261]
New Q values:  [ -180.6        -3419.331151    -180.6          243.98357279]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   348.166234     388.97535914]
------
Step:43, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   348.166234     388.97535914]
New Q values:  [ -281.736      -8877.87327254   348.166234     252.27161001]
Reward: -1  Episode Reward:  17
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634   324.2715545   -180.6       ]
------
Step:44, Action:East
State  99
Old Q Values:  [    0.          8059.77691317 29647.29381671     0.        ]
New Q values:  [    0.          8059.77691317 28720.17286363     0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 56206.18445649]
------
Step:45, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   348.166234     252.27161001]
New Q values:  [ -281.736      -8877.87327254   348.166234     197.59011035]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634   324.2715545   -180.6       ]
------
Step:46, Action:East
State  99
Old Q Values:  [    0.          8059.77691317 28720.17286363     0.        ]
New Q values:  [    0.          8059.77691317 28349.3244824      0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 56206.18445649]
------
Step:47, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   348.166234     197.59011035]
New Q values:  [ -281.736      -8877.87327254   348.166234     175.71751049]
Reward: -1  Episode Reward:  13
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634   324.2715545   -180.6       ]
------
Step:48, Action:East
State  99
Old Q Values:  [    0.          8059.77691317 28349.3244824      0.        ]
New Q values:  [    0.          8059.77691317 28200.98512991     0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 56206.18445649]
------
Step:49, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         628.88932539    180.241376  ]
New Q values:  [-10156.11771313  -5995.686         628.88932539    232.39529656]
Reward: -1  Episode Reward:  11
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   536.32915387 -8213.80649336  -180.6       ]
------
Step:50, Action:South
State  99
Old Q Values:  [    0.          8059.77691317 28200.98512991     0.        ]
New Q values:  [    0.          8154.72263835 28200.98512991     0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5361.39493382     0.        ]
------
Step:51, Action:South
State  180
Old Q Values:  [-1367.02476015  1859.47743538  5712.49284801     0.        ]
New Q values:  [-1367.02476015   987.74584625  5712.49284801     0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4187.30744448 -6457.4598       815.18290701 -2702.17995449]
------
Step:52, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598       815.18290701 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598       703.49486021 -2702.17995449]
Reward: -1  Episode Reward:  8
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1245.84932234  1260.07232469]
------
Step:53, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1245.84932234  1260.07232469]
New Q values:  [-2527.46239811 -6212.61234477  1245.84932234   952.36129215]
Reward: -1  Episode Reward:  7
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 280.995818   -289.59534477 1496.44120757 -251.53897752]
------
Step:54, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598       703.49486021 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598       654.55274079 -2702.17995449]
Reward: -1  Episode Reward:  6
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1245.84932234   952.36129215]
------
Step:55, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1245.84932234   952.36129215]
New Q values:  [-2527.46239811 -6212.61234477  1732.78036801   952.36129215]
Reward: -1  Episode Reward:  5
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1477.98014013 -8656.02923281 -7525.7277781   4116.80213025]
------
Step:56, Action:West
State  288
Old Q Values:  [ 1477.98014013 -8656.02923281 -7525.7277781   4116.80213025]
New Q values:  [ 1477.98014013 -8656.02923281 -7525.7277781   2165.9549625 ]
Reward: -1  Episode Reward:  4
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1732.78036801   952.36129215]
------
Step:57, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1732.78036801   952.36129215]
New Q values:  [-2527.46239811 -6212.61234477  1342.29863595   952.36129215]
Reward: -1  Episode Reward:  3
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1477.98014013 -8656.02923281 -7525.7277781   2165.9549625 ]
------
Step:58, Action:West
State  288
Old Q Values:  [ 1477.98014013 -8656.02923281 -7525.7277781   2165.9549625 ]
New Q values:  [ 1477.98014013 -8656.02923281 -7525.7277781   1268.47157579]
Reward: -1  Episode Reward:  2
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1342.29863595   952.36129215]
------
Step:59, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1342.29863595   952.36129215]
New Q values:  [-2527.46239811 -6212.61234477   979.71349642   952.36129215]
Reward: -1  Episode Reward:  1
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1477.98014013 -8656.02923281 -7525.7277781   1268.47157579]
------
Step:60, Action:North
State  288
Old Q Values:  [ 1477.98014013 -8656.02923281 -7525.7277781   1268.47157579]
New Q values:  [  917.59431988 -8656.02923281 -7525.7277781   1268.47157579]
Reward: -1  Episode Reward:  0
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1090.0075461   1019.37179834 -2651.70614553 -3385.12952694]
------
Step:61, Action:North
State  208
Old Q Values:  [ 1090.0075461   1019.37179834 -2651.70614553 -3385.12952694]
New Q values:  [  742.12922999  1019.37179834 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  -1
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1022.42070515 -6245.61866138   201.73793064]
------
Step:62, Action:South
State  130
Old Q Values:  [18220.41077038  3512.58304545  -180.00807518 31798.06606174]
New Q values:  [18220.41077038  1710.24475768  -180.00807518 31798.06606174]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  742.12922999  1019.37179834 -2651.70614553 -3385.12952694]
------
Step:63, Action:South
State  210
Old Q Values:  [1377.9349358  -133.25565904 -180.6           3.52184257]
New Q values:  [1377.9349358   326.63920912 -180.6           3.52184257]
Reward: -1  Episode Reward:  -3
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  917.59431988 -8656.02923281 -7525.7277781   1268.47157579]
------
Step:64, Action:North
State  288
Old Q Values:  [  917.59431988 -8656.02923281 -7525.7277781   1268.47157579]
New Q values:  [  779.81820869 -8656.02923281 -7525.7277781   1268.47157579]
Reward: -1  Episode Reward:  -4
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1377.9349358   326.63920912 -180.6           3.52184257]
------
Step:65, Action:North
State  210
Old Q Values:  [1377.9349358   326.63920912 -180.6           3.52184257]
New Q values:  [ 623.76904615  326.63920912 -180.6           3.52184257]
Reward: -1  Episode Reward:  -5
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          243.98357279]
------
Step:66, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          243.98357279]
New Q values:  [ -180.6        -3419.331151    -180.6          165.77192135]
Reward: -1  Episode Reward:  -6
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   229.26164077    24.38756513]
------
Step:67, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   229.26164077    24.38756513]
New Q values:  [ -253.44886264 -1902.20915811   140.83623271    24.38756513]
Reward: -1  Episode Reward:  -7
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          165.77192135]
------
Step:68, Action:West
State  130
Old Q Values:  [18220.41077038  1710.24475768  -180.00807518 31798.06606174]
New Q values:  [18220.41077038  1710.24475768  -180.00807518 29580.48176165]
Reward: -1  Episode Reward:  -8
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 56206.18445649]
------
Step:69, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   348.166234     175.71751049]
New Q values:  [ -281.736      -8877.87327254   348.166234     345.9065857 ]
Reward: -1  Episode Reward:  -9
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  920.73193833  862.42400667 -272.09726687]
------
Step:70, Action:South
State  99
Old Q Values:  [    0.          8154.72263835 28200.98512991     0.        ]
New Q values:  [    0.          8192.70092842 28200.98512991     0.        ]
Reward: -1  Episode Reward:  -10
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5361.39493382     0.        ]
------
Step:71, Action:South
State  181
Old Q Values:  [   13.85659648   458.1577114  -6126.27981178  -180.6       ]
New Q values:  [   13.85659648   631.59544683 -6126.27981178  -180.6       ]
Reward: -1  Episode Reward:  -11
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 280.995818   -289.59534477 1496.44120757 -251.53897752]
------
Step:72, Action:East
State  260
Old Q Values:  [-4187.30744448 -6457.4598       654.55274079 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598       555.13514524 -2702.17995449]
Reward: -1  Episode Reward:  -12
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   979.71349642   952.36129215]
------
Step:73, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   979.71349642   952.36129215]
New Q values:  [-2527.46239811 -6212.61234477   771.8268713    952.36129215]
Reward: -1  Episode Reward:  -13
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  779.81820869 -8656.02923281 -7525.7277781   1268.47157579]
------
Step:74, Action:West
State  288
Old Q Values:  [  779.81820869 -8656.02923281 -7525.7277781   1268.47157579]
New Q values:  [  779.81820869 -8656.02923281 -7525.7277781    792.49701796]
Reward: -1  Episode Reward:  -14
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   771.8268713    952.36129215]
------
Step:75, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   771.8268713    952.36129215]
New Q values:  [-2527.46239811 -6212.61234477   545.87985391   952.36129215]
Reward: -1  Episode Reward:  -15
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  779.81820869 -8656.02923281 -7525.7277781    792.49701796]
------
Step:76, Action:North
State  288
Old Q Values:  [  779.81820869 -8656.02923281 -7525.7277781    792.49701796]
New Q values:  [  498.45799732 -8656.02923281 -7525.7277781    792.49701796]
Reward: -1  Episode Reward:  -16
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 623.76904615  326.63920912 -180.6           3.52184257]
------
Step:77, Action:North
State  208
Old Q Values:  [  742.12922999  1019.37179834 -2651.70614553 -3385.12952694]
New Q values:  [  345.9832684   1019.37179834 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  -17
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          165.77192135]
------
Step:78, Action:West
State  130
Old Q Values:  [18220.41077038  1710.24475768  -180.00807518 29580.48176165]
New Q values:  [18220.41077038  1710.24475768  -180.00807518 28693.44804161]
Reward: -1  Episode Reward:  -18
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 56206.18445649]
------
Step:79, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   348.166234     345.9065857 ]
New Q values:  [ -281.736      -8877.87327254   348.166234     413.98221578]
Reward: -1  Episode Reward:  -19
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  920.73193833  862.42400667 -272.09726687]
------
Step:80, Action:South
State  111
Old Q Values:  [-177.44732869  920.73193833  862.42400667 -272.09726687]
New Q values:  [-177.44732869 1881.54321313  862.42400667 -272.09726687]
Reward: -1  Episode Reward:  -20
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  506.19063565 5046.168126      0.        ]
------
Step:81, Action:East
State  179
Old Q Values:  [    0.          9115.40382444 11436.44622626     0.        ]
New Q values:  [    0.          9115.40382444 67977.7456036      0.        ]
Reward: 100009  Episode Reward:  99989
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  498.45799732 -8656.02923281 -7525.7277781    792.49701796]
------
Step:1, Action:North
State  288
Old Q Values:  [  498.45799732 -8656.02923281 -7525.7277781    792.49701796]
New Q values:  [  510.59473843 -8656.02923281 -7525.7277781    792.49701796]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  345.9832684   1019.37179834 -2651.70614553 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [  345.9832684   1019.37179834 -2651.70614553 -3385.12952694]
New Q values:  [  345.9832684    644.89782472 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  510.59473843 -8656.02923281 -7525.7277781    792.49701796]
------
Step:3, Action:West
State  288
Old Q Values:  [  510.59473843 -8656.02923281 -7525.7277781    792.49701796]
New Q values:  [  510.59473843 -8656.02923281 -7525.7277781    602.10719483]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   545.87985391   952.36129215]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   545.87985391   952.36129215]
New Q values:  [-2527.46239811 -6212.61234477   545.87985391   835.27687913]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 280.995818   -289.59534477 1496.44120757 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [ 280.995818   -289.59534477 1496.44120757 -251.53897752]
New Q values:  [ 307.27696125 -289.59534477 1496.44120757 -251.53897752]
Reward: 9  Episode Reward:  25
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   631.59544683 -6126.27981178  -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [   13.85659648   631.59544683 -6126.27981178  -180.6       ]
New Q values:  [   13.85659648   700.970541   -6126.27981178  -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 307.27696125 -289.59534477 1496.44120757 -251.53897752]
------
Step:7, Action:East
State  261
Old Q Values:  [ 307.27696125 -289.59534477 1496.44120757 -251.53897752]
New Q values:  [ 307.27696125 -289.59534477 1115.59112314 -251.53897752]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 1725.38213371]
------
Step:8, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 1725.38213371]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 1024.23019043]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 307.27696125 -289.59534477 1115.59112314 -251.53897752]
------
Step:9, Action:East
State  261
Old Q Values:  [ 307.27696125 -289.59534477 1115.59112314 -251.53897752]
New Q values:  [ 307.27696125 -289.59534477  752.90550639 -251.53897752]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 1024.23019043]
------
Step:10, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 1024.23019043]
New Q values:  [  37.74111519 -168.92307549 -452.95359414  634.96372809]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 307.27696125 -289.59534477  752.90550639 -251.53897752]
------
Step:11, Action:East
State  261
Old Q Values:  [ 307.27696125 -289.59534477  752.90550639 -251.53897752]
New Q values:  [ 307.27696125 -289.59534477  551.14526629 -251.53897752]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   545.87985391   835.27687913]
------
Step:12, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414  634.96372809]
New Q values:  [  37.74111519 -168.92307549 -452.95359414  418.72907112]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 307.27696125 -289.59534477  551.14526629 -251.53897752]
------
Step:13, Action:East
State  261
Old Q Values:  [ 307.27696125 -289.59534477  551.14526629 -251.53897752]
New Q values:  [ 307.27696125 -289.59534477  470.44117026 -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   545.87985391   835.27687913]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   545.87985391   835.27687913]
New Q values:  [-2527.46239811 -6212.61234477   545.87985391   474.64310273]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 307.27696125 -289.59534477  470.44117026 -251.53897752]
------
Step:15, Action:East
State  261
Old Q Values:  [ 307.27696125 -289.59534477  470.44117026 -251.53897752]
New Q values:  [ 307.27696125 -289.59534477  313.19518944 -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414  418.72907112]
------
Step:16, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414  418.72907112]
New Q values:  [  37.74111519 -168.92307549 -452.95359414  260.85018528]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 307.27696125 -289.59534477  313.19518944 -251.53897752]
------
Step:17, Action:East
State  261
Old Q Values:  [ 307.27696125 -289.59534477  313.19518944 -251.53897752]
New Q values:  [ 307.27696125 -289.59534477  202.93313136 -251.53897752]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414  260.85018528]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   545.87985391   474.64310273]
New Q values:  [-2527.46239811 -6212.61234477   545.87985391   281.44032947]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x . x
xag x
xxxxx
Step:19, Action:West
State  260
Old Q Values:  [-4187.30744448 -6457.4598       555.13514524 -2702.17995449]
New Q values:  [-4187.30744448 -6457.4598       555.13514524 -7094.93143822]
Reward: -10301  Episode Reward:  -10289
xxxxx
x...x
x . x
xg  x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  345.9832684    644.89782472 -2651.70614553 -3385.12952694]
------
Step:1, Action:South
State  208
Old Q Values:  [  345.9832684    644.89782472 -2651.70614553 -3385.12952694]
New Q values:  [  345.9832684    443.99128834 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  510.59473843 -8656.02923281 -7525.7277781    602.10719483]
------
Step:2, Action:West
State  288
Old Q Values:  [  510.59473843 -8656.02923281 -7525.7277781    602.10719483]
New Q values:  [  510.59473843 -8656.02923281 -7525.7277781    410.0068341 ]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   545.87985391   281.44032947]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   545.87985391   281.44032947]
New Q values:  [-2527.46239811 -6212.61234477   370.93036309   281.44032947]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  510.59473843 -8656.02923281 -7525.7277781    410.0068341 ]
------
Step:4, Action:North
State  288
Old Q Values:  [  510.59473843 -8656.02923281 -7525.7277781    410.0068341 ]
New Q values:  [  390.76860922 -8656.02923281 -7525.7277781    410.0068341 ]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 623.76904615  326.63920912 -180.6           3.52184257]
------
Step:5, Action:North
State  210
Old Q Values:  [ 623.76904615  326.63920912 -180.6           3.52184257]
New Q values:  [ 304.63919487  326.63920912 -180.6           3.52184257]
Reward: 9  Episode Reward:  25
xxxxx
x. ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          165.77192135]
------
Step:6, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          165.77192135]
New Q values:  [ -180.6        -3419.331151    -180.6          189.90343327]
Reward: -1  Episode Reward:  24
xxxxx
x.a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   348.166234     413.98221578]
------
Step:7, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         628.88932539    232.39529656]
New Q values:  [-10156.11771313  -5995.686         628.88932539  -5587.95006911]
Reward: -9991  Episode Reward:  -9967
xxxxx
xg  x
x.. x
x.  x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  345.9832684    443.99128834 -2651.70614553 -3385.12952694]
------
Step:1, Action:South
State  208
Old Q Values:  [  345.9832684    443.99128834 -2651.70614553 -3385.12952694]
New Q values:  [  345.9832684    305.99856557 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  390.76860922 -8656.02923281 -7525.7277781    410.0068341 ]
------
Step:2, Action:West
State  288
Old Q Values:  [  390.76860922 -8656.02923281 -7525.7277781    410.0068341 ]
New Q values:  [  390.76860922 -8656.02923281 -7525.7277781    280.68184257]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   370.93036309   281.44032947]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   370.93036309   281.44032947]
New Q values:  [-2527.46239811 -6212.61234477   265.002728     281.44032947]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  390.76860922 -8656.02923281 -7525.7277781    280.68184257]
------
Step:4, Action:North
State  288
Old Q Values:  [  390.76860922 -8656.02923281 -7525.7277781    280.68184257]
New Q values:  [  259.50242421 -8656.02923281 -7525.7277781    280.68184257]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  345.9832684    305.99856557 -2651.70614553 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [  345.9832684    305.99856557 -2651.70614553 -3385.12952694]
New Q values:  [  450.5195189    305.99856557 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  25
xxxxx
x gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1022.42070515 -6245.61866138   201.73793064]
------
Step:6, Action:South
State  136
Old Q Values:  [ -724.71310357  1022.42070515 -6245.61866138   201.73793064]
New Q values:  [ -724.71310357   543.52413773 -6245.61866138   201.73793064]
Reward: -1  Episode Reward:  24
xxxxx
x .gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  450.5195189    305.99856557 -2651.70614553 -3385.12952694]
------
Step:7, Action:South
State  208
Old Q Values:  [  450.5195189    305.99856557 -2651.70614553 -3385.12952694]
New Q values:  [  450.5195189    206.003979   -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  23
xxxxx
x g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  259.50242421 -8656.02923281 -7525.7277781    280.68184257]
------
Step:8, Action:West
State  288
Old Q Values:  [  259.50242421 -8656.02923281 -7525.7277781    280.68184257]
New Q values:  [  259.50242421 -8656.02923281 -7525.7277781    189.92779261]
Reward: -1  Episode Reward:  22
xxxxx
x .gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414  260.85018528]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   265.002728     281.44032947]
New Q values:  [-2527.46239811 -6212.61234477   265.002728    3852.62365284]
Reward: 9  Episode Reward:  31
xxxxx
x g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12448.82507018  3481.80752105  1322.21862914  2937.02145253]
------
Step:10, Action:North
State  261
Old Q Values:  [ 307.27696125 -289.59534477  202.93313136 -251.53897752]
New Q values:  [ 338.6019468  -289.59534477  202.93313136 -251.53897752]
Reward: 9  Episode Reward:  40
xxxxx
x .gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   700.970541   -6126.27981178  -180.6       ]
------
Step:11, Action:South
State  181
Old Q Values:  [   13.85659648   700.970541   -6126.27981178  -180.6       ]
New Q values:  [   13.85659648   381.36880044 -6126.27981178  -180.6       ]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 338.6019468  -289.59534477  202.93313136 -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [ 338.6019468  -289.59534477  202.93313136 -251.53897752]
New Q values:  [ 249.25141885 -289.59534477  202.93313136 -251.53897752]
Reward: -1  Episode Reward:  38
xxxxx
x .gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   381.36880044 -6126.27981178  -180.6       ]
------
Step:13, Action:South
State  181
Old Q Values:  [   13.85659648   381.36880044 -6126.27981178  -180.6       ]
New Q values:  [   13.85659648   226.72294583 -6126.27981178  -180.6       ]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 249.25141885 -289.59534477  202.93313136 -251.53897752]
------
Step:14, Action:North
State  260
Old Q Values:  [-4187.30744448 -6457.4598       555.13514524 -7094.93143822]
New Q values:  [   38.22487661 -6457.4598       555.13514524 -7094.93143822]
Reward: -1  Episode Reward:  36
xxxxx
xg. x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   987.74584625  5712.49284801     0.        ]
------
Step:15, Action:East
State  180
Old Q Values:  [-1367.02476015   987.74584625  5712.49284801     0.        ]
New Q values:  [-1367.02476015   987.74584625  4845.97721139     0.        ]
Reward: 9  Episode Reward:  45
xxxxx
xg. x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.69358809e+03 7.21747546e+03 8.51860024e+03]
------
Step:16, Action:West
State  192
Old Q Values:  [3.89777037e-01 3.69358809e+03 7.21747546e+03 8.51860024e+03]
New Q values:  [ 3.89777037e-01  3.69358809e+03  7.21747546e+03 -1.13936674e+03]
Reward: -10001  Episode Reward:  -9956
xxxxx
x . x
xg  x
x   x
xxxxx
Episode # 300
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  259.50242421 -8656.02923281 -7525.7277781    189.92779261]
------
Step:1, Action:North
State  288
Old Q Values:  [  259.50242421 -8656.02923281 -7525.7277781    189.92779261]
New Q values:  [  244.35682535 -8656.02923281 -7525.7277781    189.92779261]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  450.5195189    206.003979   -2651.70614553 -3385.12952694]
------
Step:2, Action:North
State  208
Old Q Values:  [  450.5195189    206.003979   -2651.70614553 -3385.12952694]
New Q values:  [  348.66504888   206.003979   -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   543.52413773 -6245.61866138   201.73793064]
------
Step:3, Action:South
State  136
Old Q Values:  [ -724.71310357   543.52413773 -6245.61866138   201.73793064]
New Q values:  [ -724.71310357   321.40916976 -6245.61866138   201.73793064]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  348.66504888   206.003979   -2651.70614553 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [  348.66504888   206.003979   -2651.70614553 -3385.12952694]
New Q values:  [-5764.71122952   206.003979   -2651.70614553 -3385.12952694]
Reward: -10001  Episode Reward:  -9984
xxxxx
x..gx
x.. x
x . x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  506.19063565 5046.168126      0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [  22.25138791  506.19063565 5046.168126      0.        ]
New Q values:  [  22.25138791  506.19063565 5672.512311      0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3081.56358951  1388.1600337  12162.15020199]
------
Step:2, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  2.92601219e+03  1.13258904e+04]
New Q values:  [-6.00000000e-01  1.06176528e+02  2.92601219e+03  6.23150984e+03]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  506.19063565 5672.512311      0.        ]
------
Step:3, Action:East
State  181
Old Q Values:  [   13.85659648   226.72294583 -6126.27981178  -180.6       ]
New Q values:  [   13.85659648   226.72294583 -6285.8692866   -180.6       ]
Reward: -10001  Episode Reward:  -9993
xxxxx
x...x
x g.x
x. .x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414  260.85018528]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   265.002728    3852.62365284]
New Q values:  [-2527.46239811 -6212.61234477   265.002728    1621.22488679]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 249.25141885 -289.59534477  202.93313136 -251.53897752]
------
Step:2, Action:North
State  261
Old Q Values:  [ 249.25141885 -289.59534477  202.93313136 -251.53897752]
New Q values:  [ 173.11745129 -289.59534477  202.93313136 -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   226.72294583 -6285.8692866   -180.6       ]
------
Step:3, Action:South
State  181
Old Q Values:  [   13.85659648   226.72294583 -6285.8692866   -180.6       ]
New Q values:  [   13.85659648   150.96911774 -6285.8692866   -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 173.11745129 -289.59534477  202.93313136 -251.53897752]
------
Step:4, Action:East
State  261
Old Q Values:  [ 173.11745129 -289.59534477  202.93313136 -251.53897752]
New Q values:  [ 173.11745129 -289.59534477  158.82830813 -251.53897752]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414  260.85018528]
------
Step:5, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414  260.85018528]
New Q values:  [  37.74111519 -168.92307549 -452.95359414  155.6753095 ]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 173.11745129 -289.59534477  158.82830813 -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [ 173.11745129 -289.59534477  158.82830813 -251.53897752]
New Q values:  [ 113.93771584 -289.59534477  158.82830813 -251.53897752]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   150.96911774 -6285.8692866   -180.6       ]
------
Step:7, Action:South
State  181
Old Q Values:  [   13.85659648   150.96911774 -6285.8692866   -180.6       ]
New Q values:  [   13.85659648   107.43613953 -6285.8692866   -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
x.g x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  158.82830813 -251.53897752]
------
Step:8, Action:East
State  261
Old Q Values:  [ 113.93771584 -289.59534477  158.82830813 -251.53897752]
New Q values:  [ 113.93771584 -289.59534477  549.29878929 -251.53897752]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   265.002728    1621.22488679]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   265.002728    1621.22488679]
New Q values:  [-2527.46239811 -6212.61234477   265.002728     812.6795915 ]
Reward: -1  Episode Reward:  11
xxxxx
x.g x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  549.29878929 -251.53897752]
------
Step:10, Action:East
State  261
Old Q Values:  [ 113.93771584 -289.59534477  549.29878929 -251.53897752]
New Q values:  [ 113.93771584 -289.59534477  462.92339317 -251.53897752]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   265.002728     812.6795915 ]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   265.002728     812.6795915 ]
New Q values:  [-2527.46239811 -6212.61234477   265.002728     491.01238017]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[   38.22487661 -6457.4598       555.13514524 -7094.93143822]
------
Step:12, Action:East
State  260
Old Q Values:  [   38.22487661 -6457.4598       555.13514524 -7094.93143822]
New Q values:  [   38.22487661 -6457.4598       368.75777215 -7094.93143822]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   265.002728     491.01238017]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   265.002728     491.01238017]
New Q values:  [-2527.46239811 -6212.61234477   184.70813881   491.01238017]
Reward: 9  Episode Reward:  17
xxxxx
x.. x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  244.35682535 -8656.02923281 -7525.7277781    189.92779261]
------
Step:14, Action:North
State  288
Old Q Values:  [  244.35682535 -8656.02923281 -7525.7277781    189.92779261]
New Q values:  [  164.94392384 -8656.02923281 -7525.7277781    189.92779261]
Reward: 9  Episode Reward:  26
xxxxx
xg. x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5764.71122952   206.003979   -2651.70614553 -3385.12952694]
------
Step:15, Action:South
State  208
Old Q Values:  [-5764.71122952   206.003979   -2651.70614553 -3385.12952694]
New Q values:  [-5764.71122952   138.77992938 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  164.94392384 -8656.02923281 -7525.7277781    189.92779261]
------
Step:16, Action:West
State  288
Old Q Values:  [  164.94392384 -8656.02923281 -7525.7277781    189.92779261]
New Q values:  [  164.94392384 -8656.02923281 -7525.7277781    222.6748311 ]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   184.70813881   491.01238017]
------
Step:17, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   184.70813881   491.01238017]
New Q values:  [-2527.46239811 -6212.61234477   140.08570485   491.01238017]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  164.94392384 -8656.02923281 -7525.7277781    222.6748311 ]
------
Step:18, Action:North
State  288
Old Q Values:  [  164.94392384 -8656.02923281 -7525.7277781    222.6748311 ]
New Q values:  [  163.36933227 -8656.02923281 -7525.7277781    222.6748311 ]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 304.63919487  326.63920912 -180.6           3.52184257]
------
Step:19, Action:South
State  210
Old Q Values:  [ 304.63919487  326.63920912 -180.6           3.52184257]
New Q values:  [ 304.63919487  196.85813298 -180.6           3.52184257]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  163.36933227 -8656.02923281 -7525.7277781    222.6748311 ]
------
Step:20, Action:North
State  288
Old Q Values:  [  163.36933227 -8656.02923281 -7525.7277781    222.6748311 ]
New Q values:  [  106.38171172 -8656.02923281 -7525.7277781    222.6748311 ]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5764.71122952   138.77992938 -2651.70614553 -3385.12952694]
------
Step:21, Action:South
State  208
Old Q Values:  [-5764.71122952   138.77992938 -2651.70614553 -3385.12952694]
New Q values:  [-5764.71122952   121.71442108 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  19
xxxxx
x.g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  106.38171172 -8656.02923281 -7525.7277781    222.6748311 ]
------
Step:22, Action:West
State  288
Old Q Values:  [  106.38171172 -8656.02923281 -7525.7277781    222.6748311 ]
New Q values:  [  106.38171172 -8656.02923281 -7525.7277781    235.77364649]
Reward: -1  Episode Reward:  18
xxxxx
xg. x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   140.08570485   491.01238017]
------
Step:23, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   140.08570485   491.01238017]
New Q values:  [-2527.46239811 -6212.61234477   140.08570485   306.43228371]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[   38.22487661 -6457.4598       368.75777215 -7094.93143822]
------
Step:24, Action:East
State  260
Old Q Values:  [   38.22487661 -6457.4598       368.75777215 -7094.93143822]
New Q values:  [   38.22487661 -6457.4598       238.83279397 -7094.93143822]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   140.08570485   306.43228371]
------
Step:25, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   140.08570485   306.43228371]
New Q values:  [-2527.46239811 -6212.61234477   126.16637589   306.43228371]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  106.38171172 -8656.02923281 -7525.7277781    235.77364649]
------
Step:26, Action:West
State  288
Old Q Values:  [  106.38171172 -8656.02923281 -7525.7277781    235.77364649]
New Q values:  [  106.38171172 -8656.02923281 -7525.7277781    185.63914371]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   126.16637589   306.43228371]
------
Step:27, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   126.16637589   306.43228371]
New Q values:  [-2527.46239811 -6212.61234477   126.16637589   193.62275168]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[   38.22487661 -6457.4598       238.83279397 -7094.93143822]
------
Step:28, Action:East
State  257
Old Q Values:  [12448.82507018  3481.80752105  1322.21862914  2937.02145253]
New Q values:  [12448.82507018  3481.80752105   586.37427716  2937.02145253]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   126.16637589   193.62275168]
------
Step:29, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   126.16637589   193.62275168]
New Q values:  [-2527.46239811 -6212.61234477   126.16637589   215.72611862]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  462.92339317 -251.53897752]
------
Step:30, Action:North
State  257
Old Q Values:  [12448.82507018  3481.80752105   586.37427716  2937.02145253]
New Q values:  [9910.34190115 3481.80752105  586.37427716 2937.02145253]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5361.39493382     0.        ]
------
Step:31, Action:South
State  181
Old Q Values:  [   13.85659648   107.43613953 -6285.8692866   -180.6       ]
New Q values:  [   13.85659648   181.25147376 -6285.8692866   -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  462.92339317 -251.53897752]
------
Step:32, Action:East
State  261
Old Q Values:  [ 113.93771584 -289.59534477  462.92339317 -251.53897752]
New Q values:  [ 113.93771584 -289.59534477  231.27195012 -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414  155.6753095 ]
------
Step:33, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   126.16637589   215.72611862]
New Q values:  [-2527.46239811 -6212.61234477   126.16637589  3058.79301779]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[9910.34190115 3481.80752105  586.37427716 2937.02145253]
------
Step:34, Action:North
State  257
Old Q Values:  [9910.34190115 3481.80752105  586.37427716 2937.02145253]
New Q values:  [8894.94863354 3481.80752105  586.37427716 2937.02145253]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5361.39493382     0.        ]
------
Step:35, Action:South
State  181
Old Q Values:  [   13.85659648   181.25147376 -6285.8692866   -180.6       ]
New Q values:  [   13.85659648   141.28217454 -6285.8692866   -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  231.27195012 -251.53897752]
------
Step:36, Action:East
State  257
Old Q Values:  [8894.94863354 3481.80752105  586.37427716 2937.02145253]
New Q values:  [8894.94863354 3481.80752105 1151.5876162  2937.02145253]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   126.16637589  3058.79301779]
------
Step:37, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414  155.6753095 ]
New Q values:  [  37.74111519 -168.92307549 -452.95359414  131.05170883]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  231.27195012 -251.53897752]
------
Step:38, Action:East
State  261
Old Q Values:  [ 113.93771584 -289.59534477  231.27195012 -251.53897752]
New Q values:  [ 113.93771584 -289.59534477  131.2242927  -251.53897752]
Reward: -1  Episode Reward:  2
xxxxx
x.. x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414  131.05170883]
------
Step:39, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414  131.05170883]
New Q values:  [  37.74111519 -168.92307549 -452.95359414   91.18797134]
Reward: -1  Episode Reward:  1
xxxxx
x.. x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  131.2242927  -251.53897752]
------
Step:40, Action:East
State  261
Old Q Values:  [ 113.93771584 -289.59534477  131.2242927  -251.53897752]
New Q values:  [ 113.93771584 -289.59534477   79.24610848 -251.53897752]
Reward: -1  Episode Reward:  0
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414   91.18797134]
------
Step:41, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   126.16637589  3058.79301779]
New Q values:  [-2527.46239811 -6212.61234477   126.16637589  3891.40179718]
Reward: -1  Episode Reward:  -1
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[8894.94863354 3481.80752105 1151.5876162  2937.02145253]
------
Step:42, Action:North
State  260
Old Q Values:  [   38.22487661 -6457.4598       238.83279397 -7094.93143822]
New Q values:  [ 1468.48311406 -6457.4598       238.83279397 -7094.93143822]
Reward: -1  Episode Reward:  -2
xxxxx
xg. x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   987.74584625  4845.97721139     0.        ]
------
Step:43, Action:East
State  176
Old Q Values:  [    0.          1631.23260021 11434.23928209     0.        ]
New Q values:  [   0.         1631.23260021 6744.33835094    0.        ]
Reward: 9  Episode Reward:  7
xxxxx
x.. x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  3.69358809e+03  7.21747546e+03 -1.13936674e+03]
------
Step:44, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  3.69358809e+03  7.21747546e+03 -1.13936674e+03]
New Q values:  [ 3.89777037e-01  3.69358809e+03  2.92290451e+03 -1.13936674e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5764.71122952   121.71442108 -2651.70614553 -3385.12952694]
------
Step:45, Action:South
State  208
Old Q Values:  [-5764.71122952   121.71442108 -2651.70614553 -3385.12952694]
New Q values:  [-5764.71122952   103.77751155 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  106.38171172 -8656.02923281 -7525.7277781    185.63914371]
------
Step:46, Action:West
State  288
Old Q Values:  [  106.38171172 -8656.02923281 -7525.7277781    185.63914371]
New Q values:  [  106.38171172 -8656.02923281 -7525.7277781    101.01204889]
Reward: -1  Episode Reward:  4
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414   91.18797134]
------
Step:47, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414   91.18797134]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 2704.3597786 ]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[8894.94863354 3481.80752105 1151.5876162  2937.02145253]
------
Step:48, Action:North
State  257
Old Q Values:  [8894.94863354 3481.80752105 1151.5876162  2937.02145253]
New Q values:  [23950.7031345   3481.80752105  1151.5876162   2937.02145253]
Reward: -1  Episode Reward:  2
xxxxx
x.. x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          9115.40382444 67977.7456036      0.        ]
------
Step:49, Action:East
State  177
Old Q Values:  [    0.         16438.03957692  5361.39493382     0.        ]
New Q values:  [    0.         16438.03957692  5162.98667077     0.        ]
Reward: -1  Episode Reward:  1
xxxxx
x.. x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816 10063.42899081]
------
Step:50, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  3.69358809e+03  2.92290451e+03 -1.13936674e+03]
New Q values:  [3.89777037e-01 3.69358809e+03 2.92290451e+03 4.47506518e+03]
Reward: -1  Episode Reward:  0
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5162.98667077     0.        ]
------
Step:51, Action:South
State  176
Old Q Values:  [   0.         1631.23260021 6744.33835094    0.        ]
New Q values:  [   0.         1327.79507613 6744.33835094    0.        ]
Reward: -1  Episode Reward:  -1
xxxxx
x.. x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[-1222.94287271     0.          2253.00678683   644.94785455]
------
Step:52, Action:East
State  257
Old Q Values:  [23950.7031345   3481.80752105  1151.5876162   2937.02145253]
New Q values:  [23950.7031345   3481.80752105  1627.45558563  2937.02145253]
Reward: -1  Episode Reward:  -2
xxxxx
x.. x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   126.16637589  3891.40179718]
------
Step:53, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 2704.3597786 ]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 8266.35485179]
Reward: -1  Episode Reward:  -3
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[23950.7031345   3481.80752105  1627.45558563  2937.02145253]
------
Step:54, Action:North
State  257
Old Q Values:  [23950.7031345   3481.80752105  1627.45558563  2937.02145253]
New Q values:  [14511.09312687  3481.80752105  1627.45558563  2937.02145253]
Reward: -1  Episode Reward:  -4
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5162.98667077     0.        ]
------
Step:55, Action:South
State  179
Old Q Values:  [    0.          9115.40382444 67977.7456036      0.        ]
New Q values:  [    0.          7998.88946784 67977.7456036      0.        ]
Reward: -1  Episode Reward:  -5
xxxxx
x.. x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[14511.09312687  3481.80752105  1627.45558563  2937.02145253]
------
Step:56, Action:North
State  257
Old Q Values:  [14511.09312687  3481.80752105  1627.45558563  2937.02145253]
New Q values:  [10735.24912383  3481.80752105  1627.45558563  2937.02145253]
Reward: -1  Episode Reward:  -6
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.         16438.03957692  5162.98667077     0.        ]
------
Step:57, Action:South
State  177
Old Q Values:  [    0.         16438.03957692  5162.98667077     0.        ]
New Q values:  [   0.         9795.19056792 5162.98667077    0.        ]
Reward: -1  Episode Reward:  -7
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[10735.24912383  3481.80752105  1627.45558563  2937.02145253]
------
Step:58, Action:North
State  257
Old Q Values:  [10735.24912383  3481.80752105  1627.45558563  2937.02145253]
New Q values:  [7232.05681991 3481.80752105 1627.45558563 2937.02145253]
Reward: -1  Episode Reward:  -8
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         9795.19056792 5162.98667077    0.        ]
------
Step:59, Action:South
State  177
Old Q Values:  [   0.         9795.19056792 5162.98667077    0.        ]
New Q values:  [   0.         6087.09327314 5162.98667077    0.        ]
Reward: -1  Episode Reward:  -9
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[7232.05681991 3481.80752105 1627.45558563 2937.02145253]
------
Step:60, Action:North
State  257
Old Q Values:  [7232.05681991 3481.80752105 1627.45558563 2937.02145253]
New Q values:  [4718.3507099  3481.80752105 1627.45558563 2937.02145253]
Reward: -1  Episode Reward:  -10
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         6087.09327314 5162.98667077    0.        ]
------
Step:61, Action:South
State  179
Old Q Values:  [    0.          7998.88946784 67977.7456036      0.        ]
New Q values:  [    0.          4614.46100011 67977.7456036      0.        ]
Reward: -1  Episode Reward:  -11
xxxxx
x.. x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[4718.3507099  3481.80752105 1627.45558563 2937.02145253]
------
Step:62, Action:North
State  256
Old Q Values:  [-1222.94287271     0.          2253.00678683   644.94785455]
New Q values:  [25869.19445322     0.          2253.00678683   644.94785455]
Reward: -1  Episode Reward:  -12
xxxxx
x.. x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:63, Action:East
State  176
Old Q Values:  [   0.         1327.79507613 6744.33835094    0.        ]
New Q values:  [   0.         1327.79507613 4039.65489346    0.        ]
Reward: -1  Episode Reward:  -13
xxxxx
x.. x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.69358809e+03 2.92290451e+03 4.47506518e+03]
------
Step:64, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.69358809e+03 2.92290451e+03 4.47506518e+03]
New Q values:  [3.89777037e-01 2.64425577e+03 2.92290451e+03 4.47506518e+03]
Reward: -1  Episode Reward:  -14
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   126.16637589  3891.40179718]
------
Step:65, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   126.16637589  3891.40179718]
New Q values:  [-2527.46239811 -6212.61234477   126.16637589  2971.46593184]
Reward: -1  Episode Reward:  -15
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[4718.3507099  3481.80752105 1627.45558563 2937.02145253]
------
Step:66, Action:North
State  260
Old Q Values:  [ 1468.48311406 -6457.4598       238.83279397 -7094.93143822]
New Q values:  [ 2040.58640904 -6457.4598       238.83279397 -7094.93143822]
Reward: -1  Episode Reward:  -16
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   987.74584625  4845.97721139     0.        ]
------
Step:67, Action:East
State  176
Old Q Values:  [   0.         1327.79507613 4039.65489346    0.        ]
New Q values:  [   0.         1327.79507613 2957.78151047    0.        ]
Reward: -1  Episode Reward:  -17
xxxxx
x.. x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.64425577e+03 2.92290451e+03 4.47506518e+03]
------
Step:68, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  2.92601219e+03  6.23150984e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  6.23150984e+03]
Reward: -1  Episode Reward:  -18
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 304.63919487  196.85813298 -180.6           3.52184257]
------
Step:69, Action:North
State  208
Old Q Values:  [-5764.71122952   103.77751155 -2651.70614553 -3385.12952694]
New Q values:  [ 6301.54992067   103.77751155 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  -19
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  1710.24475768  -180.00807518 28693.44804161]
------
Step:70, Action:West
State  130
Old Q Values:  [18220.41077038  1710.24475768  -180.00807518 28693.44804161]
New Q values:  [18220.41077038  1710.24475768  -180.00807518 28344.63455359]
Reward: 9  Episode Reward:  -10
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 56206.18445649]
------
Step:71, Action:West
State  114
Old Q Values:  [ -180.6        -8280.92007422   112.85376583 56206.18445649]
New Q values:  [ -180.6        -8280.92007422   112.85376583 82571.71898528]
Reward: 100009  Episode Reward:  99999
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  106.38171172 -8656.02923281 -7525.7277781    101.01204889]
------
Step:1, Action:North
State  288
Old Q Values:  [  106.38171172 -8656.02923281 -7525.7277781    101.01204889]
New Q values:  [ 1938.41766089 -8656.02923281 -7525.7277781    101.01204889]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6301.54992067   103.77751155 -2651.70614553 -3385.12952694]
------
Step:2, Action:North
State  208
Old Q Values:  [ 6301.54992067   103.77751155 -2651.70614553 -3385.12952694]
New Q values:  [ 2582.99099825   103.77751155 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          189.90343327]
------
Step:3, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          189.90343327]
New Q values:  [ -180.6        -3419.331151    -180.6          205.55603804]
Reward: 9  Episode Reward:  27
xxxxx
x a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   348.166234     413.98221578]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   348.166234     413.98221578]
New Q values:  [ -281.736      -8877.87327254   348.166234     230.92830844]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  219.78474043 -252.78192178]
------
Step:5, Action:East
State  105
Old Q Values:  [-180.6         162.4976908   -98.31015718    0.        ]
New Q values:  [ -180.6          162.4976908  -5851.25726525     0.        ]
Reward: -10001  Episode Reward:  -9975
xxxxx
x g x
x.. x
x.. x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   126.16637589  2971.46593184]
------
Step:1, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        1825.23222059  930.11279518]
New Q values:  [  16.82637525 -180.6        1825.23222059  989.62104078]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2040.58640904 -6457.4598       238.83279397 -7094.93143822]
------
Step:2, Action:East
State  261
Old Q Values:  [ 113.93771584 -289.59534477   79.24610848 -251.53897752]
New Q values:  [ 113.93771584 -289.59534477  578.66810957 -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1825.23222059  989.62104078]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   126.16637589  2971.46593184]
New Q values:  [-2527.46239811 -6212.61234477   637.39184862  2971.46593184]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1938.41766089 -8656.02923281 -7525.7277781    101.01204889]
------
Step:4, Action:North
State  288
Old Q Values:  [ 1938.41766089 -8656.02923281 -7525.7277781    101.01204889]
New Q values:  [ 1555.66436383 -8656.02923281 -7525.7277781    101.01204889]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2582.99099825   103.77751155 -2651.70614553 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [ 2582.99099825   103.77751155 -2651.70614553 -3385.12952694]
New Q values:  [ 1100.26321071   103.77751155 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  35
xxxxx
x .ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          205.55603804]
------
Step:6, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          205.55603804]
New Q values:  [ -180.6        -3419.331151    -180.6          192.07228542]
Reward: 9  Episode Reward:  44
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   348.166234     230.92830844]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   348.166234     230.92830844]
New Q values:  [ -281.736      -8877.87327254   196.28817922   230.92830844]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          192.07228542]
------
Step:8, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          192.07228542]
New Q values:  [ -180.6       -3419.331151   -180.6         145.5074067]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   196.28817922   230.92830844]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   196.28817922   230.92830844]
New Q values:  [ -281.736      -8877.87327254   196.28817922   157.7067455 ]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  219.78474043 -252.78192178]
------
Step:10, Action:East
State  106
Old Q Values:  [-1.80600000e+02 -6.00060000e+03  3.69944132e+00 -1.80600000e+02]
New Q values:  [ -180.6       -6000.6          59.7662303  -180.6      ]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   196.28817922   157.7067455 ]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   196.28817922   157.7067455 ]
New Q values:  [ -281.736      -8877.87327254   121.5674937    157.7067455 ]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6       -3419.331151   -180.6         145.5074067]
------
Step:12, Action:West
State  138
Old Q Values:  [ -180.6       -3419.331151   -180.6         145.5074067]
New Q values:  [ -180.6        -3419.331151    -180.6           99.85383249]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   140.83623271    24.38756513]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   121.5674937    157.7067455 ]
New Q values:  [ -281.736      -8877.87327254    77.98314723   157.7067455 ]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6           99.85383249]
------
Step:14, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6           99.85383249]
New Q values:  [ -180.6        -3419.331151    -180.6           81.59240281]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   140.83623271    24.38756513]
------
Step:15, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   140.83623271    24.38756513]
New Q values:  [ -253.44886264 -1902.20915811    80.21221393    24.38756513]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6           81.59240281]
------
Step:16, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6           81.59240281]
New Q values:  [ -180.6        -3419.331151    -180.6           79.34898478]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    77.98314723   157.7067455 ]
------
Step:17, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    77.98314723   157.7067455 ]
New Q values:  [ -281.736      -8877.87327254    77.98314723   128.41812033]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  219.78474043 -252.78192178]
------
Step:18, Action:East
State  106
Old Q Values:  [ -180.6       -6000.6          59.7662303  -180.6      ]
New Q values:  [ -180.6        -6000.6           61.83192822  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    77.98314723   128.41812033]
------
Step:19, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    77.98314723   128.41812033]
New Q values:  [ -281.736      -8877.87327254    77.98314723   116.70267026]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  219.78474043 -252.78192178]
------
Step:20, Action:East
State  106
Old Q Values:  [ -180.6        -6000.6           61.83192822  -180.6       ]
New Q values:  [ -180.6        -6000.6           59.14357236  -180.6       ]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    77.98314723   116.70267026]
------
Step:21, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    77.98314723   116.70267026]
New Q values:  [ -281.736      -8877.87327254    77.98314723   143.36253445]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634   324.2715545   -180.6       ]
------
Step:22, Action:East
State  104
Old Q Values:  [-8.65284000e+03  4.32678599e-01  1.04563937e+03 -8.65284000e+03]
New Q values:  [-8.65284000e+03  4.32678599e-01  6.06322547e+02 -8.65284000e+03]
Reward: -1  Episode Reward:  28
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         628.88932539  -5587.95006911]
------
Step:23, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    77.98314723   143.36253445]
New Q values:  [ -281.736      -8877.87327254    54.39795432   143.36253445]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6           79.34898478]
------
Step:24, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6           79.34898478]
New Q values:  [ -180.6        -3419.331151    -180.6           74.14835425]
Reward: -1  Episode Reward:  26
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    54.39795432   143.36253445]
------
Step:25, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         628.88932539  -5587.95006911]
New Q values:  [-10156.11771313  -5995.686         628.88932539  -2187.0307204 ]
Reward: -1  Episode Reward:  25
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          162.4976908  -5851.25726525     0.        ]
------
Step:26, Action:South
State  109
Old Q Values:  [ -241.10880094   536.32915387 -8213.80649336  -180.6       ]
New Q values:  [ -241.10880094   262.31631391 -8213.80649336  -180.6       ]
Reward: 9  Episode Reward:  34
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   141.28217454 -6285.8692866   -180.6       ]
------
Step:27, Action:South
State  181
Old Q Values:  [   13.85659648   141.28217454 -6285.8692866   -180.6       ]
New Q values:  [   13.85659648   229.51330269 -6285.8692866   -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  578.66810957 -251.53897752]
------
Step:28, Action:East
State  260
Old Q Values:  [ 2040.58640904 -6457.4598       238.83279397 -7094.93143822]
New Q values:  [ 2040.58640904 -6457.4598       986.37289714 -7094.93143822]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   637.39184862  2971.46593184]
------
Step:29, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   637.39184862  2971.46593184]
New Q values:  [-2527.46239811 -6212.61234477   637.39184862  1800.16229545]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2040.58640904 -6457.4598       986.37289714 -7094.93143822]
------
Step:30, Action:East
State  260
Old Q Values:  [ 2040.58640904 -6457.4598       986.37289714 -7094.93143822]
New Q values:  [ 2040.58640904 -6457.4598       933.99784749 -7094.93143822]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   637.39184862  1800.16229545]
------
Step:31, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   637.39184862  1800.16229545]
New Q values:  [-2527.46239811 -6212.61234477   637.39184862  1331.64084089]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2040.58640904 -6457.4598       933.99784749 -7094.93143822]
------
Step:32, Action:East
State  257
Old Q Values:  [4718.3507099  3481.80752105 1627.45558563 2937.02145253]
New Q values:  [4718.3507099  3481.80752105 1049.87448652 2937.02145253]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   637.39184862  1331.64084089]
------
Step:33, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   637.39184862  1331.64084089]
New Q values:  [-2527.46239811 -6212.61234477   637.39184862   705.65676923]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477  578.66810957 -251.53897752]
------
Step:34, Action:East
State  261
Old Q Values:  [ 113.93771584 -289.59534477  578.66810957 -251.53897752]
New Q values:  [ 113.93771584 -289.59534477 2710.77369936 -251.53897752]
Reward: -1  Episode Reward:  26
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 8266.35485179]
------
Step:35, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   637.39184862   705.65676923]
New Q values:  [-2527.46239811 -6212.61234477   637.39184862  1094.8948175 ]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477 2710.77369936 -251.53897752]
------
Step:36, Action:East
State  261
Old Q Values:  [ 113.93771584 -289.59534477 2710.77369936 -251.53897752]
New Q values:  [ 113.93771584 -289.59534477 3563.61593528 -251.53897752]
Reward: -1  Episode Reward:  24
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -452.95359414 8266.35485179]
------
Step:37, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 8266.35485179]
New Q values:  [  37.74111519 -168.92307549 -452.95359414 4375.0267213 ]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477 3563.61593528 -251.53897752]
------
Step:38, Action:East
State  257
Old Q Values:  [4718.3507099  3481.80752105 1049.87448652 2937.02145253]
New Q values:  [4718.3507099  3481.80752105  747.81823986 2937.02145253]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   637.39184862  1094.8948175 ]
------
Step:39, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   637.39184862  1094.8948175 ]
New Q values:  [-2527.46239811 -6212.61234477   637.39184862  1506.44270758]
Reward: -1  Episode Reward:  21
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477 3563.61593528 -251.53897752]
------
Step:40, Action:East
State  257
Old Q Values:  [4718.3507099  3481.80752105  747.81823986 2937.02145253]
New Q values:  [4718.3507099  3481.80752105  750.46010822 2937.02145253]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   637.39184862  1506.44270758]
------
Step:41, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   637.39184862  1506.44270758]
New Q values:  [-2527.46239811 -6212.61234477   637.39184862  1214.15300575]
Reward: -1  Episode Reward:  19
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2040.58640904 -6457.4598       933.99784749 -7094.93143822]
------
Step:42, Action:East
State  260
Old Q Values:  [ 2040.58640904 -6457.4598       933.99784749 -7094.93143822]
New Q values:  [ 2040.58640904 -6457.4598       737.24504072 -7094.93143822]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   637.39184862  1214.15300575]
------
Step:43, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   637.39184862  1214.15300575]
New Q values:  [-2527.46239811 -6212.61234477   721.0560486   1214.15300575]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1555.66436383 -8656.02923281 -7525.7277781    101.01204889]
------
Step:44, Action:North
State  288
Old Q Values:  [ 1555.66436383 -8656.02923281 -7525.7277781    101.01204889]
New Q values:  [  713.05750399 -8656.02923281 -7525.7277781    101.01204889]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 304.63919487  196.85813298 -180.6           3.52184257]
------
Step:45, Action:North
State  208
Old Q Values:  [ 1100.26321071   103.77751155 -2651.70614553 -3385.12952694]
New Q values:  [  461.74979056   103.77751155 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6           74.14835425]
------
Step:46, Action:West
State  136
Old Q Values:  [ -724.71310357   321.40916976 -6245.61866138   201.73793064]
New Q values:  [ -724.71310357   321.40916976 -6245.61866138   268.76196987]
Reward: -1  Episode Reward:  14
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         628.88932539  -2187.0307204 ]
------
Step:47, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         628.88932539  -2187.0307204 ]
New Q values:  [-10156.11771313  -5995.686         347.37848108  -2187.0307204 ]
Reward: -1  Episode Reward:  13
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   321.40916976 -6245.61866138   268.76196987]
------
Step:48, Action:South
State  136
Old Q Values:  [ -724.71310357   321.40916976 -6245.61866138   268.76196987]
New Q values:  [ -724.71310357   266.48860507 -6245.61866138   268.76196987]
Reward: -1  Episode Reward:  12
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  461.74979056   103.77751155 -2651.70614553 -3385.12952694]
------
Step:49, Action:North
State  208
Old Q Values:  [  461.74979056   103.77751155 -2651.70614553 -3385.12952694]
New Q values:  [  206.3444225    103.77751155 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6           74.14835425]
------
Step:50, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6           74.14835425]
New Q values:  [ -180.6        -3419.331151    -180.6           72.06810203]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    54.39795432   143.36253445]
------
Step:51, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    54.39795432   143.36253445]
New Q values:  [ -281.736      -8877.87327254    54.39795432   154.02648013]
Reward: -1  Episode Reward:  9
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634   324.2715545   -180.6       ]
------
Step:52, Action:East
State  99
Old Q Values:  [    0.          8192.70092842 28200.98512991     0.        ]
New Q values:  [    0.          8192.70092842 36051.30974755     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 82571.71898528]
------
Step:53, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    80.21221393    24.38756513]
New Q values:  [ -253.44886264 -1902.20915811    80.21221393   573.61798999]
Reward: -1  Episode Reward:  7
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1881.54321313  862.42400667 -272.09726687]
------
Step:54, Action:South
State  99
Old Q Values:  [    0.          8192.70092842 36051.30974755     0.        ]
New Q values:  [    0.          5102.60835331 36051.30974755     0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         6087.09327314 5162.98667077    0.        ]
------
Step:55, Action:South
State  183
Old Q Values:  [  22.25138791  506.19063565 5672.512311      0.        ]
New Q values:  [  22.25138791 1270.96103485 5672.512311      0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 113.93771584 -289.59534477 3563.61593528 -251.53897752]
------
Step:56, Action:North
State  261
Old Q Values:  [ 113.93771584 -289.59534477 3563.61593528 -251.53897752]
New Q values:  [1746.72877963 -289.59534477 3563.61593528 -251.53897752]
Reward: -1  Episode Reward:  4
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791 1270.96103485 5672.512311      0.        ]
------
Step:57, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 67977.7456036      0.        ]
New Q values:  [    0.          4614.46100011 89065.95119467     0.        ]
Reward: 100009  Episode Reward:  100013
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   229.51330269 -6285.8692866   -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [   13.85659648   229.51330269 -6285.8692866   -180.6       ]
New Q values:  [   13.85659648  1166.29010166 -6285.8692866   -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1746.72877963 -289.59534477 3563.61593528 -251.53897752]
------
Step:2, Action:East
State  261
Old Q Values:  [1746.72877963 -289.59534477 3563.61593528 -251.53897752]
New Q values:  [1746.72877963 -289.59534477 1795.09227584 -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   721.0560486   1214.15300575]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   721.0560486   1214.15300575]
New Q values:  [-2527.46239811 -6212.61234477   721.0560486   1097.23712501]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2040.58640904 -6457.4598       737.24504072 -7094.93143822]
------
Step:4, Action:North
State  260
Old Q Values:  [ 2040.58640904 -6457.4598       737.24504072 -7094.93143822]
New Q values:  [-3730.57227297 -6457.4598       737.24504072 -7094.93143822]
Reward: -10001  Episode Reward:  -9984
xxxxx
x.. x
xg..x
x  .x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    54.39795432   154.02648013]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    54.39795432   154.02648013]
New Q values:  [ -281.736      -8877.87327254    54.39795432   164.2920584 ]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -5648.89406634   324.2715545   -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134   608.44407335   669.94256777     0.        ]
New Q values:  [-8463.16477134   608.44407335   371.59057143     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xga.x
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         347.37848108  -2187.0307204 ]
------
Step:3, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         347.37848108  -2187.0307204 ]
New Q values:  [-10156.11771313  -5995.686         224.9799834   -2187.0307204 ]
Reward: 9  Episode Reward:  17
xxxxx
x gax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   266.48860507 -6245.61866138   268.76196987]
------
Step:4, Action:South
State  136
Old Q Values:  [ -724.71310357   266.48860507 -6245.61866138   268.76196987]
New Q values:  [ -724.71310357   762.50544392 -6245.61866138   268.76196987]
Reward: 9  Episode Reward:  26
xxxxx
x  gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  294.80374576  2168.36667296 -6170.35693855 -2387.54492731]
------
Step:5, Action:South
State  216
Old Q Values:  [  294.80374576  2168.36667296 -6170.35693855 -2387.54492731]
New Q values:  [  294.80374576  1086.66392038 -6170.35693855 -2387.54492731]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  713.05750399 -8656.02923281 -7525.7277781    101.01204889]
------
Step:6, Action:West
State  288
Old Q Values:  [  713.05750399 -8656.02923281 -7525.7277781    101.01204889]
New Q values:  [  713.05750399 -8656.02923281 -7525.7277781    374.97595706]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   721.0560486   1097.23712501]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   721.0560486   1097.23712501]
New Q values:  [-2527.46239811 -6212.61234477   721.0560486   1859.80006298]
Reward: 9  Episode Reward:  53
xxxxx
x   x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[4718.3507099  3481.80752105  750.46010822 2937.02145253]
------
Step:8, Action:North
State  257
Old Q Values:  [4718.3507099  3481.80752105  750.46010822 2937.02145253]
New Q values:  [63718.8682659   3481.80752105   750.46010822  2937.02145253]
Reward: 100009  Episode Reward:  100062
xxxxx
x   x
xag x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816 10063.42899081]
------
Step:1, Action:West
State  200
Old Q Values:  [ 6.28218634e+01  4.04386150e+03  7.15611367e+02 -8.40000000e-01]
New Q values:  [  62.8218634  4043.86149542  715.61136736  181.20343395]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   587.13144651 -4680.74267672  -244.98066897]
------
Step:2, Action:South
State  180
Old Q Values:  [-1367.02476015   987.74584625  4845.97721139     0.        ]
New Q values:  [-1367.02476015   621.67185072  4845.97721139     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3730.57227297 -6457.4598       737.24504072 -7094.93143822]
------
Step:3, Action:East
State  260
Old Q Values:  [-3730.57227297 -6457.4598       737.24504072 -7094.93143822]
New Q values:  [-3730.57227297 -6457.4598       858.23803518 -7094.93143822]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   721.0560486   1859.80006298]
------
Step:4, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        1825.23222059  989.62104078]
New Q values:  [  16.82637525 -180.6        1825.23222059  652.71982687]
Reward: -1  Episode Reward:  26
xxxxx
xg. x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3730.57227297 -6457.4598       858.23803518 -7094.93143822]
------
Step:5, Action:East
State  260
Old Q Values:  [-3730.57227297 -6457.4598       858.23803518 -7094.93143822]
New Q values:  [-3730.57227297 -6457.4598       900.63523297 -7094.93143822]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   721.0560486   1859.80006298]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   721.0560486   1859.80006298]
New Q values:  [-2527.46239811 -6212.61234477   721.0560486  -4986.48940492]
Reward: -10001  Episode Reward:  -9976
xxxxx
x.. x
x  .x
xg .x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    56.10465519]
------
Step:1, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         224.9799834   -2187.0307204 ]
New Q values:  [-10156.11771313  -5995.686         224.9799834    -790.71739399]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   262.31631391 -8213.80649336  -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1881.54321313  862.42400667 -272.09726687]
New Q values:  [-177.44732869  934.15671921  862.42400667 -272.09726687]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   587.13144651 -4680.74267672  -244.98066897]
------
Step:3, Action:South
State  180
Old Q Values:  [-1367.02476015   621.67185072  4845.97721139     0.        ]
New Q values:  [-1367.02476015   524.25931018  4845.97721139     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3730.57227297 -6457.4598       900.63523297 -7094.93143822]
------
Step:4, Action:East
State  261
Old Q Values:  [1746.72877963 -289.59534477 1795.09227584 -251.53897752]
New Q values:  [1746.72877963 -289.59534477 1271.00657651 -251.53897752]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1825.23222059  652.71982687]
------
Step:5, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 -452.95359414 4375.0267213 ]
New Q values:  [  37.74111519 -168.92307549   38.13581354 4375.0267213 ]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  713.05750399 -8656.02923281 -7525.7277781    374.97595706]
------
Step:6, Action:West
State  288
Old Q Values:  [  713.05750399 -8656.02923281 -7525.7277781    374.97595706]
New Q values:  [  713.05750399 -8656.02923281 -7525.7277781   1461.89839921]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549   38.13581354 4375.0267213 ]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   721.0560486  -4986.48940492]
New Q values:  [-2527.46239811 -6212.61234477   721.0560486  -1471.17712808]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1746.72877963 -289.59534477 1271.00657651 -251.53897752]
------
Step:8, Action:North
State  260
Old Q Values:  [-3730.57227297 -6457.4598       900.63523297 -7094.93143822]
New Q values:  [  -39.03574577 -6457.4598       900.63523297 -7094.93143822]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   524.25931018  4845.97721139     0.        ]
------
Step:9, Action:East
State  181
Old Q Values:  [   13.85659648  1166.29010166 -6285.8692866   -180.6       ]
New Q values:  [   13.85659648  1166.29010166 -1332.41716586  -180.6       ]
Reward: 9  Episode Reward:  51
xxxxx
x g x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3921.76849592    73.71095389     0.        ]
------
Step:10, Action:South
State  197
Old Q Values:  [1.32443385e-01 1.27418599e+03 0.00000000e+00 0.00000000e+00]
New Q values:  [1.32443385e-01 6.25622954e+02 0.00000000e+00 0.00000000e+00]
Reward: -1  Episode Reward:  50
xxxxx
x  gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         388.49519889   0.        ]
------
Step:11, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        1825.23222059  652.71982687]
New Q values:  [  16.82637525 -180.6        1168.062408    652.71982687]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  713.05750399 -8656.02923281 -7525.7277781   1461.89839921]
------
Step:12, Action:West
State  288
Old Q Values:  [  713.05750399 -8656.02923281 -7525.7277781   1461.89839921]
New Q values:  [  713.05750399 -8656.02923281 -7525.7277781    934.57808208]
Reward: -1  Episode Reward:  48
xxxxx
xg  x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1168.062408    652.71982687]
------
Step:13, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        1168.062408    652.71982687]
New Q values:  [  16.82637525 -180.6         746.99838782  652.71982687]
Reward: -1  Episode Reward:  47
xxxxx
x g x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  713.05750399 -8656.02923281 -7525.7277781    934.57808208]
------
Step:14, Action:West
State  288
Old Q Values:  [  713.05750399 -8656.02923281 -7525.7277781    934.57808208]
New Q values:  [  713.05750399 -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  46
xxxxx
xg  x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6         746.99838782  652.71982687]
------
Step:15, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6         746.99838782  652.71982687]
New Q values:  [  16.82637525 -180.6         512.11660633  652.71982687]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  713.05750399 -8656.02923281 -7525.7277781    597.33074918]
------
Step:16, Action:North
State  288
Old Q Values:  [  713.05750399 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [60382.01476006 -8656.02923281 -7525.7277781    597.33074918]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x  ax
xg  x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  206.3444225    103.77751155 -2651.70614553 -3385.12952694]
------
Step:1, Action:North
State  208
Old Q Values:  [  206.3444225    103.77751155 -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783   103.77751155 -2651.70614553 -3385.12952694]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  -39.03574577 -6457.4598       900.63523297 -7094.93143822]
------
Step:1, Action:East
State  260
Old Q Values:  [  -39.03574577 -6457.4598       900.63523297 -7094.93143822]
New Q values:  [  -39.03574577 -6457.4598       561.47004125 -7094.93143822]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6         512.11660633  652.71982687]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   721.0560486  -1471.17712808]
New Q values:  [-2527.46239811 -6212.61234477   721.0560486   -420.62983886]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  -39.03574577 -6457.4598       561.47004125 -7094.93143822]
------
Step:3, Action:East
State  260
Old Q Values:  [  -39.03574577 -6457.4598       561.47004125 -7094.93143822]
New Q values:  [  -39.03574577 -6457.4598       419.80396456 -7094.93143822]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6         512.11660633  652.71982687]
------
Step:4, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6         512.11660633  652.71982687]
New Q values:  [  16.82637525 -180.6         512.11660633  784.50656464]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1746.72877963 -289.59534477 1271.00657651 -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [1746.72877963 -289.59534477 1271.00657651 -251.53897752]
New Q values:  [1053.97854235 -289.59534477 1271.00657651 -251.53897752]
Reward: 9  Episode Reward:  15
xxxxx
x ..x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1166.29010166 -1332.41716586  -180.6       ]
------
Step:6, Action:South
State  183
Old Q Values:  [  22.25138791 1270.96103485 5672.512311      0.        ]
New Q values:  [  22.25138791  889.08638689 5672.512311      0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1053.97854235 -289.59534477 1271.00657651 -251.53897752]
------
Step:7, Action:East
State  261
Old Q Values:  [1053.97854235 -289.59534477 1271.00657651 -251.53897752]
New Q values:  [ 1053.97854235  -289.59534477 -5256.8454      -251.53897752]
Reward: -10001  Episode Reward:  -9987
xxxxx
x ..x
x ..x
x g.x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   721.0560486   -420.62983886]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   721.0560486   -420.62983886]
New Q values:  [-2527.46239811 -6212.61234477 18408.42684746  -420.62983886]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[60382.01476006 -8656.02923281 -7525.7277781    597.33074918]
------
Step:2, Action:North
State  288
Old Q Values:  [60382.01476006 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [24189.33915749 -8656.02923281 -7525.7277781    597.33074918]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783   103.77751155 -2651.70614553 -3385.12952694]
------
Step:3, Action:South
State  208
Old Q Values:  [-5683.31059783   103.77751155 -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783  7297.71275186 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  17
xxxxx
xg .x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24189.33915749 -8656.02923281 -7525.7277781    597.33074918]
------
Step:4, Action:North
State  288
Old Q Values:  [24189.33915749 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [11864.44948855 -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783  7297.71275186 -2651.70614553 -3385.12952694]
------
Step:5, Action:South
State  208
Old Q Values:  [-5683.31059783  7297.71275186 -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783  6477.81994731 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  15
xxxxx
xg .x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11864.44948855 -8656.02923281 -7525.7277781    597.33074918]
------
Step:6, Action:North
State  288
Old Q Values:  [11864.44948855 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [ 6688.52577962 -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  14
xxxxx
x. .x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783  6477.81994731 -2651.70614553 -3385.12952694]
------
Step:7, Action:South
State  210
Old Q Values:  [ 304.63919487  196.85813298 -180.6           3.52184257]
New Q values:  [ 304.63919487 2084.70098708 -180.6           3.52184257]
Reward: -1  Episode Reward:  13
xxxxx
x. .x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6688.52577962 -8656.02923281 -7525.7277781    597.33074918]
------
Step:8, Action:North
State  288
Old Q Values:  [ 6688.52577962 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [ 4618.15629604 -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  12
xxxxx
x. .x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783  6477.81994731 -2651.70614553 -3385.12952694]
------
Step:9, Action:South
State  208
Old Q Values:  [-5683.31059783  6477.81994731 -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783  3975.97486774 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  11
xxxxx
x. .x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4618.15629604 -8656.02923281 -7525.7277781    597.33074918]
------
Step:10, Action:North
State  288
Old Q Values:  [ 4618.15629604 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [ 3039.45497874 -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  10
xxxxx
x. .x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783  3975.97486774 -2651.70614553 -3385.12952694]
------
Step:11, Action:South
State  210
Old Q Values:  [ 304.63919487 2084.70098708 -180.6           3.52184257]
New Q values:  [ 304.63919487 1745.11688845 -180.6           3.52184257]
Reward: -1  Episode Reward:  9
xxxxx
x. .x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3039.45497874 -8656.02923281 -7525.7277781    597.33074918]
------
Step:12, Action:North
State  288
Old Q Values:  [ 3039.45497874 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [ 1738.71705803 -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 304.63919487 1745.11688845 -180.6           3.52184257]
------
Step:13, Action:South
State  208
Old Q Values:  [-5683.31059783  3975.97486774 -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783  2111.4050645  -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  7
xxxxx
x. .x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1738.71705803 -8656.02923281 -7525.7277781    597.33074918]
------
Step:14, Action:North
State  288
Old Q Values:  [ 1738.71705803 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [ 1328.30834256 -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783  2111.4050645  -2651.70614553 -3385.12952694]
------
Step:15, Action:South
State  208
Old Q Values:  [-5683.31059783  2111.4050645  -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783  1242.45452857 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  5
xxxxx
xg .x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1328.30834256 -8656.02923281 -7525.7277781    597.33074918]
------
Step:16, Action:North
State  288
Old Q Values:  [ 1328.30834256 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [  903.4596956  -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  4
xxxxx
x.g.x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783  1242.45452857 -2651.70614553 -3385.12952694]
------
Step:17, Action:South
State  208
Old Q Values:  [-5683.31059783  1242.45452857 -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783   767.41972011 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  3
xxxxx
x. .x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  903.4596956  -8656.02923281 -7525.7277781    597.33074918]
------
Step:18, Action:North
State  288
Old Q Values:  [  903.4596956  -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [  591.00979427 -8656.02923281 -7525.7277781    597.33074918]
Reward: -1  Episode Reward:  2
xxxxx
x.g.x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783   767.41972011 -2651.70614553 -3385.12952694]
------
Step:19, Action:South
State  208
Old Q Values:  [-5683.31059783   767.41972011 -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783   485.5671128  -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  1
xxxxx
x. gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  591.00979427 -8656.02923281 -7525.7277781    597.33074918]
------
Step:20, Action:West
State  288
Old Q Values:  [  591.00979427 -8656.02923281 -7525.7277781    597.33074918]
New Q values:  [  591.00979427 -8656.02923281 -7525.7277781   1550.84031606]
Reward: -1  Episode Reward:  0
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549   38.13581354 4375.0267213 ]
------
Step:21, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549   38.13581354 4375.0267213 ]
New Q values:  [   37.74111519  -168.92307549    38.13581354 20871.07116829]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[63718.8682659   3481.80752105   750.46010822  2937.02145253]
------
Step:22, Action:North
State  257
Old Q Values:  [63718.8682659   3481.80752105   750.46010822  2937.02145253]
New Q values:  [27319.0752883   3481.80752105   750.46010822  2937.02145253]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         6087.09327314 5162.98667077    0.        ]
------
Step:23, Action:South
State  183
Old Q Values:  [  22.25138791  889.08638689 5672.512311      0.        ]
New Q values:  [  22.25138791  671.22811746 5672.512311      0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1053.97854235  -289.59534477 -5256.8454      -251.53897752]
------
Step:24, Action:North
State  257
Old Q Values:  [27319.0752883   3481.80752105   750.46010822  2937.02145253]
New Q values:  [12753.15809726  3481.80752105   750.46010822  2937.02145253]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         6087.09327314 5162.98667077    0.        ]
------
Step:25, Action:South
State  180
Old Q Values:  [-1367.02476015   524.25931018  4845.97721139     0.        ]
New Q values:  [-1367.02476015   335.04491344  4845.97721139     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x. .x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  -39.03574577 -6457.4598       419.80396456 -7094.93143822]
------
Step:26, Action:East
State  260
Old Q Values:  [  -39.03574577 -6457.4598       419.80396456 -7094.93143822]
New Q values:  [  -39.03574577 -6457.4598      5689.84964006 -7094.93143822]
Reward: -1  Episode Reward:  14
xxxxx
xg .x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477 18408.42684746  -420.62983886]
------
Step:27, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477 18408.42684746  -420.62983886]
New Q values:  [-2527.46239811 -6212.61234477  7828.0228338   -420.62983886]
Reward: -1  Episode Reward:  13
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  591.00979427 -8656.02923281 -7525.7277781   1550.84031606]
------
Step:28, Action:West
State  288
Old Q Values:  [  591.00979427 -8656.02923281 -7525.7277781   1550.84031606]
New Q values:  [  591.00979427 -8656.02923281 -7525.7277781   6881.05747691]
Reward: -1  Episode Reward:  12
xxxxx
x. gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549    38.13581354 20871.07116829]
------
Step:29, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549    38.13581354 20871.07116829]
New Q values:  [  37.74111519 -168.92307549   38.13581354 8664.02203002]
Reward: -1  Episode Reward:  11
xxxxx
x. .x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1053.97854235  -289.59534477 -5256.8454      -251.53897752]
------
Step:30, Action:North
State  261
Old Q Values:  [ 1053.97854235  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [ 2122.74511024  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  10
xxxxx
x. .x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  671.22811746 5672.512311      0.        ]
------
Step:31, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 89065.95119467     0.        ]
New Q values:  [    0.          4614.46100011 37501.2334311      0.        ]
Reward: 9  Episode Reward:  19
xxxxx
x. .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  6.23150984e+03]
------
Step:32, Action:West
State  192
Old Q Values:  [3.89777037e-01 2.64425577e+03 2.92290451e+03 4.47506518e+03]
New Q values:  [3.89777037e-01 2.64425577e+03 2.92290451e+03 3.61555405e+03]
Reward: -1  Episode Reward:  18
xxxxx
x. .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         6087.09327314 5162.98667077    0.        ]
------
Step:33, Action:South
State  177
Old Q Values:  [   0.         6087.09327314 5162.98667077    0.        ]
New Q values:  [   0.         6260.18473843 5162.98667077    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12753.15809726  3481.80752105   750.46010822  2937.02145253]
------
Step:34, Action:North
State  260
Old Q Values:  [  -39.03574577 -6457.4598      5689.84964006 -7094.93143822]
New Q values:  [ 1437.57886511 -6457.4598      5689.84964006 -7094.93143822]
Reward: -1  Episode Reward:  16
xxxxx
xg .x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   335.04491344  4845.97721139     0.        ]
------
Step:35, Action:East
State  176
Old Q Values:  [   0.         1327.79507613 2957.78151047    0.        ]
New Q values:  [   0.         1327.79507613 2267.17882       0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x. .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.64425577e+03 2.92290451e+03 3.61555405e+03]
------
Step:36, Action:East
State  196
Old Q Values:  [-2469.90645144  3921.76849592    73.71095389     0.        ]
New Q values:  [-2469.90645144  3921.76849592   174.55451539     0.        ]
Reward: -1  Episode Reward:  14
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5683.31059783   485.5671128  -2651.70614553 -3385.12952694]
------
Step:37, Action:South
State  208
Old Q Values:  [-5683.31059783   485.5671128  -2651.70614553 -3385.12952694]
New Q values:  [-5683.31059783  2257.94408819 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  13
xxxxx
x. .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  591.00979427 -8656.02923281 -7525.7277781   6881.05747691]
------
Step:38, Action:West
State  288
Old Q Values:  [  591.00979427 -8656.02923281 -7525.7277781   6881.05747691]
New Q values:  [  591.00979427 -8656.02923281 -7525.7277781   5100.22984091]
Reward: -1  Episode Reward:  12
xxxxx
x. .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  7828.0228338   -420.62983886]
------
Step:39, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  7828.0228338   -420.62983886]
New Q values:  [-2527.46239811 -6212.61234477  4660.67808579  -420.62983886]
Reward: -1  Episode Reward:  11
xxxxx
x. .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  591.00979427 -8656.02923281 -7525.7277781   5100.22984091]
------
Step:40, Action:North
State  288
Old Q Values:  [  591.00979427 -8656.02923281 -7525.7277781   5100.22984091]
New Q values:  [  759.33898424 -8656.02923281 -7525.7277781   5100.22984091]
Reward: -1  Episode Reward:  10
xxxxx
x. .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 304.63919487 1745.11688845 -180.6           3.52184257]
------
Step:41, Action:North
State  208
Old Q Values:  [-5683.31059783  2257.94408819 -2651.70614553 -3385.12952694]
New Q values:  [ 6235.46612695  2257.94408819 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  19
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  1710.24475768  -180.00807518 28344.63455359]
------
Step:42, Action:West
State  130
Old Q Values:  [18220.41077038  1710.24475768  -180.00807518 28344.63455359]
New Q values:  [18220.41077038  1710.24475768  -180.00807518 36108.76951702]
Reward: -1  Episode Reward:  18
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 82571.71898528]
------
Step:43, Action:West
State  114
Old Q Values:  [ -180.6        -8280.92007422   112.85376583 82571.71898528]
New Q values:  [  -180.6         -8280.92007422    112.85376583 103849.48051838]
Reward: 100009  Episode Reward:  100027
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  759.33898424 -8656.02923281 -7525.7277781   5100.22984091]
------
Step:1, Action:West
State  288
Old Q Values:  [  759.33898424 -8656.02923281 -7525.7277781   5100.22984091]
New Q values:  [  759.33898424 -8656.02923281 -7525.7277781   3443.6953621 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  4660.67808579  -420.62983886]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  4660.67808579  -420.62983886]
New Q values:  [-2527.46239811 -6212.61234477  2896.77984295  -420.62983886]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  759.33898424 -8656.02923281 -7525.7277781   3443.6953621 ]
------
Step:3, Action:West
State  288
Old Q Values:  [  759.33898424 -8656.02923281 -7525.7277781   3443.6953621 ]
New Q values:  [  759.33898424 -8656.02923281 -7525.7277781   2245.91209772]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  2896.77984295  -420.62983886]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  2896.77984295  -420.62983886]
New Q values:  [-2527.46239811 -6212.61234477  1831.8855665   -420.62983886]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  759.33898424 -8656.02923281 -7525.7277781   2245.91209772]
------
Step:5, Action:West
State  288
Old Q Values:  [  759.33898424 -8656.02923281 -7525.7277781   2245.91209772]
New Q values:  [  759.33898424 -8656.02923281 -7525.7277781   1447.33050904]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1831.8855665   -420.62983886]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1831.8855665   -420.62983886]
New Q values:  [-2527.46239811 -6212.61234477  1166.35337931  -420.62983886]
Reward: -1  Episode Reward:  4
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  759.33898424 -8656.02923281 -7525.7277781   1447.33050904]
------
Step:7, Action:West
State  288
Old Q Values:  [  759.33898424 -8656.02923281 -7525.7277781   1447.33050904]
New Q values:  [  759.33898424 -8656.02923281 -7525.7277781    928.23821741]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477  1166.35337931  -420.62983886]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477  1166.35337931  -420.62983886]
New Q values:  [-2527.46239811 -6212.61234477   744.41281695  -420.62983886]
Reward: -1  Episode Reward:  2
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  759.33898424 -8656.02923281 -7525.7277781    928.23821741]
------
Step:9, Action:West
State  288
Old Q Values:  [  759.33898424 -8656.02923281 -7525.7277781    928.23821741]
New Q values:  [  759.33898424 -8656.02923281 -7525.7277781    594.01913205]
Reward: -1  Episode Reward:  1
xxxxx
x.g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   744.41281695  -420.62983886]
------
Step:10, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549   38.13581354 8664.02203002]
New Q values:  [  37.74111519 -168.92307549  242.45602069 8664.02203002]
Reward: -1  Episode Reward:  0
xxxxx
x..gx
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  759.33898424 -8656.02923281 -7525.7277781    594.01913205]
------
Step:11, Action:North
State  288
Old Q Values:  [  759.33898424 -8656.02923281 -7525.7277781    594.01913205]
New Q values:  [-3820.22456822 -8656.02923281 -7525.7277781    594.01913205]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.  x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1166.29010166 -1332.41716586  -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [   13.85659648  1166.29010166 -1332.41716586  -180.6       ]
New Q values:  [   13.85659648  1108.73957374 -1332.41716586  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2122.74511024  -289.59534477 -5256.8454      -251.53897752]
------
Step:2, Action:North
State  261
Old Q Values:  [ 2122.74511024  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [ 1181.11991622  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1108.73957374 -1332.41716586  -180.6       ]
------
Step:3, Action:South
State  180
Old Q Values:  [-1367.02476015   335.04491344  4845.97721139     0.        ]
New Q values:  [-1367.02476015  1840.37285739  4845.97721139     0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1437.57886511 -6457.4598      5689.84964006 -7094.93143822]
------
Step:4, Action:East
State  260
Old Q Values:  [ 1437.57886511 -6457.4598      5689.84964006 -7094.93143822]
New Q values:  [ 1437.57886511 -6457.4598      2504.66370111 -7094.93143822]
Reward: 9  Episode Reward:  16
xxxxx
x.. x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   744.41281695  -420.62983886]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   744.41281695  -420.62983886]
New Q values:  [-2527.46239811 -6212.61234477   481.37086639  -420.62983886]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3820.22456822 -8656.02923281 -7525.7277781    594.01913205]
------
Step:6, Action:North
State  288
Old Q Values:  [-3820.22456822 -8656.02923281 -7525.7277781    594.01913205]
New Q values:  [ -999.15476075 -8656.02923281 -7525.7277781    594.01913205]
Reward: 9  Episode Reward:  34
xxxxx
x.. x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 304.63919487 1745.11688845 -180.6           3.52184257]
------
Step:7, Action:South
State  208
Old Q Values:  [ 6235.46612695  2257.94408819 -2651.70614553 -3385.12952694]
New Q values:  [ 6235.46612695  1080.78337489 -2651.70614553 -3385.12952694]
Reward: -1  Episode Reward:  33
xxxxx
x.. x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -999.15476075 -8656.02923281 -7525.7277781    594.01913205]
------
Step:8, Action:West
State  288
Old Q Values:  [ -999.15476075 -8656.02923281 -7525.7277781    594.01913205]
New Q values:  [ -999.15476075 -8656.02923281 -7525.7277781    381.41891274]
Reward: -1  Episode Reward:  32
xxxxx
x.. x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -6212.61234477   481.37086639  -420.62983886]
------
Step:9, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  242.45602069 8664.02203002]
New Q values:  [  37.74111519 -168.92307549  210.8080821  8664.02203002]
Reward: -1  Episode Reward:  31
xxxxx
x.. x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -999.15476075 -8656.02923281 -7525.7277781    381.41891274]
------
Step:10, Action:West
State  288
Old Q Values:  [ -999.15476075 -8656.02923281 -7525.7277781    381.41891274]
New Q values:  [ -999.15476075 -8656.02923281 -7525.7277781    296.37882501]
Reward: -1  Episode Reward:  30
xxxxx
x.. x
x g x
x a x
xxxxx
Step:11, Action:South
State  272
Old Q Values:  [-2527.46239811 -6212.61234477   481.37086639  -420.62983886]
New Q values:  [-2527.46239811 -8521.23367799   481.37086639  -420.62983886]
Reward: -10301  Episode Reward:  -10271
xxxxx
x.. x
x . x
x g x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -999.15476075 -8656.02923281 -7525.7277781    296.37882501]
------
Step:1, Action:West
State  288
Old Q Values:  [ -999.15476075 -8656.02923281 -7525.7277781    296.37882501]
New Q values:  [ -999.15476075 -8656.02923281 -7525.7277781    268.36278992]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   481.37086639  -420.62983886]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   481.37086639  -420.62983886]
New Q values:  [-2527.46239811 -8521.23367799   272.45718353  -420.62983886]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -999.15476075 -8656.02923281 -7525.7277781    268.36278992]
------
Step:3, Action:West
State  288
Old Q Values:  [ -999.15476075 -8656.02923281 -7525.7277781    268.36278992]
New Q values:  [ -999.15476075 -8656.02923281 -7525.7277781    188.48227103]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   272.45718353  -420.62983886]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   272.45718353  -420.62983886]
New Q values:  [-2527.46239811 -8521.23367799   164.92755472  -420.62983886]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -999.15476075 -8656.02923281 -7525.7277781    188.48227103]
------
Step:5, Action:West
State  288
Old Q Values:  [ -999.15476075 -8656.02923281 -7525.7277781    188.48227103]
New Q values:  [ -999.15476075 -8656.02923281 -7525.7277781    124.27117483]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   164.92755472  -420.62983886]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   164.92755472  -420.62983886]
New Q values:  [-2527.46239811 -8521.23367799   102.65237434  -420.62983886]
Reward: -1  Episode Reward:  4
xxxxx
xg..x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -999.15476075 -8656.02923281 -7525.7277781    124.27117483]
------
Step:7, Action:West
State  288
Old Q Values:  [ -999.15476075 -8656.02923281 -7525.7277781    124.27117483]
New Q values:  [ -999.15476075 -8656.02923281 -7525.7277781     79.90418223]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   102.65237434  -420.62983886]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   102.65237434  -420.62983886]
New Q values:  [-2527.46239811 -8521.23367799    64.4322044   -420.62983886]
Reward: -1  Episode Reward:  2
xxxxx
x...x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -999.15476075 -8656.02923281 -7525.7277781     79.90418223]
------
Step:9, Action:West
State  288
Old Q Values:  [ -999.15476075 -8656.02923281 -7525.7277781     79.90418223]
New Q values:  [ -999.15476075 -8656.02923281 -7525.7277781   2630.5682819 ]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  210.8080821  8664.02203002]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    64.4322044   -420.62983886]
New Q values:  [-2527.46239811 -8521.23367799    64.4322044    185.48403932]
Reward: -1  Episode Reward:  0
xxxxx
x...x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1181.11991622  -289.59534477 -5256.8454      -251.53897752]
------
Step:11, Action:North
State  261
Old Q Values:  [ 1181.11991622  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [ 2179.60165979  -289.59534477 -5256.8454      -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  671.22811746 5672.512311      0.        ]
------
Step:12, Action:East
State  183
Old Q Values:  [  22.25138791  671.22811746 5672.512311      0.        ]
New Q values:  [  22.25138791  671.22811746 5923.049985      0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3081.56358951  1388.1600337  12162.15020199]
------
Step:13, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  6.23150984e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  4.26891893e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  671.22811746 5923.049985      0.        ]
------
Step:14, Action:East
State  181
Old Q Values:  [   13.85659648  1108.73957374 -1332.41716586  -180.6       ]
New Q values:  [   13.85659648  1108.73957374 -5448.90065053  -180.6       ]
Reward: -10001  Episode Reward:  -9984
xxxxx
x...x
x g.x
x   x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1437.57886511 -6457.4598      2504.66370111 -7094.93143822]
------
Step:1, Action:East
State  260
Old Q Values:  [ 1437.57886511 -6457.4598      2504.66370111 -7094.93143822]
New Q values:  [ 1437.57886511 -6457.4598      1242.61744983 -7094.93143822]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6         512.11660633  784.50656464]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    64.4322044    185.48403932]
New Q values:  [-2527.46239811 -8521.23367799    64.4322044    504.86727526]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1437.57886511 -6457.4598      1242.61744983 -7094.93143822]
------
Step:3, Action:East
State  260
Old Q Values:  [ 1437.57886511 -6457.4598      1242.61744983 -7094.93143822]
New Q values:  [ 1437.57886511 -6457.4598       647.90716251 -7094.93143822]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    64.4322044    504.86727526]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    64.4322044    504.86727526]
New Q values:  [-2527.46239811 -8521.23367799   820.34336633   504.86727526]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -999.15476075 -8656.02923281 -7525.7277781   2630.5682819 ]
------
Step:5, Action:North
State  288
Old Q Values:  [ -999.15476075 -8656.02923281 -7525.7277781   2630.5682819 ]
New Q values:  [ 1476.37793378 -8656.02923281 -7525.7277781   2630.5682819 ]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6235.46612695  1080.78337489 -2651.70614553 -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [ 6235.46612695  1080.78337489 -2651.70614553 -3385.12952694]
New Q values:  [13332.21730588  1080.78337489 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  34
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  1710.24475768  -180.00807518 36108.76951702]
------
Step:7, Action:West
State  130
Old Q Values:  [18220.41077038  1710.24475768  -180.00807518 36108.76951702]
New Q values:  [18220.41077038  1710.24475768  -180.00807518 45603.75196232]
Reward: 9  Episode Reward:  43
xxxxx
x.a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6         -8280.92007422    112.85376583 103849.48051838]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    54.39795432   164.2920584 ]
New Q values:  [ -281.736      -8877.87327254    54.39795432   351.36383912]
Reward: 9  Episode Reward:  52
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  934.15671921  862.42400667 -272.09726687]
------
Step:9, Action:South
State  110
Old Q Values:  [ -180.6        -5648.89406634   324.2715545   -180.6       ]
New Q values:  [ -180.6        -1554.81716921   324.2715545   -180.6       ]
Reward: -1  Episode Reward:  51
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  2351.13485775     0.        ]
------
Step:10, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 37501.2334311      0.        ]
New Q values:  [    0.          4614.46100011 76286.56905238     0.        ]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  2351.13485775     0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [  22.25138791  671.22811746 5923.049985      0.        ]
New Q values:  [  22.25138791  671.22811746 3655.29567394    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  4.26891893e+03]
------
Step:2, Action:West
State  192
Old Q Values:  [3.89777037e-01 2.64425577e+03 2.92290451e+03 3.61555405e+03]
New Q values:  [3.89777037e-01 2.64425577e+03 2.92290451e+03 1.77824349e+03]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1108.73957374 -5448.90065053  -180.6       ]
------
Step:3, Action:South
State  181
Old Q Values:  [   13.85659648  1108.73957374 -5448.90065053  -180.6       ]
New Q values:  [   13.85659648  1096.77632743 -5448.90065053  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2179.60165979  -289.59534477 -5256.8454      -251.53897752]
------
Step:4, Action:North
State  261
Old Q Values:  [ 2179.60165979  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [ 1200.27356214  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1096.77632743 -5448.90065053  -180.6       ]
------
Step:5, Action:South
State  183
Old Q Values:  [  22.25138791  671.22811746 3655.29567394    0.        ]
New Q values:  [  22.25138791  627.97331563 3655.29567394    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1200.27356214  -289.59534477 -5256.8454      -251.53897752]
------
Step:6, Action:North
State  260
Old Q Values:  [ 1437.57886511 -6457.4598       647.90716251 -7094.93143822]
New Q values:  [ 1279.77200337 -6457.4598       647.90716251 -7094.93143822]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  2351.13485775     0.        ]
------
Step:7, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  2351.13485775     0.        ]
New Q values:  [    0.         -5969.29177534  2220.52962304     0.        ]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  4.26891893e+03]
------
Step:8, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  4.26891893e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.80355628e+03]
Reward: -1  Episode Reward:  2
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  627.97331563 3655.29567394    0.        ]
------
Step:9, Action:East
State  183
Old Q Values:  [  22.25138791  627.97331563 3655.29567394    0.        ]
New Q values:  [  22.25138791  627.97331563 5110.16333017    0.        ]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3081.56358951  1388.1600337  12162.15020199]
------
Step:10, Action:West
State  193
Old Q Values:  [-5922.26708831  4390.31246547 -3909.58186816 10063.42899081]
New Q values:  [-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
Reward: -1  Episode Reward:  0
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648  1096.77632743 -5448.90065053  -180.6       ]
------
Step:11, Action:South
State  181
Old Q Values:  [   13.85659648  1096.77632743 -5448.90065053  -180.6       ]
New Q values:  [   13.85659648   798.19259962 -5448.90065053  -180.6       ]
Reward: -1  Episode Reward:  -1
xxxxx
x...x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1200.27356214  -289.59534477 -5256.8454      -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [ 1200.27356214  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  718.96720474  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  -2
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   798.19259962 -5448.90065053  -180.6       ]
------
Step:13, Action:South
State  183
Old Q Values:  [  22.25138791  627.97331563 5110.16333017    0.        ]
New Q values:  [  22.25138791  466.27948767 5110.16333017    0.        ]
Reward: -1  Episode Reward:  -3
xxxxx
x...x
x  .x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  718.96720474  -289.59534477 -5256.8454      -251.53897752]
------
Step:14, Action:North
State  261
Old Q Values:  [  718.96720474  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [ 1820.03588095  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  -4
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  466.27948767 5110.16333017    0.        ]
------
Step:15, Action:East
State  183
Old Q Values:  [  22.25138791  466.27948767 5110.16333017    0.        ]
New Q values:  [  22.25138791  466.27948767 5692.11039267    0.        ]
Reward: -1  Episode Reward:  -5
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3081.56358951  1388.1600337  12162.15020199]
------
Step:16, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.80355628e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.82845563e+03]
Reward: -1  Episode Reward:  -6
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  466.27948767 5692.11039267    0.        ]
------
Step:17, Action:East
State  183
Old Q Values:  [  22.25138791  466.27948767 5692.11039267    0.        ]
New Q values:  [  22.25138791  466.27948767 5924.88921766    0.        ]
Reward: -1  Episode Reward:  -7
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3081.56358951  1388.1600337  12162.15020199]
------
Step:18, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.82845563e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.90824902e+03]
Reward: -1  Episode Reward:  -8
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  466.27948767 5924.88921766    0.        ]
------
Step:19, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  2220.52962304     0.        ]
New Q values:  [    0.         -5969.29177534  1760.08655416     0.        ]
Reward: -1  Episode Reward:  -9
xxxxx
x...x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.90824902e+03]
------
Step:20, Action:West
State  192
Old Q Values:  [3.89777037e-01 2.64425577e+03 2.92290451e+03 1.77824349e+03]
New Q values:  [ 3.89777037e-01  2.64425577e+03  2.92290451e+03 -3.83550944e+03]
Reward: -10001  Episode Reward:  -10010
xxxxx
x...x
xg .x
x ..x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   798.19259962 -5448.90065053  -180.6       ]
------
Step:1, Action:South
State  180
Old Q Values:  [-1367.02476015  1840.37285739  4845.97721139     0.        ]
New Q values:  [-1367.02476015  1125.48074397  4845.97721139     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1279.77200337 -6457.4598       647.90716251 -7094.93143822]
------
Step:2, Action:North
State  261
Old Q Values:  [ 1820.03588095  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  966.87213226  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   798.19259962 -5448.90065053  -180.6       ]
------
Step:3, Action:South
State  181
Old Q Values:  [   13.85659648   798.19259962 -5448.90065053  -180.6       ]
New Q values:  [   13.85659648   608.73867953 -5448.90065053  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x. .x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  966.87213226  -289.59534477 -5256.8454      -251.53897752]
------
Step:4, Action:North
State  261
Old Q Values:  [  966.87213226  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [ 2163.6156182   -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  466.27948767 5924.88921766    0.        ]
------
Step:5, Action:East
State  181
Old Q Values:  [   13.85659648   608.73867953 -5448.90065053  -180.6       ]
New Q values:  [   13.85659648   608.73867953 -6997.62971144  -180.6       ]
Reward: -9991  Episode Reward:  -9985
xxxxx
x. .x
x g.x
x ..x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  210.8080821  8664.02203002]
------
Step:1, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  210.8080821  8664.02203002]
New Q values:  [  37.74111519 -168.92307549  210.8080821  4120.09349747]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2163.6156182   -289.59534477 -5256.8454      -251.53897752]
------
Step:2, Action:North
State  261
Old Q Values:  [ 2163.6156182   -289.59534477 -5256.8454      -251.53897752]
New Q values:  [ 1053.46785114  -289.59534477 -5256.8454      -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   608.73867953 -6997.62971144  -180.6       ]
------
Step:3, Action:South
State  183
Old Q Values:  [  22.25138791  466.27948767 5924.88921766    0.        ]
New Q values:  [  22.25138791  501.95215041 5924.88921766    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x ..x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1053.46785114  -289.59534477 -5256.8454      -251.53897752]
------
Step:4, Action:North
State  260
Old Q Values:  [ 1279.77200337 -6457.4598       647.90716251 -7094.93143822]
New Q values:  [ 1039.3347676  -6457.4598       647.90716251 -7094.93143822]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  1760.08655416     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [  22.25138791  501.95215041 5924.88921766    0.        ]
New Q values:  [  22.25138791  501.95215041 3247.83039201    0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.90824902e+03]
------
Step:6, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  2.64425577e+03  2.92290451e+03 -3.83550944e+03]
New Q values:  [ 3.89777037e-01  2.64425577e+03  2.92290451e+03 -1.35218217e+03]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   608.73867953 -6997.62971144  -180.6       ]
------
Step:7, Action:South
State  180
Old Q Values:  [-1367.02476015  1125.48074397  4845.97721139     0.        ]
New Q values:  [-1367.02476015   761.39272787  4845.97721139     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x...x
xg .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1039.3347676  -6457.4598       647.90716251 -7094.93143822]
------
Step:8, Action:East
State  260
Old Q Values:  [ 1039.3347676  -6457.4598       647.90716251 -7094.93143822]
New Q values:  [ 1039.3347676  -6457.4598       504.6658749  -7094.93143822]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   820.34336633   504.86727526]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   820.34336633   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799  1116.7078311    504.86727526]
Reward: -1  Episode Reward:  21
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1476.37793378 -8656.02923281 -7525.7277781   2630.5682819 ]
------
Step:10, Action:West
State  288
Old Q Values:  [ 1476.37793378 -8656.02923281 -7525.7277781   2630.5682819 ]
New Q values:  [ 1476.37793378 -8656.02923281 -7525.7277781   1386.63966209]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1116.7078311    504.86727526]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1116.7078311    504.86727526]
New Q values:  [-2527.46239811 -8521.23367799   888.99651258   504.86727526]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1476.37793378 -8656.02923281 -7525.7277781   1386.63966209]
------
Step:12, Action:North
State  288
Old Q Values:  [ 1476.37793378 -8656.02923281 -7525.7277781   1386.63966209]
New Q values:  [ 4595.61636528 -8656.02923281 -7525.7277781   1386.63966209]
Reward: 9  Episode Reward:  28
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13332.21730588  1080.78337489 -2651.70614553 -3385.12952694]
------
Step:13, Action:North
State  208
Old Q Values:  [13332.21730588  1080.78337489 -2651.70614553 -3385.12952694]
New Q values:  [19019.41251105  1080.78337489 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  37
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  1710.24475768  -180.00807518 45603.75196232]
------
Step:14, Action:West
State  128
Old Q Values:  [ 6067.82062533  1504.73148864 -8652.84       21020.48148244]
New Q values:  [ 6067.82062533  1504.73148864 -8652.84       33615.86059298]
Reward: 9  Episode Reward:  46
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.          1519.70944144  7301.08474053 84007.56      ]
------
Step:15, Action:East
State  112
Old Q Values:  [    0.          1519.70944144  7301.08474053 84007.56      ]
New Q values:  [    0.          1519.70944144 13004.59207411 84007.56      ]
Reward: -1  Episode Reward:  45
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 6067.82062533  1504.73148864 -8652.84       33615.86059298]
------
Step:16, Action:North
State  128
Old Q Values:  [ 6067.82062533  1504.73148864 -8652.84       33615.86059298]
New Q values:  [12331.28642803  1504.73148864 -8652.84       33615.86059298]
Reward: -301  Episode Reward:  -256
xxxxx
xg ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[12331.28642803  1504.73148864 -8652.84       33615.86059298]
------
Step:17, Action:West
State  128
Old Q Values:  [12331.28642803  1504.73148864 -8652.84       33615.86059298]
New Q values:  [12331.28642803  1504.73148864 -8652.84       32648.01223719]
Reward: -10001  Episode Reward:  -10257
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   888.99651258   504.86727526]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   888.99651258   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799  1739.68351461   504.86727526]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4595.61636528 -8656.02923281 -7525.7277781   1386.63966209]
------
Step:2, Action:North
State  288
Old Q Values:  [ 4595.61636528 -8656.02923281 -7525.7277781   1386.63966209]
New Q values:  [ 7549.47029943 -8656.02923281 -7525.7277781   1386.63966209]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19019.41251105  1080.78337489 -2651.70614553 -3385.12952694]
------
Step:3, Action:North
State  208
Old Q Values:  [19019.41251105  1080.78337489 -2651.70614553 -3385.12952694]
New Q values:  [ 7841.9166376   1080.78337489 -2651.70614553 -3385.12952694]
Reward: 9  Episode Reward:  27
xxxxx
xg.ax
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   762.50544392 -6245.61866138   268.76196987]
------
Step:4, Action:South
State  130
Old Q Values:  [18220.41077038  1710.24475768  -180.00807518 45603.75196232]
New Q values:  [18220.41077038  3036.07289435  -180.00807518 45603.75196232]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7841.9166376   1080.78337489 -2651.70614553 -3385.12952694]
------
Step:5, Action:North
State  210
Old Q Values:  [ 304.63919487 1745.11688845 -180.6           3.52184257]
New Q values:  [ 1.38023813e+04  1.74511689e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3036.07289435  -180.00807518 45603.75196232]
------
Step:6, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6           72.06810203]
New Q values:  [ -180.6        -3419.331151    -180.6          139.63639255]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    54.39795432   351.36383912]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    80.21221393   573.61798999]
New Q values:  [ -253.44886264 -1902.20915811    80.21221393   300.78261813]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
x . x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  219.78474043 -252.78192178]
------
Step:8, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573  219.78474043 -252.78192178]
New Q values:  [-252.35169558   11.28108573  177.54868161 -252.78192178]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    80.21221393   300.78261813]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    54.39795432   351.36383912]
New Q values:  [ -281.736      -8877.87327254    54.39795432   193.21014013]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  177.54868161 -252.78192178]
------
Step:10, Action:East
State  106
Old Q Values:  [ -180.6        -6000.6           59.14357236  -180.6       ]
New Q values:  [ -180.6        -6000.6           81.02047099  -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    54.39795432   193.21014013]
------
Step:11, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         224.9799834    -790.71739399]
New Q values:  [-10156.11771313  -5995.686         224.9799834   -6134.99019341]
Reward: -10001  Episode Reward:  -9961
xxxxx
xg  x
x . x
x.  x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
------
Step:1, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  2.64425577e+03  2.92290451e+03 -1.35218217e+03]
New Q values:  [ 3.89777037e-01  1.58500736e+03  2.92290451e+03 -1.35218217e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1739.68351461   504.86727526]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1739.68351461   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799  2966.11449567   504.86727526]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7549.47029943 -8656.02923281 -7525.7277781   1386.63966209]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7549.47029943 -8656.02923281 -7525.7277781   1386.63966209]
New Q values:  [ -622.23688895 -8656.02923281 -7525.7277781   1386.63966209]
Reward: -9991  Episode Reward:  -9973
xxxxx
x.. x
x. gx
x.  x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  501.95215041 3247.83039201    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [  22.25138791  501.95215041 3247.83039201    0.        ]
New Q values:  [  22.25138791  501.95215041 2177.00686175    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.90824902e+03]
------
Step:2, Action:West
State  195
Old Q Values:  [   38.85388605  3081.56358951  1388.1600337  12162.15020199]
New Q values:  [  38.85388605 3081.56358951 1388.1600337  5517.36213932]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  501.95215041 2177.00686175    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [  22.25138791  501.95215041 2177.00686175    0.        ]
New Q values:  [  22.25138791  501.95215041 1742.67744965    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.90824902e+03]
------
Step:4, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  1.58500736e+03  2.92290451e+03 -1.35218217e+03]
New Q values:  [ 3.89777037e-01  1.58500736e+03  2.92290451e+03 -3.58851265e+02]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xag.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   608.73867953 -6997.62971144  -180.6       ]
------
Step:5, Action:South
State  181
Old Q Values:  [   13.85659648   608.73867953 -6997.62971144  -180.6       ]
New Q values:  [   13.85659648   564.93582715 -6997.62971144  -180.6       ]
Reward: 9  Episode Reward:  15
xxxxx
x...x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1053.46785114  -289.59534477 -5256.8454      -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [ 1053.46785114  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  590.2678886   -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xag.x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   564.93582715 -6997.62971144  -180.6       ]
------
Step:7, Action:South
State  183
Old Q Values:  [  22.25138791  501.95215041 1742.67744965    0.        ]
New Q values:  [  22.25138791  377.26122674 1742.67744965    0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  590.2678886   -289.59534477 -5256.8454      -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [  590.2678886   -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  758.31039033  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  12
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  377.26122674 1742.67744965    0.        ]
------
Step:9, Action:East
State  181
Old Q Values:  [   13.85659648   564.93582715 -6997.62971144  -180.6       ]
New Q values:  [   13.85659648   564.93582715 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x agx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
------
Step:10, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  1.58500736e+03  2.92290451e+03 -3.58851265e+02]
New Q values:  [ 3.89777037e-01  1.52923729e+03  2.92290451e+03 -3.58851265e+02]
Reward: 9  Episode Reward:  20
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2966.11449567   504.86727526]
------
Step:11, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  210.8080821  4120.09349747]
New Q values:  [  37.74111519 -168.92307549  499.71513147 4120.09349747]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -622.23688895 -8656.02923281 -7525.7277781   1386.63966209]
------
Step:12, Action:West
State  288
Old Q Values:  [ -622.23688895 -8656.02923281 -7525.7277781   1386.63966209]
New Q values:  [ -622.23688895 -8656.02923281 -7525.7277781   1790.08391408]
Reward: -1  Episode Reward:  18
xxxxx
x..gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  499.71513147 4120.09349747]
------
Step:13, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  499.71513147 4120.09349747]
New Q values:  [  37.74111519 -168.92307549  499.71513147 1874.93051609]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  758.31039033  -289.59534477 -5256.8454      -251.53897752]
------
Step:14, Action:North
State  261
Old Q Values:  [  758.31039033  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  825.52739103  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  377.26122674 1742.67744965    0.        ]
------
Step:15, Action:East
State  183
Old Q Values:  [  22.25138791  377.26122674 1742.67744965    0.        ]
New Q values:  [  22.25138791  377.26122674 1568.94568481    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.90824902e+03]
------
Step:16, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  2.90824902e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.69072557e+03]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  1760.08655416     0.        ]
------
Step:17, Action:East
State  180
Old Q Values:  [-1367.02476015   761.39272787  4845.97721139     0.        ]
New Q values:  [-1367.02476015   761.39272787  2814.6622377      0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  1.52923729e+03  2.92290451e+03 -3.58851265e+02]
------
Step:18, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  1.52923729e+03  2.92290451e+03 -3.58851265e+02]
New Q values:  [ 3.89777037e-01  1.52923729e+03  3.52713680e+03 -3.58851265e+02]
Reward: 9  Episode Reward:  22
xxxxx
xg..x
x  ax
x   x
xxxxx
Step:19, Action:East
State  208
Old Q Values:  [ 7841.9166376   1080.78337489 -2651.70614553 -3385.12952694]
New Q values:  [ 7841.9166376   1080.78337489  1111.29253307 -3385.12952694]
Reward: -301  Episode Reward:  -279
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7841.9166376   1080.78337489  1111.29253307 -3385.12952694]
------
Step:20, Action:North
State  208
Old Q Values:  [ 7841.9166376   1080.78337489  1111.29253307 -3385.12952694]
New Q values:  [12936.5703262   1080.78337489  1111.29253307 -3385.12952694]
Reward: 9  Episode Reward:  -270
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[12331.28642803  1504.73148864 -8652.84       32648.01223719]
------
Step:21, Action:North
State  128
Old Q Values:  [12331.28642803  1504.73148864 -8652.84       32648.01223719]
New Q values:  [14546.31824237  1504.73148864 -8652.84       32648.01223719]
Reward: -301  Episode Reward:  -571
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[14546.31824237  1504.73148864 -8652.84       32648.01223719]
------
Step:22, Action:West
State  130
Old Q Values:  [18220.41077038  3036.07289435  -180.00807518 45603.75196232]
New Q values:  [18220.41077038  3036.07289435  -180.00807518 49401.74494044]
Reward: 9  Episode Reward:  -562
xxxxx
x.a x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6         -8280.92007422    112.85376583 103849.48051838]
------
Step:23, Action:West
State  114
Old Q Values:  [  -180.6         -8280.92007422    112.85376583 103849.48051838]
New Q values:  [  -180.6         -8280.92007422    112.85376583 101629.03741003]
Reward: 100009  Episode Reward:  99447
xxxxx
xa  x
x   x
xg  x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
Step:1, Action:South
State  136
Old Q Values:  [ -724.71310357   762.50544392 -6245.61866138   268.76196987]
New Q values:  [ -724.71310357  4191.37327543 -6245.61866138   268.76196987]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12936.5703262   1080.78337489  1111.29253307 -3385.12952694]
------
Step:2, Action:East
State  208
Old Q Values:  [12936.5703262   1080.78337489  1111.29253307 -3385.12952694]
New Q values:  [12936.5703262   1080.78337489 -1855.11188891 -3385.12952694]
Reward: -10301  Episode Reward:  -10292
xxxxx
x.  x
x..gx
x...x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12936.5703262   1080.78337489 -1855.11188891 -3385.12952694]
------
Step:1, Action:North
State  216
Old Q Values:  [  294.80374576  1086.66392038 -6170.35693855 -2387.54492731]
New Q values:  [  165.21241607  1086.66392038 -6170.35693855 -2387.54492731]
Reward: 9  Episode Reward:  9
xxxxx
x .ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          139.63639255]
------
Step:2, Action:West
State  136
Old Q Values:  [ -724.71310357  4191.37327543 -6245.61866138   268.76196987]
New Q values:  [ -724.71310357  4191.37327543 -6245.61866138   180.39878297]
Reward: 9  Episode Reward:  18
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         224.9799834   -6134.99019341]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    54.39795432   193.21014013]
New Q values:  [ -281.736      -8877.87327254    63.05009949   193.21014013]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          139.63639255]
------
Step:4, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          139.63639255]
New Q values:  [ -180.6        -3419.331151    -180.6          113.21759906]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    63.05009949   193.21014013]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    63.05009949   193.21014013]
New Q values:  [ -281.736      -8877.87327254    63.05009949   173.9655224 ]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   324.2715545   -180.6       ]
------
Step:6, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921   324.2715545   -180.6       ]
New Q values:  [ -180.6        -1554.81716921   181.29827852  -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    63.05009949   173.9655224 ]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    63.05009949   173.9655224 ]
New Q values:  [ -281.736      -8877.87327254    63.05009949   123.37569252]
Reward: -1  Episode Reward:  13
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   181.29827852  -180.6       ]
------
Step:8, Action:East
State  108
Old Q Values:  [-8463.16477134   608.44407335   371.59057143     0.        ]
New Q values:  [-8463.16477134   608.44407335   215.53022359     0.        ]
Reward: -1  Episode Reward:  12
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         224.9799834   -6134.99019341]
------
Step:9, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         224.9799834   -6134.99019341]
New Q values:  [-10156.11771313  -5995.686        1346.80397599  -6134.99019341]
Reward: -1  Episode Reward:  11
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  4191.37327543 -6245.61866138   180.39878297]
------
Step:10, Action:South
State  136
Old Q Values:  [ -724.71310357  4191.37327543 -6245.61866138   180.39878297]
New Q values:  [ -724.71310357  2001.94848629 -6245.61866138   180.39878297]
Reward: -1  Episode Reward:  10
xxxxx
x  gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  165.21241607  1086.66392038 -6170.35693855 -2387.54492731]
------
Step:11, Action:South
State  208
Old Q Values:  [12936.5703262   1080.78337489 -1855.11188891 -3385.12952694]
New Q values:  [12936.5703262    974.73852418 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  19
xxxxx
x  gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -622.23688895 -8656.02923281 -7525.7277781   1790.08391408]
------
Step:12, Action:West
State  288
Old Q Values:  [ -622.23688895 -8656.02923281 -7525.7277781   1790.08391408]
New Q values:  [ -622.23688895 -8656.02923281 -7525.7277781   1611.26791433]
Reward: 9  Episode Reward:  28
xxxxx
x g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2966.11449567   504.86727526]
------
Step:13, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  499.71513147 1874.93051609]
New Q values:  [  37.74111519 -168.92307549  682.66642689 1874.93051609]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -622.23688895 -8656.02923281 -7525.7277781   1611.26791433]
------
Step:14, Action:West
State  288
Old Q Values:  [ -622.23688895 -8656.02923281 -7525.7277781   1611.26791433]
New Q values:  [ -622.23688895 -8656.02923281 -7525.7277781   1206.38632056]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  682.66642689 1874.93051609]
------
Step:15, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  682.66642689 1874.93051609]
New Q values:  [  37.74111519 -168.92307549  682.66642689 4581.31963561]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x.. x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12753.15809726  3481.80752105   750.46010822  2937.02145253]
------
Step:16, Action:North
State  261
Old Q Values:  [  825.52739103  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  806.29466185  -289.59534477 -5256.8454      -251.53897752]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  377.26122674 1568.94568481    0.        ]
------
Step:17, Action:East
State  177
Old Q Values:  [   0.         6260.18473843 5162.98667077    0.        ]
New Q values:  [    0.          6260.18473843 57128.73570695     0.        ]
Reward: 90009  Episode Reward:  90053
xxxxx
x   x
x g x
x   x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        1346.80397599  -6134.99019341]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    63.05009949   123.37569252]
New Q values:  [ -281.736      -8877.87327254    64.58531952   123.37569252]
Reward: 9  Episode Reward:  9
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -3419.331151    -180.6          113.21759906]
------
Step:2, Action:West
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6          113.21759906]
New Q values:  [ -180.6        -3419.331151    -180.6           81.69974738]
Reward: -1  Episode Reward:  8
xxxxx
x a x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    64.58531952   123.37569252]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    64.58531952   123.37569252]
New Q values:  [ -281.736      -8877.87327254    64.58531952   103.13976056]
Reward: -1  Episode Reward:  7
xxxxx
xa  x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   181.29827852  -180.6       ]
------
Step:4, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921   181.29827852  -180.6       ]
New Q values:  [ -180.6        -1554.81716921   102.86123958  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x a x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    64.58531952   103.13976056]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    64.58531952   103.13976056]
New Q values:  [ -281.736      -8877.87327254    64.58531952    71.5142761 ]
Reward: -1  Episode Reward:  5
xxxxx
xa  x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:6, Action:East
State  108
Old Q Values:  [-8463.16477134   608.44407335   215.53022359     0.        ]
New Q values:  [-8463.16477134   608.44407335   489.65328223     0.        ]
Reward: -1  Episode Reward:  4
xxxxx
xga x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        1346.80397599  -6134.99019341]
------
Step:7, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        1346.80397599  -6134.99019341]
New Q values:  [-10156.11771313  -5995.686        1138.70613628  -6134.99019341]
Reward: -1  Episode Reward:  3
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2001.94848629 -6245.61866138   180.39878297]
------
Step:8, Action:South
State  136
Old Q Values:  [ -724.71310357  2001.94848629 -6245.61866138   180.39878297]
New Q values:  [ -724.71310357  1132.17857063 -6245.61866138   180.39878297]
Reward: 9  Episode Reward:  12
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  165.21241607  1086.66392038 -6170.35693855 -2387.54492731]
------
Step:9, Action:South
State  208
Old Q Values:  [12936.5703262    974.73852418 -1855.11188891 -3385.12952694]
New Q values:  [12936.5703262    757.21130584 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  21
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -622.23688895 -8656.02923281 -7525.7277781   1206.38632056]
------
Step:10, Action:West
State  288
Old Q Values:  [ -622.23688895 -8656.02923281 -7525.7277781   1206.38632056]
New Q values:  [ -622.23688895 -8656.02923281 -7525.7277781   1377.78887693]
Reward: 9  Episode Reward:  30
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2966.11449567   504.86727526]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2966.11449567   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799  1599.18246135   504.86727526]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -622.23688895 -8656.02923281 -7525.7277781   1377.78887693]
------
Step:12, Action:West
State  288
Old Q Values:  [ -622.23688895 -8656.02923281 -7525.7277781   1377.78887693]
New Q values:  [ -622.23688895 -8656.02923281 -7525.7277781   1030.27028917]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1599.18246135   504.86727526]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1599.18246135   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799   948.15407129   504.86727526]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -622.23688895 -8656.02923281 -7525.7277781   1030.27028917]
------
Step:14, Action:West
State  288
Old Q Values:  [ -622.23688895 -8656.02923281 -7525.7277781   1030.27028917]
New Q values:  [ -622.23688895 -8656.02923281 -7525.7277781   1785.90400635]
Reward: -1  Episode Reward:  26
xxxxx
x  gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  682.66642689 4581.31963561]
------
Step:15, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  682.66642689 4581.31963561]
New Q values:  [  37.74111519 -168.92307549  682.66642689 5663.87528342]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[12753.15809726  3481.80752105   750.46010822  2937.02145253]
------
Step:16, Action:North
State  261
Old Q Values:  [  806.29466185  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  497.39861289  -289.59534477 -5256.8454      -251.53897752]
Reward: 9  Episode Reward:  44
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   564.93582715 -1482.55814493  -180.6       ]
------
Step:17, Action:South
State  181
Old Q Values:  [   13.85659648   564.93582715 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   374.59391473 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  497.39861289  -289.59534477 -5256.8454      -251.53897752]
------
Step:18, Action:North
State  257
Old Q Values:  [12753.15809726  3481.80752105   750.46010822  2937.02145253]
New Q values:  [22239.28395099  3481.80752105   750.46010822  2937.02145253]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.          6260.18473843 57128.73570695     0.        ]
------
Step:19, Action:South
State  181
Old Q Values:  [   13.85659648   374.59391473 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   298.45714976 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  497.39861289  -289.59534477 -5256.8454      -251.53897752]
------
Step:20, Action:North
State  257
Old Q Values:  [22239.28395099  3481.80752105   750.46010822  2937.02145253]
New Q values:  [26033.73429248  3481.80752105   750.46010822  2937.02145253]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[    0.          6260.18473843 57128.73570695     0.        ]
------
Step:21, Action:South
State  183
Old Q Values:  [  22.25138791  377.26122674 1568.94568481    0.        ]
New Q values:  [  22.25138791  299.52407456 1568.94568481    0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  497.39861289  -289.59534477 -5256.8454      -251.53897752]
------
Step:22, Action:North
State  261
Old Q Values:  [  497.39861289  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  669.0431506   -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  299.52407456 1568.94568481    0.        ]
------
Step:23, Action:East
State  177
Old Q Values:  [    0.          6260.18473843 57128.73570695     0.        ]
New Q values:  [    0.          6260.18473843 84173.98802242     0.        ]
Reward: 100009  Episode Reward:  100047
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    64.58531952    71.5142761 ]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    64.58531952    71.5142761 ]
New Q values:  [ -281.736      -8877.87327254    64.58531952    64.86408231]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134   608.44407335   489.65328223     0.        ]
New Q values:  [-8463.16477134   608.44407335   536.87315378     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xga.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        1138.70613628  -6134.99019341]
------
Step:3, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        1138.70613628  -6134.99019341]
New Q values:  [-10156.11771313  -5995.686         800.5360257   -6134.99019341]
Reward: 9  Episode Reward:  17
xxxxx
x gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1132.17857063 -6245.61866138   180.39878297]
------
Step:4, Action:South
State  138
Old Q Values:  [ -180.6        -3419.331151    -180.6           81.69974738]
New Q values:  [ -180.6        -1036.33328429  -180.6           81.69974738]
Reward: 9  Episode Reward:  26
xxxxx
x   x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  165.21241607  1086.66392038 -6170.35693855 -2387.54492731]
------
Step:5, Action:South
State  210
Old Q Values:  [ 1.38023813e+04  1.74511689e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.38023813e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -622.23688895 -8656.02923281 -7525.7277781   1785.90400635]
------
Step:6, Action:North
State  288
Old Q Values:  [ -622.23688895 -8656.02923281 -7525.7277781   1785.90400635]
New Q values:  [ 3631.47634228 -8656.02923281 -7525.7277781   1785.90400635]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12936.5703262    757.21130584 -1855.11188891 -3385.12952694]
------
Step:7, Action:North
State  208
Old Q Values:  [12936.5703262    757.21130584 -1855.11188891 -3385.12952694]
New Q values:  [ 5513.68170167   757.21130584 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  33
xxxxx
x gax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1132.17857063 -6245.61866138   180.39878297]
------
Step:8, Action:South
State  138
Old Q Values:  [ -180.6        -1036.33328429  -180.6           81.69974738]
New Q values:  [-180.6        1238.97119679 -180.6          81.69974738]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5513.68170167   757.21130584 -1855.11188891 -3385.12952694]
------
Step:9, Action:North
State  208
Old Q Values:  [ 5513.68170167   757.21130584 -1855.11188891 -3385.12952694]
New Q values:  [ 2576.5640397    757.21130584 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1238.97119679 -180.6          81.69974738]
------
Step:10, Action:South
State  138
Old Q Values:  [-180.6        1238.97119679 -180.6          81.69974738]
New Q values:  [-180.6        4635.70285871 -180.6          81.69974738]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.38023813e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:11, Action:North
State  210
Old Q Values:  [ 1.38023813e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 6.91106336e+03  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4635.70285871 -180.6          81.69974738]
------
Step:12, Action:South
State  138
Old Q Values:  [-180.6        4635.70285871 -180.6          81.69974738]
New Q values:  [-180.6        3927.00015276 -180.6          81.69974738]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6.91106336e+03  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:13, Action:North
State  210
Old Q Values:  [ 6.91106336e+03  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.94192539e+03  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        3927.00015276 -180.6          81.69974738]
------
Step:14, Action:South
State  138
Old Q Values:  [-180.6        3927.00015276 -180.6          81.69974738]
New Q values:  [-180.6        2752.77767857 -180.6          81.69974738]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.94192539e+03  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:15, Action:North
State  210
Old Q Values:  [ 3.94192539e+03  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [2402.00346018 1239.21795729 -180.6           3.52184257]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2752.77767857 -180.6          81.69974738]
------
Step:16, Action:South
State  138
Old Q Values:  [-180.6        2752.77767857 -180.6          81.69974738]
New Q values:  [-180.6        1821.11210948 -180.6          81.69974738]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2402.00346018 1239.21795729 -180.6           3.52184257]
------
Step:17, Action:North
State  210
Old Q Values:  [2402.00346018 1239.21795729 -180.6           3.52184257]
New Q values:  [1506.53501692 1239.21795729 -180.6           3.52184257]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1821.11210948 -180.6          81.69974738]
------
Step:18, Action:South
State  138
Old Q Values:  [-180.6        1821.11210948 -180.6          81.69974738]
New Q values:  [-180.6        1179.80534887 -180.6          81.69974738]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1506.53501692 1239.21795729 -180.6           3.52184257]
------
Step:19, Action:North
State  210
Old Q Values:  [1506.53501692 1239.21795729 -180.6           3.52184257]
New Q values:  [ 955.95561143 1239.21795729 -180.6           3.52184257]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1179.80534887 -180.6          81.69974738]
------
Step:20, Action:South
State  138
Old Q Values:  [-180.6        1179.80534887 -180.6          81.69974738]
New Q values:  [-180.6         843.08752673 -180.6          81.69974738]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 955.95561143 1239.21795729 -180.6           3.52184257]
------
Step:21, Action:North
State  210
Old Q Values:  [ 955.95561143 1239.21795729 -180.6           3.52184257]
New Q values:  [ 634.70850259 1239.21795729 -180.6           3.52184257]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         843.08752673 -180.6          81.69974738]
------
Step:22, Action:South
State  138
Old Q Values:  [-180.6         843.08752673 -180.6          81.69974738]
New Q values:  [-180.6         708.40039788 -180.6          81.69974738]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 634.70850259 1239.21795729 -180.6           3.52184257]
------
Step:23, Action:South
State  208
Old Q Values:  [ 2576.5640397    757.21130584 -1855.11188891 -3385.12952694]
New Q values:  [ 2576.5640397   1391.72742502 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3631.47634228 -8656.02923281 -7525.7277781   1785.90400635]
------
Step:24, Action:North
State  288
Old Q Values:  [ 3631.47634228 -8656.02923281 -7525.7277781   1785.90400635]
New Q values:  [ 2224.95974882 -8656.02923281 -7525.7277781   1785.90400635]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2576.5640397   1391.72742502 -1855.11188891 -3385.12952694]
------
Step:25, Action:North
State  208
Old Q Values:  [ 2576.5640397   1391.72742502 -1855.11188891 -3385.12952694]
New Q values:  [ 1369.67918707  1391.72742502 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  15
xxxxx
xg ax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1132.17857063 -6245.61866138   180.39878297]
------
Step:26, Action:South
State  136
Old Q Values:  [ -724.71310357  1132.17857063 -6245.61866138   180.39878297]
New Q values:  [ -724.71310357   869.78965576 -6245.61866138   180.39878297]
Reward: -1  Episode Reward:  14
xxxxx
x g x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1369.67918707  1391.72742502 -1855.11188891 -3385.12952694]
------
Step:27, Action:South
State  208
Old Q Values:  [ 1369.67918707  1391.72742502 -1855.11188891 -3385.12952694]
New Q values:  [ 1369.67918707  1223.57889465 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  13
xxxxx
x  gx
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2224.95974882 -8656.02923281 -7525.7277781   1785.90400635]
------
Step:28, Action:North
State  288
Old Q Values:  [ 2224.95974882 -8656.02923281 -7525.7277781   1785.90400635]
New Q values:  [-4699.71234435 -8656.02923281 -7525.7277781   1785.90400635]
Reward: -10001  Episode Reward:  -9988
xxxxx
x   x
x..gx
x . x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   948.15407129   504.86727526]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   948.15407129   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799   920.43283042   504.86727526]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4699.71234435 -8656.02923281 -7525.7277781   1785.90400635]
------
Step:2, Action:West
State  288
Old Q Values:  [-4699.71234435 -8656.02923281 -7525.7277781   1785.90400635]
New Q values:  [-4699.71234435 -8656.02923281 -7525.7277781   2412.92418757]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  682.66642689 5663.87528342]
------
Step:3, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  682.66642689 5663.87528342]
New Q values:  [   37.74111519  -168.92307549   682.66642689 10081.07040111]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26033.73429248  3481.80752105   750.46010822  2937.02145253]
------
Step:4, Action:North
State  261
Old Q Values:  [  669.0431506   -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  362.55440517  -289.59534477 -5256.8454      -251.53897752]
Reward: 9  Episode Reward:  26
xxxxx
x..gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   298.45714976 -1482.55814493  -180.6       ]
------
Step:5, Action:South
State  181
Old Q Values:  [   13.85659648   298.45714976 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   227.54918145 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  362.55440517  -289.59534477 -5256.8454      -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [  362.55440517  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  212.6865165   -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   227.54918145 -1482.55814493  -180.6       ]
------
Step:7, Action:South
State  181
Old Q Values:  [   13.85659648   227.54918145 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   154.22562753 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  212.6865165   -289.59534477 -5256.8454      -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [  212.6865165   -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  130.74229486  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  22
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   154.22562753 -1482.55814493  -180.6       ]
------
Step:9, Action:South
State  183
Old Q Values:  [  22.25138791  299.52407456 1568.94568481    0.        ]
New Q values:  [  22.25138791  158.43231828 1568.94568481    0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  130.74229486  -289.59534477 -5256.8454      -251.53897752]
------
Step:10, Action:North
State  260
Old Q Values:  [ 1039.3347676  -6457.4598       504.6658749  -7094.93143822]
New Q values:  [  943.15987329 -6457.4598       504.6658749  -7094.93143822]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  1760.08655416     0.        ]
------
Step:11, Action:East
State  183
Old Q Values:  [  22.25138791  158.43231828 1568.94568481    0.        ]
New Q values:  [  22.25138791  158.43231828 1134.19594578    0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.69072557e+03]
------
Step:12, Action:West
State  195
Old Q Values:  [  38.85388605 3081.56358951 1388.1600337  5517.36213932]
New Q values:  [  38.85388605 3081.56358951 1388.1600337  2546.60363946]
Reward: -1  Episode Reward:  18
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  158.43231828 1134.19594578    0.        ]
------
Step:13, Action:East
State  183
Old Q Values:  [  22.25138791  158.43231828 1134.19594578    0.        ]
New Q values:  [ 22.25138791 158.43231828 960.29605016   0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.69072557e+03]
------
Step:14, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  1.52923729e+03  3.52713680e+03 -3.58851265e+02]
New Q values:  [ 3.89777037e-01  1.52923729e+03  3.52713680e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   154.22562753 -1482.55814493  -180.6       ]
------
Step:15, Action:South
State  181
Old Q Values:  [   13.85659648   154.22562753 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   100.31293947 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  130.74229486  -289.59534477 -5256.8454      -251.53897752]
------
Step:16, Action:North
State  261
Old Q Values:  [  130.74229486  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [   81.79079979  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   100.31293947 -1482.55814493  -180.6       ]
------
Step:17, Action:South
State  181
Old Q Values:  [   13.85659648   100.31293947 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    64.06241572 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   81.79079979  -289.59534477 -5256.8454      -251.53897752]
------
Step:18, Action:North
State  261
Old Q Values:  [   81.79079979  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  320.20513496  -289.59534477 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  12
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791 158.43231828 960.29605016   0.        ]
------
Step:19, Action:East
State  183
Old Q Values:  [ 22.25138791 158.43231828 960.29605016   0.        ]
New Q values:  [ 22.25138791 158.43231828 890.73609192   0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.69072557e+03]
------
Step:20, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.69072557e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.20371620e+03]
Reward: -1  Episode Reward:  10
xxxxx
x...x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  1760.08655416     0.        ]
------
Step:21, Action:East
State  183
Old Q Values:  [ 22.25138791 158.43231828 890.73609192   0.        ]
New Q values:  [ 22.25138791 158.43231828 734.05342691   0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.20371620e+03]
------
Step:22, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  1.52923729e+03  3.52713680e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  1.52923729e+03  1.82715847e+03 -9.78728177e+01]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1369.67918707  1223.57889465 -1855.11188891 -3385.12952694]
------
Step:23, Action:North
State  208
Old Q Values:  [ 1369.67918707  1223.57889465 -1855.11188891 -3385.12952694]
New Q values:  [15373.79515696  1223.57889465 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3036.07289435  -180.00807518 49401.74494044]
------
Step:24, Action:West
State  130
Old Q Values:  [18220.41077038  3036.07289435  -180.00807518 49401.74494044]
New Q values:  [18220.41077038  3036.07289435  -180.00807518 50254.80919919]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6         -8280.92007422    112.85376583 101629.03741003]
------
Step:25, Action:West
State  114
Old Q Values:  [  -180.6         -8280.92007422    112.85376583 101629.03741003]
New Q values:  [  -180.6         -8280.92007422    112.85376583 100740.8601667 ]
Reward: 100009  Episode Reward:  100045
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    64.06241572 -1482.55814493  -180.6       ]
------
Step:1, Action:South
State  183
Old Q Values:  [ 22.25138791 158.43231828 734.05342691   0.        ]
New Q values:  [ 22.25138791 164.8344678  734.05342691   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xa.gx
xxxxx
Step:2, Action:South
State  261
Old Q Values:  [  320.20513496  -289.59534477 -5256.8454      -251.53897752]
New Q values:  [  320.20513496  -200.37659742 -5256.8454      -251.53897752]
Reward: -301  Episode Reward:  -292
xxxxx
x...x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  320.20513496  -200.37659742 -5256.8454      -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  320.20513496  -200.37659742 -5256.8454      -251.53897752]
New Q values:  [  146.7007787   -200.37659742 -5256.8454      -251.53897752]
Reward: -1  Episode Reward:  -293
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    64.06241572 -1482.55814493  -180.6       ]
------
Step:4, Action:South
State  180
Old Q Values:  [-1367.02476015   761.39272787  2814.6622377      0.        ]
New Q values:  [-1367.02476015   586.90505313  2814.6622377      0.        ]
Reward: -1  Episode Reward:  -294
xxxxx
x...x
xg. x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  943.15987329 -6457.4598       504.6658749  -7094.93143822]
------
Step:5, Action:East
State  261
Old Q Values:  [  146.7007787   -200.37659742 -5256.8454      -251.53897752]
New Q values:  [  146.7007787   -200.37659742 -1821.20831088  -251.53897752]
Reward: 9  Episode Reward:  -285
xxxxx
x...x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   920.43283042   504.86727526]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   920.43283042   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799  1097.45038844   504.86727526]
Reward: 9  Episode Reward:  -276
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4699.71234435 -8656.02923281 -7525.7277781   2412.92418757]
------
Step:7, Action:West
State  288
Old Q Values:  [-4699.71234435 -8656.02923281 -7525.7277781   2412.92418757]
New Q values:  [-4699.71234435 -8656.02923281 -7525.7277781   1293.80479156]
Reward: -1  Episode Reward:  -277
xxxxx
xg..x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1097.45038844   504.86727526]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1097.45038844   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799   826.52159284   504.86727526]
Reward: -1  Episode Reward:  -278
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4699.71234435 -8656.02923281 -7525.7277781   1293.80479156]
------
Step:9, Action:West
State  288
Old Q Values:  [-4699.71234435 -8656.02923281 -7525.7277781   1293.80479156]
New Q values:  [-4699.71234435 -8656.02923281 -7525.7277781    764.87839448]
Reward: -1  Episode Reward:  -279
xxxxx
x...x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   826.52159284   504.86727526]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   826.52159284   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799   559.47215548   504.86727526]
Reward: -1  Episode Reward:  -280
xxxxx
x...x
x . x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4699.71234435 -8656.02923281 -7525.7277781    764.87839448]
------
Step:11, Action:West
State  288
Old Q Values:  [-4699.71234435 -8656.02923281 -7525.7277781    764.87839448]
New Q values:  [-4699.71234435 -8656.02923281 -7525.7277781    473.19300443]
Reward: -1  Episode Reward:  -281
xxxxx
x...x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   559.47215548   504.86727526]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   559.47215548   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799   365.14676352   504.86727526]
Reward: -1  Episode Reward:  -282
xxxxx
x...x
x . x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4699.71234435 -8656.02923281 -7525.7277781    473.19300443]
------
Step:13, Action:West
State  288
Old Q Values:  [-4699.71234435 -8656.02923281 -7525.7277781    473.19300443]
New Q values:  [-4699.71234435 -8656.02923281 -7525.7277781    340.13738435]
Reward: -1  Episode Reward:  -283
xxxxx
x...x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   365.14676352   504.86727526]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   365.14676352   504.86727526]
New Q values:  [-2527.46239811 -8521.23367799   365.14676352   484.29487209]
Reward: -1  Episode Reward:  -284
xxxxx
xg..x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  943.15987329 -6457.4598       504.6658749  -7094.93143822]
------
Step:15, Action:North
State  261
Old Q Values:  [  146.7007787   -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [   77.2990362   -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  -285
xxxxx
x.g.x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    64.06241572 -1482.55814493  -180.6       ]
------
Step:16, Action:South
State  181
Old Q Values:  [   13.85659648    64.06241572 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    48.21467715 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  -286
xxxxx
x..gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   77.2990362   -200.37659742 -1821.20831088  -251.53897752]
------
Step:17, Action:North
State  261
Old Q Values:  [   77.2990362   -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [   44.78401762  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  -287
xxxxx
x.g.x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    48.21467715 -1482.55814493  -180.6       ]
------
Step:18, Action:South
State  180
Old Q Values:  [-1367.02476015   586.90505313  2814.6622377      0.        ]
New Q values:  [-1367.02476015   517.10998324  2814.6622377      0.        ]
Reward: -1  Episode Reward:  -288
xxxxx
xg..x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  943.15987329 -6457.4598       504.6658749  -7094.93143822]
------
Step:19, Action:North
State  260
Old Q Values:  [  943.15987329 -6457.4598       504.6658749  -7094.93143822]
New Q values:  [-4778.93737938 -6457.4598       504.6658749  -7094.93143822]
Reward: -10001  Episode Reward:  -10289
xxxxx
x...x
xg. x
x   x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   365.14676352   484.29487209]
------
Step:1, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6         512.11660633  784.50656464]
New Q values:  [  16.82637525 -180.6         512.11660633  470.60238833]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4778.93737938 -6457.4598       504.6658749  -7094.93143822]
------
Step:2, Action:East
State  260
Old Q Values:  [-4778.93737938 -6457.4598       504.6658749  -7094.93143822]
New Q values:  [-4778.93737938 -6457.4598       346.55481159 -7094.93143822]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   365.14676352   484.29487209]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   365.14676352   484.29487209]
New Q values:  [-2527.46239811 -8521.23367799   253.49992071   484.29487209]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4699.71234435 -8656.02923281 -7525.7277781    340.13738435]
------
Step:4, Action:North
State  288
Old Q Values:  [-4699.71234435 -8656.02923281 -7525.7277781    340.13738435]
New Q values:  [-1502.71955055 -8656.02923281 -7525.7277781    340.13738435]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 634.70850259 1239.21795729 -180.6           3.52184257]
------
Step:5, Action:South
State  208
Old Q Values:  [15373.79515696  1223.57889465 -1855.11188891 -3385.12952694]
New Q values:  [15373.79515696   590.87277317 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1502.71955055 -8656.02923281 -7525.7277781    340.13738435]
------
Step:6, Action:West
State  288
Old Q Values:  [-1502.71955055 -8656.02923281 -7525.7277781    340.13738435]
New Q values:  [-1502.71955055 -8656.02923281 -7525.7277781    280.74341537]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   253.49992071   484.29487209]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   253.49992071   484.29487209]
New Q values:  [-2527.46239811 -8521.23367799   253.49992071   297.08439231]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4778.93737938 -6457.4598       346.55481159 -7094.93143822]
------
Step:8, Action:East
State  260
Old Q Values:  [-4778.93737938 -6457.4598       346.55481159 -7094.93143822]
New Q values:  [-4778.93737938 -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   253.49992071   297.08439231]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   253.49992071   297.08439231]
New Q values:  [-2527.46239811 -8521.23367799   185.0229929    297.08439231]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1502.71955055 -8656.02923281 -7525.7277781    280.74341537]
------
Step:10, Action:West
State  288
Old Q Values:  [-1502.71955055 -8656.02923281 -7525.7277781    280.74341537]
New Q values:  [-1502.71955055 -8656.02923281 -7525.7277781    200.82268384]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   185.0229929    297.08439231]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   185.0229929    297.08439231]
New Q values:  [-2527.46239811 -8521.23367799   185.0229929    131.66896221]
Reward: -1  Episode Reward:  19
xxxxx
x g.x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   44.78401762  -200.37659742 -1821.20831088  -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [   44.78401762  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [   37.77801019  -200.37659742 -1821.20831088  -251.53897752]
Reward: 9  Episode Reward:  28
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    48.21467715 -1482.55814493  -180.6       ]
------
Step:13, Action:South
State  183
Old Q Values:  [ 22.25138791 164.8344678  734.05342691   0.        ]
New Q values:  [ 22.25138791  76.66719018 734.05342691   0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   37.77801019  -200.37659742 -1821.20831088  -251.53897752]
------
Step:14, Action:North
State  261
Old Q Values:  [   37.77801019  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [   28.97560722  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    48.21467715 -1482.55814493  -180.6       ]
------
Step:15, Action:South
State  181
Old Q Values:  [   13.85659648    48.21467715 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    27.37855303 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x g.x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   28.97560722  -200.37659742 -1821.20831088  -251.53897752]
------
Step:16, Action:North
State  261
Old Q Values:  [   28.97560722  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [   19.2038088   -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    27.37855303 -1482.55814493  -180.6       ]
------
Step:17, Action:South
State  181
Old Q Values:  [   13.85659648    27.37855303 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    16.11256385 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   19.2038088   -200.37659742 -1821.20831088  -251.53897752]
------
Step:18, Action:North
State  261
Old Q Values:  [   19.2038088   -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [  227.29755159  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791  76.66719018 734.05342691   0.        ]
------
Step:19, Action:East
State  183
Old Q Values:  [ 22.25138791  76.66719018 734.05342691   0.        ]
New Q values:  [ 22.25138791  76.66719018 677.38036091   0.        ]
Reward: 9  Episode Reward:  31
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.20371620e+03]
------
Step:20, Action:East
State  195
Old Q Values:  [  38.85388605 3081.56358951 1388.1600337  2546.60363946]
New Q values:  [  38.85388605 3081.56358951  926.42940066 2546.60363946]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 634.70850259 1239.21795729 -180.6           3.52184257]
------
Step:21, Action:North
State  210
Old Q Values:  [ 634.70850259 1239.21795729 -180.6           3.52184257]
New Q values:  [ 1.53357262e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  39
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18220.41077038  3036.07289435  -180.00807518 50254.80919919]
------
Step:22, Action:West
State  130
Old Q Values:  [18220.41077038  3036.07289435  -180.00807518 50254.80919919]
New Q values:  [ 18220.41077038   3036.07289435   -180.00807518 110329.58172968]
Reward: 100009  Episode Reward:  100048
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  934.15671921  862.42400667 -272.09726687]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6        1539.20727394    0.            0.        ]
New Q values:  [-180.6         625.91667873    0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    16.11256385 -1482.55814493  -180.6       ]
------
Step:2, Action:South
State  183
Old Q Values:  [ 22.25138791  76.66719018 677.38036091   0.        ]
New Q values:  [ 22.25138791 104.25614155 677.38036091   0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  227.29755159  -200.37659742 -1821.20831088  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  227.29755159  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [  293.53312891  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791 104.25614155 677.38036091   0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [ 22.25138791 104.25614155 677.38036091   0.        ]
New Q values:  [ 22.25138791 104.25614155 394.98224959   0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
------
Step:5, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  1.52923729e+03  1.82715847e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  1.52923729e+03  5.34840194e+03 -9.78728177e+01]
Reward: 9  Episode Reward:  35
xxxxx
x ..x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15373.79515696   590.87277317 -1855.11188891 -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [15373.79515696   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [39253.79258169   590.87277317 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  44
xxxxx
x .ax
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 18220.41077038   3036.07289435   -180.00807518 110329.58172968]
------
Step:7, Action:West
State  136
Old Q Values:  [ -724.71310357   869.78965576 -6245.61866138   180.39878297]
New Q values:  [ -724.71310357   869.78965576 -6245.61866138    94.39090975]
Reward: 9  Episode Reward:  53
xxxxx
x agx
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    56.10465519]
------
Step:8, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         800.5360257   -6134.99019341]
New Q values:  [-10156.11771313  -5995.686         800.5360257   -2375.90118319]
Reward: -1  Episode Reward:  52
xxxxx
xag x
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   262.31631391 -8213.80649336  -180.6       ]
------
Step:9, Action:South
State  109
Old Q Values:  [ -241.10880094   262.31631391 -8213.80649336  -180.6       ]
New Q values:  [ -241.10880094   280.46595952 -8213.80649336  -180.6       ]
Reward: -1  Episode Reward:  51
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   587.13144651 -4680.74267672  -244.98066897]
------
Step:10, Action:South
State  189
Old Q Values:  [    9.84673294   587.13144651 -4680.74267672  -244.98066897]
New Q values:  [    9.84673294   322.31251728 -4680.74267672  -244.98066897]
Reward: -1  Episode Reward:  50
xxxxx
x g x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  293.53312891  -200.37659742 -1821.20831088  -251.53897752]
------
Step:11, Action:North
State  260
Old Q Values:  [-4778.93737938 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [-1556.03118328 -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  49
xxxxx
xg  x
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263   103.59077741  1187.14589491     0.        ]
------
Step:12, Action:East
State  188
Old Q Values:  [-6523.78898263   103.59077741  1187.14589491     0.        ]
New Q values:  [-6523.78898263   103.59077741  1687.41680659     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xga x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  4043.86149542  715.61136736  181.20343395]
------
Step:13, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  1.52923729e+03  5.34840194e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  6.06726018e+04  5.34840194e+03 -9.78728177e+01]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x g x
x a x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   185.0229929    131.66896221]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   185.0229929    131.66896221]
New Q values:  [-2527.46239811 -8521.23367799   139.65600231   131.66896221]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1502.71955055 -8656.02923281 -7525.7277781    200.82268384]
------
Step:2, Action:West
State  288
Old Q Values:  [-1502.71955055 -8656.02923281 -7525.7277781    200.82268384]
New Q values:  [-1502.71955055 -8656.02923281 -7525.7277781    121.62587423]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   139.65600231   131.66896221]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   139.65600231   131.66896221]
New Q values:  [-2527.46239811 -8521.23367799    91.75016319   131.66896221]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1502.71955055 -8656.02923281 -7525.7277781    121.62587423]
------
Step:4, Action:West
State  288
Old Q Values:  [-1502.71955055 -8656.02923281 -7525.7277781    121.62587423]
New Q values:  [-1502.71955055 -8656.02923281 -7525.7277781     87.55103836]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    91.75016319   131.66896221]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    91.75016319   131.66896221]
New Q values:  [-2527.46239811 -8521.23367799    91.75016319   140.12752356]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  293.53312891  -200.37659742 -1821.20831088  -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [  293.53312891  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [  127.64702072  -200.37659742 -1821.20831088  -251.53897752]
Reward: 9  Episode Reward:  14
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    16.11256385 -1482.55814493  -180.6       ]
------
Step:7, Action:South
State  181
Old Q Values:  [   13.85659648    16.11256385 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    44.13913176 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  127.64702072  -200.37659742 -1821.20831088  -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [  127.64702072  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [  168.95348316  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  12
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 22.25138791 104.25614155 394.98224959   0.        ]
------
Step:9, Action:East
State  183
Old Q Values:  [ 22.25138791 104.25614155 394.98224959   0.        ]
New Q values:  [ 22.25138791 104.25614155 541.75188998   0.        ]
Reward: 9  Episode Reward:  21
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.20371620e+03]
------
Step:10, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  1.26119663e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.06176528e+02  5.11059650e+03  1.20371620e+03]
Reward: 9  Episode Reward:  30
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.53357262e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:11, Action:North
State  210
Old Q Values:  [ 1.53357262e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.92385650e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  39
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 18220.41077038   3036.07289435   -180.00807518 110329.58172968]
------
Step:12, Action:West
State  130
Old Q Values:  [ 18220.41077038   3036.07289435   -180.00807518 110329.58172968]
New Q values:  [18220.41077038  3036.07289435  -180.00807518 74359.49074188]
Reward: 9  Episode Reward:  48
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6         -8280.92007422    112.85376583 100740.8601667 ]
------
Step:13, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  9.88271127e+04]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.10351638e+05]
Reward: 100009  Episode Reward:  100057
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  6.06726018e+04  5.34840194e+03 -9.78728177e+01]
------
Step:1, Action:South
State  194
Old Q Values:  [-6.00000000e-01  1.06176528e+02  5.11059650e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.01505593e+02  5.11059650e+03  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6         512.11660633  470.60238833]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    91.75016319   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799    68.36537678   140.12752356]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1502.71955055 -8656.02923281 -7525.7277781     87.55103836]
------
Step:3, Action:North
State  288
Old Q Values:  [-1502.71955055 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [11180.44995429 -8656.02923281 -7525.7277781     87.55103836]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[39253.79258169   590.87277317 -1855.11188891 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [39253.79258169   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [25501.32070383   590.87277317 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  36
xxxxx
x.gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[14546.31824237  1504.73148864 -8652.84       32648.01223719]
------
Step:5, Action:North
State  130
Old Q Values:  [18220.41077038  3036.07289435  -180.00807518 74359.49074188]
New Q values:  [29415.41153072  3036.07289435  -180.00807518 74359.49074188]
Reward: -301  Episode Reward:  -265
xxxxx
x..ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3036.07289435  -180.00807518 74359.49074188]
------
Step:6, Action:West
State  130
Old Q Values:  [29415.41153072  3036.07289435  -180.00807518 74359.49074188]
New Q values:  [29415.41153072  3036.07289435  -180.00807518 62854.68770072]
Reward: 9  Episode Reward:  -256
xxxxx
x.a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.10351638e+05]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    80.21221393   300.78261813]
New Q values:  [ -253.44886264 -1902.20915811    80.21221393   178.97765173]
Reward: 9  Episode Reward:  -247
xxxxx
xa  x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573  177.54868161 -252.78192178]
------
Step:8, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573  177.54868161 -252.78192178]
New Q values:  [-252.35169558   11.28108573   89.87869734 -252.78192178]
Reward: -1  Episode Reward:  -248
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    64.58531952    64.86408231]
------
Step:9, Action:West
State  114
Old Q Values:  [  -180.6         -8280.92007422    112.85376583 100740.8601667 ]
New Q values:  [ -180.6        -8280.92007422   112.85376583 40379.58926936]
Reward: -1  Episode Reward:  -249
xxxxx
xa  x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SW
[  0.         279.48400894   0.           0.        ]
------
Step:10, Action:South
State  106
Old Q Values:  [ -180.6        -6000.6           81.02047099  -180.6       ]
New Q values:  [ -180.6        -8399.5673214     81.02047099  -180.6       ]
Reward: -10001  Episode Reward:  -10250
xxxxx
x   x
xg  x
x.  x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    44.13913176 -1482.55814493  -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [   13.85659648    44.13913176 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    73.74169765 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  168.95348316  -200.37659742 -1821.20831088  -251.53897752]
------
Step:2, Action:North
State  260
Old Q Values:  [-1556.03118328 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [  221.386198   -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   517.10998324  2814.6622377      0.        ]
------
Step:3, Action:East
State  180
Old Q Values:  [-1367.02476015   517.10998324  2814.6622377      0.        ]
New Q values:  [-1367.02476015   517.10998324 19327.04543975     0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  6.06726018e+04  5.34840194e+03 -9.78728177e+01]
------
Step:4, Action:South
State  194
Old Q Values:  [-6.00000000e-01  2.01505593e+02  5.11059650e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.39637219e+02  5.11059650e+03  1.20371620e+03]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6         512.11660633  470.60238833]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    68.36537678   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799  3386.881137     140.12752356]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11180.44995429 -8656.02923281 -7525.7277781     87.55103836]
------
Step:6, Action:North
State  288
Old Q Values:  [11180.44995429 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [16249.14947668 -8656.02923281 -7525.7277781     87.55103836]
Reward: 9  Episode Reward:  34
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.92385650e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:7, Action:North
State  210
Old Q Values:  [ 3.92385650e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.45572323e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  43
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3036.07289435  -180.00807518 62854.68770072]
------
Step:8, Action:West
State  130
Old Q Values:  [29415.41153072  3036.07289435  -180.00807518 62854.68770072]
New Q values:  [29415.41153072  3036.07289435  -180.00807518 37261.1518611 ]
Reward: 9  Episode Reward:  52
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6        -8280.92007422   112.85376583 40379.58926936]
------
Step:9, Action:West
State  114
Old Q Values:  [ -180.6        -8280.92007422   112.85376583 40379.58926936]
New Q values:  [ -180.6        -8280.92007422   112.85376583 86972.62863201]
Reward: 100009  Episode Reward:  100061
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  168.95348316  -200.37659742 -1821.20831088  -251.53897752]
------
Step:1, Action:North
State  260
Old Q Values:  [  221.386198   -6457.4598       227.14724233 -7094.93143822]
New Q values:  [  621.98044545 -6457.4598       227.14724233 -7094.93143822]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  1760.08655416     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 22.25138791 104.25614155 541.75188998   0.        ]
New Q values:  [  22.25138791  104.25614155 1755.27970652    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.39637219e+02  5.11059650e+03  1.20371620e+03]
------
Step:3, Action:East
State  195
Old Q Values:  [  38.85388605 3081.56358951  926.42940066 2546.60363946]
New Q values:  [   38.85388605  3081.56358951 10743.14145132  2546.60363946]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.45572323e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:4, Action:North
State  208
Old Q Values:  [25501.32070383   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [21384.27383986   590.87277317 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3036.07289435  -180.00807518 37261.1518611 ]
------
Step:5, Action:West
State  128
Old Q Values:  [14546.31824237  1504.73148864 -8652.84       32648.01223719]
New Q values:  [14546.31824237  1504.73148864 -8652.84       13144.00636441]
Reward: 9  Episode Reward:  45
xxxxx
x.agx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:6, Action:South
State  112
Old Q Values:  [    0.          1519.70944144 13004.59207411 84007.56      ]
New Q values:  [    0.         18809.06432124 13004.59207411 84007.56      ]
Reward: -1  Episode Reward:  44
xxxxx
x.g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  6.06726018e+04  5.34840194e+03 -9.78728177e+01]
------
Step:7, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  6.06726018e+04  5.34840194e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  2.52845051e+04  5.34840194e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  43
xxxxx
x.  x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3386.881137     140.12752356]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3386.881137     140.12752356]
New Q values:  [-2527.46239811 -8521.23367799  6234.8972978    140.12752356]
Reward: 9  Episode Reward:  52
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16249.14947668 -8656.02923281 -7525.7277781     87.55103836]
------
Step:9, Action:North
State  288
Old Q Values:  [16249.14947668 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [12914.34194263 -8656.02923281 -7525.7277781     87.55103836]
Reward: -1  Episode Reward:  51
xxxxx
xg  x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21384.27383986   590.87277317 -1855.11188891 -3385.12952694]
------
Step:10, Action:North
State  208
Old Q Values:  [21384.27383986   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [19731.45509427   590.87277317 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  50
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3036.07289435  -180.00807518 37261.1518611 ]
------
Step:11, Action:West
State  128
Old Q Values:  [14546.31824237  1504.73148864 -8652.84       13144.00636441]
New Q values:  [14546.31824237  1504.73148864 -8652.84       30459.27054576]
Reward: -1  Episode Reward:  49
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.         18809.06432124 13004.59207411 84007.56      ]
------
Step:12, Action:South
State  114
Old Q Values:  [ -180.6        -8280.92007422   112.85376583 86972.62863201]
New Q values:  [ -180.6         4272.38349051   112.85376583 86972.62863201]
Reward: -1  Episode Reward:  48
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  2.52845051e+04  5.34840194e+03 -9.78728177e+01]
------
Step:13, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  2.52845051e+04  5.34840194e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  1.19836712e+04  5.34840194e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6234.8972978    140.12752356]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6234.8972978    140.12752356]
New Q values:  [-2527.46239811 -8521.23367799  6367.66150191   140.12752356]
Reward: -1  Episode Reward:  46
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12914.34194263 -8656.02923281 -7525.7277781     87.55103836]
------
Step:15, Action:North
State  288
Old Q Values:  [12914.34194263 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [11084.57330533 -8656.02923281 -7525.7277781     87.55103836]
Reward: -1  Episode Reward:  45
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19731.45509427   590.87277317 -1855.11188891 -3385.12952694]
------
Step:16, Action:North
State  210
Old Q Values:  [ 3.45572323e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.50006385e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  44
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3036.07289435  -180.00807518 37261.1518611 ]
------
Step:17, Action:West
State  130
Old Q Values:  [29415.41153072  3036.07289435  -180.00807518 37261.1518611 ]
New Q values:  [29415.41153072  3036.07289435  -180.00807518 40995.64933404]
Reward: -1  Episode Reward:  43
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         4272.38349051   112.85376583 86972.62863201]
------
Step:18, Action:West
State  114
Old Q Values:  [ -180.6         4272.38349051   112.85376583 86972.62863201]
New Q values:  [  -180.6          4272.38349051    112.85376583 105609.84437707]
Reward: 100009  Episode Reward:  100052
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  1.19836712e+04  5.34840194e+03 -9.78728177e+01]
------
Step:1, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  1.19836712e+04  5.34840194e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  6.70916694e+03  5.34840194e+03 -9.78728177e+01]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6367.66150191   140.12752356]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6367.66150191   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799  5877.83659236   140.12752356]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11084.57330533 -8656.02923281 -7525.7277781     87.55103836]
------
Step:3, Action:North
State  288
Old Q Values:  [11084.57330533 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [10358.66585042 -8656.02923281 -7525.7277781     87.55103836]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19731.45509427   590.87277317 -1855.11188891 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [19731.45509427   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [20196.67683792   590.87277317 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3036.07289435  -180.00807518 40995.64933404]
------
Step:5, Action:West
State  130
Old Q Values:  [29415.41153072  3036.07289435  -180.00807518 40995.64933404]
New Q values:  [29415.41153072  3036.07289435  -180.00807518 48086.61304674]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051    112.85376583 105609.84437707]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    64.58531952    64.86408231]
New Q values:  [ -281.736      -8877.87327254    64.58531952    55.65177422]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8399.5673214     81.02047099  -180.6       ]
------
Step:7, Action:East
State  98
Old Q Values:  [  0.         279.48400894   0.           0.        ]
New Q values:  [    0.           279.48400894 31682.35331312     0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x   x
xg  x
xxxxx
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    64.58531952    55.65177422]
New Q values:  [ -281.736      -8877.87327254   237.75424717    55.65177422]
Reward: -1  Episode Reward:  52
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         708.40039788 -180.6          81.69974738]
------
Step:9, Action:South
State  130
Old Q Values:  [29415.41153072  3036.07289435  -180.00807518 48086.61304674]
New Q values:  [29415.41153072  8714.02070166  -180.00807518 48086.61304674]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.50006385e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:10, Action:North
State  218
Old Q Values:  [   4.59845327 -610.30346672    0.          784.96322284]
New Q values:  [ 213.75950067 -610.30346672    0.          784.96322284]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         708.40039788 -180.6          81.69974738]
------
Step:11, Action:South
State  130
Old Q Values:  [29415.41153072  8714.02070166  -180.00807518 48086.61304674]
New Q values:  [29415.41153072 10985.19982458  -180.00807518 48086.61304674]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.50006385e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:12, Action:North
State  216
Old Q Values:  [  165.21241607  1086.66392038 -6170.35693855 -2387.54492731]
New Q values:  [  278.00508579  1086.66392038 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         708.40039788 -180.6          81.69974738]
------
Step:13, Action:South
State  130
Old Q Values:  [29415.41153072 10985.19982458  -180.00807518 48086.61304674]
New Q values:  [29415.41153072 11893.67147375  -180.00807518 48086.61304674]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.50006385e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:14, Action:North
State  216
Old Q Values:  [  278.00508579  1086.66392038 -6170.35693855 -2387.54492731]
New Q values:  [  323.12215368  1086.66392038 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         708.40039788 -180.6          81.69974738]
------
Step:15, Action:South
State  136
Old Q Values:  [ -724.71310357   869.78965576 -6245.61866138    94.39090975]
New Q values:  [ -724.71310357   673.31503842 -6245.61866138    94.39090975]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  323.12215368  1086.66392038 -6170.35693855 -2387.54492731]
------
Step:16, Action:South
State  216
Old Q Values:  [  323.12215368  1086.66392038 -6170.35693855 -2387.54492731]
New Q values:  [  323.12215368  3541.66532328 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10358.66585042 -8656.02923281 -7525.7277781     87.55103836]
------
Step:17, Action:North
State  288
Old Q Values:  [10358.66585042 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [ 5205.36593715 -8656.02923281 -7525.7277781     87.55103836]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  323.12215368  3541.66532328 -6170.35693855 -2387.54492731]
------
Step:18, Action:South
State  216
Old Q Values:  [  323.12215368  3541.66532328 -6170.35693855 -2387.54492731]
New Q values:  [  323.12215368  2977.67591046 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5205.36593715 -8656.02923281 -7525.7277781     87.55103836]
------
Step:19, Action:North
State  288
Old Q Values:  [ 5205.36593715 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [ 2974.849148   -8656.02923281 -7525.7277781     87.55103836]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  323.12215368  2977.67591046 -6170.35693855 -2387.54492731]
------
Step:20, Action:South
State  216
Old Q Values:  [  323.12215368  2977.67591046 -6170.35693855 -2387.54492731]
New Q values:  [  323.12215368  2082.92510858 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2974.849148   -8656.02923281 -7525.7277781     87.55103836]
------
Step:21, Action:North
State  288
Old Q Values:  [ 2974.849148   -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [ 8689.53120312 -8656.02923281 -7525.7277781     87.55103836]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.50006385e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:22, Action:North
State  216
Old Q Values:  [  323.12215368  2082.92510858 -6170.35693855 -2387.54492731]
New Q values:  [  341.16898084  2082.92510858 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         708.40039788 -180.6          81.69974738]
------
Step:23, Action:South
State  130
Old Q Values:  [29415.41153072 11893.67147375  -180.00807518 48086.61304674]
New Q values:  [29415.41153072 12257.06013342  -180.00807518 48086.61304674]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.50006385e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:24, Action:North
State  218
Old Q Values:  [ 213.75950067 -610.30346672    0.          784.96322284]
New Q values:  [ 297.42391963 -610.30346672    0.          784.96322284]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         708.40039788 -180.6          81.69974738]
------
Step:25, Action:South
State  138
Old Q Values:  [-180.6         708.40039788 -180.6          81.69974738]
New Q values:  [-180.6         907.63769173 -180.6          81.69974738]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  341.16898084  2082.92510858 -6170.35693855 -2387.54492731]
------
Step:26, Action:South
State  216
Old Q Values:  [  341.16898084  2082.92510858 -6170.35693855 -2387.54492731]
New Q values:  [  341.16898084  3439.42940437 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8689.53120312 -8656.02923281 -7525.7277781     87.55103836]
------
Step:27, Action:North
State  288
Old Q Values:  [ 8689.53120312 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [ 4507.04130256 -8656.02923281 -7525.7277781     87.55103836]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  341.16898084  3439.42940437 -6170.35693855 -2387.54492731]
------
Step:28, Action:South
State  216
Old Q Values:  [  341.16898084  3439.42940437 -6170.35693855 -2387.54492731]
New Q values:  [  341.16898084  2727.28415251 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4507.04130256 -8656.02923281 -7525.7277781     87.55103836]
------
Step:29, Action:West
State  288
Old Q Values:  [ 4507.04130256 -8656.02923281 -7525.7277781     87.55103836]
New Q values:  [ 4507.04130256 -8656.02923281 -7525.7277781   3058.74153568]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549   682.66642689 10081.07040111]
------
Step:30, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549   682.66642689 10081.07040111]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  6.82666427e+02  7.18479484e+04]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         907.63769173 -180.6          81.69974738]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6         907.63769173 -180.6          81.69974738]
New Q values:  [-180.6        6427.45812807 -180.6          81.69974738]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20196.67683792   590.87277317 -1855.11188891 -3385.12952694]
------
Step:2, Action:North
State  208
Old Q Values:  [20196.67683792   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [10006.30817359   590.87277317 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.g x
x ..x
xxxxx
Step:3, Action:West
State  138
Old Q Values:  [-180.6        6427.45812807 -180.6          81.69974738]
New Q values:  [-180.6        6427.45812807 -180.6         109.4061731 ]
Reward: 9  Episode Reward:  17
xxxxx
x.a x
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   237.75424717    55.65177422]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    80.21221393   178.97765173]
New Q values:  [ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.. x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        6427.45812807 -180.6         109.4061731 ]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6        6427.45812807 -180.6         109.4061731 ]
New Q values:  [ -180.6        10070.57479515  -180.6          109.4061731 ]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x..ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.50006385e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [ 2.50006385e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.30208278e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        10070.57479515  -180.6          109.4061731 ]
------
Step:7, Action:South
State  138
Old Q Values:  [ -180.6        10070.57479515  -180.6          109.4061731 ]
New Q values:  [-180.6        7029.52237013 -180.6         109.4061731 ]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10006.30817359   590.87277317 -1855.11188891 -3385.12952694]
------
Step:8, Action:North
State  208
Old Q Values:  [10006.30817359   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [ 6110.77998048   590.87277317 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        7029.52237013 -180.6         109.4061731 ]
------
Step:9, Action:South
State  138
Old Q Values:  [-180.6        7029.52237013 -180.6         109.4061731 ]
New Q values:  [-180.6        6717.45729718 -180.6         109.4061731 ]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x..ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.30208278e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
------
Step:10, Action:North
State  210
Old Q Values:  [ 1.30208278e+04  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 7.22296832e+03  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  10
xxxxx
x. ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        6717.45729718 -180.6         109.4061731 ]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6        6717.45729718 -180.6         109.4061731 ]
New Q values:  [-180.6        4519.61691302 -180.6         109.4061731 ]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6110.77998048   590.87277317 -1855.11188891 -3385.12952694]
------
Step:12, Action:North
State  208
Old Q Values:  [ 6110.77998048   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [ 2645.70650372   590.87277317 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
xg ax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   673.31503842 -6245.61866138    94.39090975]
------
Step:13, Action:South
State  136
Old Q Values:  [ -724.71310357   673.31503842 -6245.61866138    94.39090975]
New Q values:  [ -724.71310357  1062.43796648 -6245.61866138    94.39090975]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2645.70650372   590.87277317 -1855.11188891 -3385.12952694]
------
Step:14, Action:North
State  208
Old Q Values:  [ 2645.70650372   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [ 1376.41399143   590.87277317 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  6
xxxxx
xg ax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1062.43796648 -6245.61866138    94.39090975]
------
Step:15, Action:South
State  136
Old Q Values:  [ -724.71310357  1062.43796648 -6245.61866138    94.39090975]
New Q values:  [ -724.71310357   837.29938402 -6245.61866138    94.39090975]
Reward: -1  Episode Reward:  5
xxxxx
x.g x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1376.41399143   590.87277317 -1855.11188891 -3385.12952694]
------
Step:16, Action:North
State  208
Old Q Values:  [ 1376.41399143   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [-5198.84458822   590.87277317 -1855.11188891 -3385.12952694]
Reward: -10001  Episode Reward:  -9996
xxxxx
x. gx
x.. x
x ..x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5877.83659236   140.12752356]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5877.83659236   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799  3708.64702771   140.12752356]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4507.04130256 -8656.02923281 -7525.7277781   3058.74153568]
------
Step:2, Action:North
State  288
Old Q Values:  [ 4507.04130256 -8656.02923281 -7525.7277781   3058.74153568]
New Q values:  [ 1985.47835297 -8656.02923281 -7525.7277781   3058.74153568]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5198.84458822   590.87277317 -1855.11188891 -3385.12952694]
------
Step:3, Action:South
State  210
Old Q Values:  [ 7.22296832e+03  1.23921796e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 7.22296832e+03  1.41270964e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x . x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1985.47835297 -8656.02923281 -7525.7277781   3058.74153568]
------
Step:4, Action:West
State  288
Old Q Values:  [ 1985.47835297 -8656.02923281 -7525.7277781   3058.74153568]
New Q values:  [ 1985.47835297 -8656.02923281 -7525.7277781   2335.49072258]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3708.64702771   140.12752356]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3708.64702771   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799  2183.50602786   140.12752356]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1985.47835297 -8656.02923281 -7525.7277781   2335.49072258]
------
Step:6, Action:West
State  288
Old Q Values:  [ 1985.47835297 -8656.02923281 -7525.7277781   2335.49072258]
New Q values:  [ 1985.47835297 -8656.02923281 -7525.7277781   1588.64809739]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2183.50602786   140.12752356]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2183.50602786   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799  1468.44591704   140.12752356]
Reward: -1  Episode Reward:  13
xxxxx
xg..x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1985.47835297 -8656.02923281 -7525.7277781   1588.64809739]
------
Step:8, Action:North
State  288
Old Q Values:  [ 1985.47835297 -8656.02923281 -7525.7277781   1588.64809739]
New Q values:  [  970.85317314 -8656.02923281 -7525.7277781   1588.64809739]
Reward: -1  Episode Reward:  12
xxxxx
x...x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5198.84458822   590.87277317 -1855.11188891 -3385.12952694]
------
Step:9, Action:South
State  208
Old Q Values:  [-5198.84458822   590.87277317 -1855.11188891 -3385.12952694]
New Q values:  [-5198.84458822   712.34353848 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  11
xxxxx
xg..x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  970.85317314 -8656.02923281 -7525.7277781   1588.64809739]
------
Step:10, Action:West
State  288
Old Q Values:  [  970.85317314 -8656.02923281 -7525.7277781   1588.64809739]
New Q values:  [  970.85317314 -8656.02923281 -7525.7277781   1075.39301407]
Reward: -1  Episode Reward:  10
xxxxx
x.g.x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1468.44591704   140.12752356]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1468.44591704   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799   909.39627103   140.12752356]
Reward: -1  Episode Reward:  9
xxxxx
xg..x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  970.85317314 -8656.02923281 -7525.7277781   1075.39301407]
------
Step:12, Action:West
State  288
Old Q Values:  [  970.85317314 -8656.02923281 -7525.7277781   1075.39301407]
New Q values:  [  970.85317314 -8656.02923281 -7525.7277781    702.37608694]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   909.39627103   140.12752356]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   909.39627103   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799   654.41446036   140.12752356]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  970.85317314 -8656.02923281 -7525.7277781    702.37608694]
------
Step:14, Action:North
State  288
Old Q Values:  [  970.85317314 -8656.02923281 -7525.7277781    702.37608694]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781    702.37608694]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5198.84458822   712.34353848 -1855.11188891 -3385.12952694]
------
Step:15, Action:South
State  208
Old Q Values:  [-5198.84458822   712.34353848 -1855.11188891 -3385.12952694]
New Q values:  [-5198.84458822   495.05024148 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  5
xxxxx
x..gx
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781    702.37608694]
------
Step:16, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781    702.37608694]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781  21834.73496923]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x .gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  6.82666427e+02  7.18479484e+04]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   654.41446036   140.12752356]
New Q values:  [-2527.46239811 -8521.23367799   654.41446036   112.13705437]
Reward: 9  Episode Reward:  13
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  168.95348316  -200.37659742 -1821.20831088  -251.53897752]
------
Step:18, Action:North
State  260
Old Q Values:  [  621.98044545 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [   46.3058101  -6457.4598       227.14724233 -7094.93143822]
Reward: -10001  Episode Reward:  -9988
xxxxx
x...x
xg. x
x   x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    73.74169765 -1482.55814493  -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [   13.85659648    73.74169765 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    85.58272401 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  168.95348316  -200.37659742 -1821.20831088  -251.53897752]
------
Step:2, Action:North
State  261
Old Q Values:  [  168.95348316  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [   92.65621047  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    85.58272401 -1482.55814493  -180.6       ]
------
Step:3, Action:South
State  181
Old Q Values:  [   13.85659648    85.58272401 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    61.42995274 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   92.65621047  -200.37659742 -1821.20831088  -251.53897752]
------
Step:4, Action:North
State  261
Old Q Values:  [   92.65621047  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [   54.89147001  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  6
xxxxx
x..gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    61.42995274 -1482.55814493  -180.6       ]
------
Step:5, Action:South
State  181
Old Q Values:  [   13.85659648    61.42995274 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    40.4394221  -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   54.89147001  -200.37659742 -1821.20831088  -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [   54.89147001  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [   33.48841463  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    40.4394221  -1482.55814493  -180.6       ]
------
Step:7, Action:South
State  183
Old Q Values:  [  22.25138791  104.25614155 1755.27970652    0.        ]
New Q values:  [  22.25138791   51.14898101 1755.27970652    0.        ]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   33.48841463  -200.37659742 -1821.20831088  -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [   33.48841463  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [  539.37927781  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  2
xxxxx
x.. x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791   51.14898101 1755.27970652    0.        ]
------
Step:9, Action:East
State  189
Old Q Values:  [    9.84673294   322.31251728 -4680.74267672  -244.98066897]
New Q values:  [   9.84673294  322.31251728 -549.80333104 -244.98066897]
Reward: 9  Episode Reward:  11
xxxxx
x.. x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
------
Step:10, Action:South
State  195
Old Q Values:  [   38.85388605  3081.56358951 10743.14145132  2546.60363946]
New Q values:  [   38.85388605 22792.40997026 10743.14145132  2546.60363946]
Reward: 9  Episode Reward:  20
xxxxx
x.. x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  6.82666427e+02  7.18479484e+04]
------
Step:11, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  6.82666427e+02  7.18479484e+04]
New Q values:  [   37.74111519  -168.92307549   682.66642689 28900.39316262]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  539.37927781  -200.37659742 -1821.20831088  -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [  539.37927781  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [  227.28353775  -200.37659742 -1821.20831088  -251.53897752]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    40.4394221  -1482.55814493  -180.6       ]
------
Step:13, Action:South
State  180
Old Q Values:  [-1367.02476015   517.10998324 19327.04543975     0.        ]
New Q values:  [-1367.02476015   274.38816599 19327.04543975     0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[   46.3058101  -6457.4598       227.14724233 -7094.93143822]
------
Step:14, Action:East
State  261
Old Q Values:  [  227.28353775  -200.37659742 -1821.20831088  -251.53897752]
New Q values:  [ 227.28353775 -200.37659742 -532.75898624 -251.53897752]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   654.41446036   112.13705437]
------
Step:15, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549   682.66642689 28900.39316262]
New Q values:  [   37.74111519  -168.92307549  6828.88706152 28900.39316262]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781  21834.73496923]
------
Step:16, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781  21834.73496923]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781  17403.41193648]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  6828.88706152 28900.39316262]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   654.41446036   112.13705437]
New Q values:  [-2527.46239811 -8521.23367799   654.41446036  7854.37510949]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26033.73429248  3481.80752105   750.46010822  2937.02145253]
------
Step:18, Action:North
State  260
Old Q Values:  [   46.3058101  -6457.4598       227.14724233 -7094.93143822]
New Q values:  [ 5816.03595597 -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  22
xxxxx
xg. x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   274.38816599 19327.04543975     0.        ]
------
Step:19, Action:East
State  177
Old Q Values:  [    0.          6260.18473843 84173.98802242     0.        ]
New Q values:  [    0.          6260.18473843 35681.74529009     0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  6.70916694e+03  5.34840194e+03 -9.78728177e+01]
------
Step:20, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  6.70916694e+03  5.34840194e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  5.03937931e+03  5.34840194e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   654.41446036  7854.37510949]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   654.41446036  7854.37510949]
New Q values:  [-2527.46239811 -8521.23367799   654.41446036 10951.27033154]
Reward: -1  Episode Reward:  19
xxxxx
x.g x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26033.73429248  3481.80752105   750.46010822  2937.02145253]
------
Step:22, Action:North
State  260
Old Q Values:  [ 5816.03595597 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [ 8123.92801431 -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  18
xxxxx
xg. x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   274.38816599 19327.04543975     0.        ]
------
Step:23, Action:East
State  180
Old Q Values:  [-1367.02476015   274.38816599 19327.04543975     0.        ]
New Q values:  [-1367.02476015   274.38816599  9334.73875694     0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  5.03937931e+03  5.34840194e+03 -9.78728177e+01]
------
Step:24, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  5.03937931e+03  5.34840194e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  5.03937931e+03  2.29327585e+03 -9.78728177e+01]
Reward: 9  Episode Reward:  26
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5198.84458822   495.05024148 -1855.11188891 -3385.12952694]
------
Step:25, Action:South
State  208
Old Q Values:  [-5198.84458822   495.05024148 -1855.11188891 -3385.12952694]
New Q values:  [-5198.84458822  5418.44367753 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781  17403.41193648]
------
Step:26, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781  17403.41193648]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781  15630.88272338]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  6828.88706152 28900.39316262]
------
Step:27, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  6828.88706152 28900.39316262]
New Q values:  [   37.74111519  -168.92307549  6828.88706152 19369.67755279]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26033.73429248  3481.80752105   750.46010822  2937.02145253]
------
Step:28, Action:North
State  257
Old Q Values:  [26033.73429248  3481.80752105   750.46010822  2937.02145253]
New Q values:  [33298.86443271  3481.80752105   750.46010822  2937.02145253]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 76286.56905238     0.        ]
------
Step:29, Action:East
State  177
Old Q Values:  [    0.          6260.18473843 35681.74529009     0.        ]
New Q values:  [    0.          6260.18473843 15589.19185568     0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
------
Step:30, Action:South
State  195
Old Q Values:  [   38.85388605 22792.40997026 10743.14145132  2546.60363946]
New Q values:  [   38.85388605 14927.26725394 10743.14145132  2546.60363946]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  6828.88706152 19369.67755279]
------
Step:31, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  6828.88706152 19369.67755279]
New Q values:  [   37.74111519  -168.92307549  6828.88706152 17736.93035093]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[33298.86443271  3481.80752105   750.46010822  2937.02145253]
------
Step:32, Action:North
State  257
Old Q Values:  [33298.86443271  3481.80752105   750.46010822  2937.02145253]
New Q values:  [36204.9164888   3481.80752105   750.46010822  2937.02145253]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 76286.56905238     0.        ]
------
Step:33, Action:East
State  177
Old Q Values:  [    0.          6260.18473843 15589.19185568     0.        ]
New Q values:  [   0.         6260.18473843 7552.17048191    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
------
Step:34, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  5.03937931e+03  2.29327585e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  5.30053282e+03  2.29327585e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   654.41446036 10951.27033154]
------
Step:35, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   654.41446036 10951.27033154]
New Q values:  [-2527.46239811 -8521.23367799   654.41446036 12140.66646858]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[25869.19445322     0.          2253.00678683   644.94785455]
------
Step:36, Action:East
State  256
Old Q Values:  [25869.19445322     0.          2253.00678683   644.94785455]
New Q values:  [25869.19445322     0.          4542.80265531   644.94785455]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   654.41446036 12140.66646858]
------
Step:37, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   654.41446036 12140.66646858]
New Q values:  [-2527.46239811 -8521.23367799  4950.43060116 12140.66646858]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781  15630.88272338]
------
Step:38, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781  15630.88272338]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781   9893.95302993]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4950.43060116 12140.66646858]
------
Step:39, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4950.43060116 12140.66646858]
New Q values:  [-2527.46239811 -8521.23367799  4947.75814944 12140.66646858]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781   9893.95302993]
------
Step:40, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781   9893.95302993]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781   7599.18115255]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4947.75814944 12140.66646858]
------
Step:41, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4947.75814944 12140.66646858]
New Q values:  [-2527.46239811 -8521.23367799  4947.75814944 15717.14153407]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[36204.9164888   3481.80752105   750.46010822  2937.02145253]
------
Step:42, Action:North
State  257
Old Q Values:  [36204.9164888   3481.80752105   750.46010822  2937.02145253]
New Q values:  [37367.33731123  3481.80752105   750.46010822  2937.02145253]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 76286.56905238     0.        ]
------
Step:43, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 76286.56905238     0.        ]
New Q values:  [    0.          4614.46100011 32047.20657148     0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.39637219e+02  5.11059650e+03  1.20371620e+03]
------
Step:44, Action:East
State  195
Old Q Values:  [   38.85388605 14927.26725394 10743.14145132  2546.60363946]
New Q values:  [   38.85388605 14927.26725394  6463.54707693  2546.60363946]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 7.22296832e+03  1.41270964e+03 -1.80600000e+02  3.52184257e+00]
------
Step:45, Action:North
State  210
Old Q Values:  [ 7.22296832e+03  1.41270964e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.73145712e+04  1.41270964e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072 12257.06013342  -180.00807518 48086.61304674]
------
Step:46, Action:West
State  130
Old Q Values:  [29415.41153072 12257.06013342  -180.00807518 48086.61304674]
New Q values:  [29415.41153072 12257.06013342  -180.00807518 50922.99853181]
Reward: 9  Episode Reward:  14
xxxxx
x.a x
x g x
x   x
xxxxx
Step:47, Action:West
State  112
Old Q Values:  [    0.         18809.06432124 13004.59207411 84007.56      ]
New Q values:  [    0.         18809.06432124 13004.59207411 93608.424     ]
Reward: 100009  Episode Reward:  100023
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  934.15671921  862.42400667 -272.09726687]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6         625.91667873    0.            0.        ]
New Q values:  [-180.6         782.35058345    0.            0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791   51.14898101 1755.27970652    0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  1760.08655416     0.        ]
New Q values:  [    0.         -5969.29177534   828.06472689     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  3921.76849592   174.55451539     0.        ]
New Q values:  [-2469.90645144  6289.24985859   174.55451539     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4947.75814944 15717.14153407]
------
Step:4, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6         512.11660633  470.60238833]
New Q values:  [   16.82637525  -180.6          512.11660633 -3369.18064038]
Reward: -9991  Episode Reward:  -9964
xxxxx
x ..x
x  .x
xg  x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  6828.88706152 17736.93035093]
------
Step:1, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  6828.88706152 17736.93035093]
New Q values:  [  37.74111519 -168.92307549 6828.88706152 7168.3572017 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 227.28353775 -200.37659742 -532.75898624 -251.53897752]
------
Step:2, Action:North
State  261
Old Q Values:  [ 227.28353775 -200.37659742 -532.75898624 -251.53897752]
New Q values:  [ 108.44524173 -200.37659742 -532.75898624 -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    40.4394221  -1482.55814493  -180.6       ]
------
Step:3, Action:South
State  181
Old Q Values:  [   13.85659648    40.4394221  -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    48.10934136 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 108.44524173 -200.37659742 -532.75898624 -251.53897752]
------
Step:4, Action:North
State  260
Old Q Values:  [ 8123.92801431 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [ 6049.39283281 -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   274.38816599  9334.73875694     0.        ]
------
Step:5, Action:East
State  180
Old Q Values:  [-1367.02476015   274.38816599  9334.73875694     0.        ]
New Q values:  [-1367.02476015   274.38816599  5329.45534954     0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  5.30053282e+03  2.29327585e+03 -9.78728177e+01]
------
Step:6, Action:South
State  194
Old Q Values:  [-6.00000000e-01  2.39637219e+02  5.11059650e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.81039735e+03  5.11059650e+03  1.20371620e+03]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4947.75814944 15717.14153407]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4947.75814944 15717.14153407]
New Q values:  [-2527.46239811 -8521.23367799  4258.25760554 15717.14153407]
Reward: -1  Episode Reward:  23
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781   7599.18115255]
------
Step:8, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781   7599.18115255]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781   3192.70744292]
Reward: -1  Episode Reward:  22
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525  -180.6          512.11660633 -3369.18064038]
------
Step:9, Action:East
State  276
Old Q Values:  [   16.82637525  -180.6          512.11660633 -3369.18064038]
New Q values:  [   16.82637525  -180.6         1162.05887541 -3369.18064038]
Reward: -1  Episode Reward:  21
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781   3192.70744292]
------
Step:10, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781   3192.70744292]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781   3426.99013768]
Reward: -1  Episode Reward:  20
xxxxx
x..gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 6828.88706152 7168.3572017 ]
------
Step:11, Action:West
State  276
Old Q Values:  [   16.82637525  -180.6         1162.05887541 -3369.18064038]
New Q values:  [   16.82637525  -180.6         1162.05887541 -1315.73868363]
Reward: -1  Episode Reward:  19
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 108.44524173 -200.37659742 -532.75898624 -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [ 108.44524173 -200.37659742 -532.75898624 -251.53897752]
New Q values:  [  57.2108991  -200.37659742 -532.75898624 -251.53897752]
Reward: -1  Episode Reward:  18
xxxxx
x..gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    48.10934136 -1482.55814493  -180.6       ]
------
Step:13, Action:South
State  181
Old Q Values:  [   13.85659648    48.10934136 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    35.80700627 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  -200.37659742 -532.75898624 -251.53897752]
------
Step:14, Action:North
State  260
Old Q Values:  [ 6049.39283281 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [ 4017.99373798 -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   274.38816599  5329.45534954     0.        ]
------
Step:15, Action:East
State  180
Old Q Values:  [-1367.02476015   274.38816599  5329.45534954     0.        ]
New Q values:  [-1367.02476015   274.38816599  4017.95709739     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6289.24985859   174.55451539     0.        ]
------
Step:16, Action:South
State  196
Old Q Values:  [-2469.90645144  6289.24985859   174.55451539     0.        ]
New Q values:  [-2469.90645144  2863.71760606   174.55451539     0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x.g.x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525  -180.6         1162.05887541 -1315.73868363]
------
Step:17, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 6828.88706152 7168.3572017 ]
New Q values:  [  37.74111519 -168.92307549 3759.05186591 7168.3572017 ]
Reward: -1  Episode Reward:  13
xxxxx
x..gx
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781   3426.99013768]
------
Step:18, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781   3426.99013768]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781   3520.70321558]
Reward: -1  Episode Reward:  12
xxxxx
x..gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3759.05186591 7168.3572017 ]
------
Step:19, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 3759.05186591 7168.3572017 ]
New Q values:  [  37.74111519 -168.92307549 3759.05186591 2883.90615041]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x  gx
xa  x
xxxxx
Step:20, Action:East
State  261
Old Q Values:  [  57.2108991  -200.37659742 -532.75898624 -251.53897752]
New Q values:  [  57.2108991  -200.37659742 4501.43886572 -251.53897752]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4258.25760554 15717.14153407]
------
Step:21, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 3759.05186591 2883.90615041]
New Q values:  [  37.74111519 -168.92307549 3759.05186591 2503.39411988]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  -200.37659742 4501.43886572 -251.53897752]
------
Step:22, Action:East
State  261
Old Q Values:  [  57.2108991  -200.37659742 4501.43886572 -251.53897752]
New Q values:  [  57.2108991  -200.37659742 2927.69110606 -251.53897752]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3759.05186591 2503.39411988]
------
Step:23, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4258.25760554 15717.14153407]
New Q values:  [-2527.46239811 -8521.23367799  4258.25760554  7164.56394545]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  -200.37659742 2927.69110606 -251.53897752]
------
Step:24, Action:North
State  260
Old Q Values:  [ 4017.99373798 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [ 1855.01691326 -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   828.06472689     0.        ]
------
Step:25, Action:East
State  180
Old Q Values:  [-1367.02476015   274.38816599  4017.95709739     0.        ]
New Q values:  [-1367.02476015   274.38816599  3196.74268572     0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  5.30053282e+03  2.29327585e+03 -9.78728177e+01]
------
Step:26, Action:South
State  196
Old Q Values:  [-2469.90645144  2863.71760606   174.55451539     0.        ]
New Q values:  [-2469.90645144  1493.50470505   174.55451539     0.        ]
Reward: -1  Episode Reward:  4
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525  -180.6         1162.05887541 -1315.73868363]
------
Step:27, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4258.25760554  7164.56394545]
New Q values:  [-2527.46239811 -8521.23367799  2758.91400689  7164.56394545]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781   3520.70321558]
------
Step:28, Action:West
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781   3520.70321558]
New Q values:  [  601.4443308  -8656.02923281 -7525.7277781   3557.05046987]
Reward: -1  Episode Reward:  2
xxxxx
x...x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2758.91400689  7164.56394545]
------
Step:29, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2758.91400689  7164.56394545]
New Q values:  [-2527.46239811 -8521.23367799  2170.08074372  7164.56394545]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  601.4443308  -8656.02923281 -7525.7277781   3557.05046987]
------
Step:30, Action:North
State  288
Old Q Values:  [  601.4443308  -8656.02923281 -7525.7277781   3557.05046987]
New Q values:  [ 1871.51083558 -8656.02923281 -7525.7277781   3557.05046987]
Reward: 9  Episode Reward:  10
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5198.84458822  5418.44367753 -1855.11188891 -3385.12952694]
------
Step:31, Action:South
State  210
Old Q Values:  [ 1.73145712e+04  1.41270964e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.73145712e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1871.51083558 -8656.02923281 -7525.7277781   3557.05046987]
------
Step:32, Action:North
State  288
Old Q Values:  [ 1871.51083558 -8656.02923281 -7525.7277781   3557.05046987]
New Q values:  [ 5942.375707   -8656.02923281 -7525.7277781   3557.05046987]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.73145712e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:33, Action:North
State  210
Old Q Values:  [ 1.73145712e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.22081281e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  17
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072 12257.06013342  -180.00807518 50922.99853181]
------
Step:34, Action:West
State  130
Old Q Values:  [29415.41153072 12257.06013342  -180.00807518 50922.99853181]
New Q values:  [29415.41153072 12257.06013342  -180.00807518 53480.09081669]
Reward: 9  Episode Reward:  26
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.10351638e+05]
------
Step:35, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.10351638e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.14961448e+05]
Reward: 100009  Episode Reward:  100035
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2170.08074372  7164.56394545]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525  -180.6         1162.05887541 -1315.73868363]
New Q values:  [  16.82637525 -180.6        1162.05887541   35.60960053]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1855.01691326 -6457.4598       227.14724233 -7094.93143822]
------
Step:2, Action:North
State  260
Old Q Values:  [ 1855.01691326 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [-4299.57042898 -6457.4598       227.14724233 -7094.93143822]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
xg..x
x  .x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   237.75424717    55.65177422]
New Q values:  [ -281.736      -8877.87327254  1456.38677277    55.65177422]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4519.61691302 -180.6         109.4061731 ]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        4519.61691302 -180.6         109.4061731 ]
New Q values:  [-180.6        8475.68518218 -180.6         109.4061731 ]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.22081281e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [ 2.22081281e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.14253568e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        8475.68518218 -180.6         109.4061731 ]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6        8475.68518218 -180.6         109.4061731 ]
New Q values:  [-180.6        6817.28110605 -180.6         109.4061731 ]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.14253568e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:5, Action:North
State  210
Old Q Values:  [ 1.14253568e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 6.61472704e+03  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        6817.28110605 -180.6         109.4061731 ]
------
Step:6, Action:South
State  138
Old Q Values:  [-180.6        6817.28110605 -180.6         109.4061731 ]
New Q values:  [-180.6        4710.73055524 -180.6         109.4061731 ]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6.61472704e+03  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:7, Action:North
State  210
Old Q Values:  [ 6.61472704e+03  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 4.05850998e+03  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4710.73055524 -180.6         109.4061731 ]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6        4710.73055524 -180.6         109.4061731 ]
New Q values:  [-180.6        3509.22532536 -180.6         109.4061731 ]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5198.84458822  5418.44367753 -1855.11188891 -3385.12952694]
------
Step:9, Action:South
State  208
Old Q Values:  [-5198.84458822  5418.44367753 -1855.11188891 -3385.12952694]
New Q values:  [-5198.84458822  3949.49018311 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  11
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5942.375707   -8656.02923281 -7525.7277781   3557.05046987]
------
Step:10, Action:North
State  288
Old Q Values:  [ 5942.375707   -8656.02923281 -7525.7277781   3557.05046987]
New Q values:  [ 3561.19733773 -8656.02923281 -7525.7277781   3557.05046987]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5198.84458822  3949.49018311 -1855.11188891 -3385.12952694]
------
Step:11, Action:South
State  208
Old Q Values:  [-5198.84458822  3949.49018311 -1855.11188891 -3385.12952694]
New Q values:  [-5198.84458822  2647.55527457 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3561.19733773 -8656.02923281 -7525.7277781   3557.05046987]
------
Step:12, Action:North
State  288
Old Q Values:  [ 3561.19733773 -8656.02923281 -7525.7277781   3557.05046987]
New Q values:  [ 2641.43193019 -8656.02923281 -7525.7277781   3557.05046987]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.05850998e+03  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:13, Action:North
State  210
Old Q Values:  [ 4.05850998e+03  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [2675.57159107 1631.59899841 -180.6           3.52184257]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        3509.22532536 -180.6         109.4061731 ]
------
Step:14, Action:South
State  138
Old Q Values:  [-180.6        3509.22532536 -180.6         109.4061731 ]
New Q values:  [-180.6        2205.76160746 -180.6         109.4061731 ]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2675.57159107 1631.59899841 -180.6           3.52184257]
------
Step:15, Action:North
State  210
Old Q Values:  [2675.57159107 1631.59899841 -180.6           3.52184257]
New Q values:  [1731.35711867 1631.59899841 -180.6           3.52184257]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2205.76160746 -180.6         109.4061731 ]
------
Step:16, Action:South
State  138
Old Q Values:  [-180.6        2205.76160746 -180.6         109.4061731 ]
New Q values:  [-180.6        1675.97122536 -180.6         109.4061731 ]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-5198.84458822  2647.55527457 -1855.11188891 -3385.12952694]
------
Step:17, Action:South
State  208
Old Q Values:  [-5198.84458822  2647.55527457 -1855.11188891 -3385.12952694]
New Q values:  [-5198.84458822  2125.53725079 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  3
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2641.43193019 -8656.02923281 -7525.7277781   3557.05046987]
------
Step:18, Action:West
State  288
Old Q Values:  [ 2641.43193019 -8656.02923281 -7525.7277781   3557.05046987]
New Q values:  [ 2641.43193019 -8656.02923281 -7525.7277781   3577.58937158]
Reward: 9  Episode Reward:  12
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2170.08074372  7164.56394545]
------
Step:19, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2170.08074372  7164.56394545]
New Q values:  [-2527.46239811 -8521.23367799  2170.08074372 10631.98391415]
Reward: 9  Episode Reward:  21
xxxxx
x.  x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[25869.19445322     0.          4542.80265531   644.94785455]
------
Step:20, Action:East
State  256
Old Q Values:  [25869.19445322     0.          4542.80265531   644.94785455]
New Q values:  [25869.19445322     0.          5006.11623637   644.94785455]
Reward: -1  Episode Reward:  20
xxxxx
xg  x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2170.08074372 10631.98391415]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2170.08074372 10631.98391415]
New Q values:  [-2527.46239811 -8521.23367799  2170.08074372 15462.39475903]
Reward: -1  Episode Reward:  19
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37367.33731123  3481.80752105   750.46010822  2937.02145253]
------
Step:22, Action:North
State  257
Old Q Values:  [37367.33731123  3481.80752105   750.46010822  2937.02145253]
New Q values:  [17217.98606907  3481.80752105   750.46010822  2937.02145253]
Reward: 9  Episode Reward:  28
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         6260.18473843 7552.17048191    0.        ]
------
Step:23, Action:South
State  177
Old Q Values:  [   0.         6260.18473843 7552.17048191    0.        ]
New Q values:  [   0.         7668.86971609 7552.17048191    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[17217.98606907  3481.80752105   750.46010822  2937.02145253]
------
Step:24, Action:North
State  257
Old Q Values:  [17217.98606907  3481.80752105   750.46010822  2937.02145253]
New Q values:  [9187.25534245 3481.80752105  750.46010822 2937.02145253]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         7668.86971609 7552.17048191    0.        ]
------
Step:25, Action:South
State  177
Old Q Values:  [   0.         7668.86971609 7552.17048191    0.        ]
New Q values:  [   0.         5823.12448917 7552.17048191    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[9187.25534245 3481.80752105  750.46010822 2937.02145253]
------
Step:26, Action:North
State  260
Old Q Values:  [-4299.57042898 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [ -761.40536588 -6457.4598       227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   274.38816599  3196.74268572     0.        ]
------
Step:27, Action:East
State  176
Old Q Values:  [   0.         1327.79507613 2267.17882       0.        ]
New Q values:  [   0.         1327.79507613 2502.43137476    0.        ]
Reward: 9  Episode Reward:  33
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  5.30053282e+03  2.29327585e+03 -9.78728177e+01]
------
Step:28, Action:South
State  194
Old Q Values:  [-6.00000000e-01  4.81039735e+03  5.11059650e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  6.56227737e+03  5.11059650e+03  1.20371620e+03]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2170.08074372 15462.39475903]
------
Step:29, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2170.08074372 15462.39475903]
New Q values:  [-2527.46239811 -8521.23367799  1940.70910896 15462.39475903]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2641.43193019 -8656.02923281 -7525.7277781   3577.58937158]
------
Step:30, Action:West
State  288
Old Q Values:  [ 2641.43193019 -8656.02923281 -7525.7277781   3577.58937158]
New Q values:  [ 2641.43193019 -8656.02923281 -7525.7277781   6069.15417634]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1940.70910896 15462.39475903]
------
Step:31, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1940.70910896 15462.39475903]
New Q values:  [-2527.46239811 -8521.23367799  2596.42989649 15462.39475903]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2641.43193019 -8656.02923281 -7525.7277781   6069.15417634]
------
Step:32, Action:West
State  288
Old Q Values:  [ 2641.43193019 -8656.02923281 -7525.7277781   6069.15417634]
New Q values:  [ 2641.43193019 -8656.02923281 -7525.7277781   7065.78009824]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2596.42989649 15462.39475903]
------
Step:33, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2596.42989649 15462.39475903]
New Q values:  [-2527.46239811 -8521.23367799  3157.70598807 15462.39475903]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2641.43193019 -8656.02923281 -7525.7277781   7065.78009824]
------
Step:34, Action:West
State  288
Old Q Values:  [ 2641.43193019 -8656.02923281 -7525.7277781   7065.78009824]
New Q values:  [ 2641.43193019 -8656.02923281 -7525.7277781   7464.43046701]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3157.70598807 15462.39475903]
------
Step:35, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 3759.05186591 2503.39411988]
New Q values:  [  37.74111519 -168.92307549 3759.05186591 3756.93425069]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[9187.25534245 3481.80752105  750.46010822 2937.02145253]
------
Step:36, Action:North
State  257
Old Q Values:  [9187.25534245 3481.80752105  750.46010822 2937.02145253]
New Q values:  [13288.46410843  3481.80752105   750.46010822  2937.02145253]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 32047.20657148     0.        ]
------
Step:37, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 32047.20657148     0.        ]
New Q values:  [    0.          4614.46100011 14786.96583865     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  6.56227737e+03  5.11059650e+03  1.20371620e+03]
------
Step:38, Action:East
State  195
Old Q Values:  [   38.85388605 14927.26725394  6463.54707693  2546.60363946]
New Q values:  [   38.85388605 14927.26725394  3104.22596637  2546.60363946]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1731.35711867 1631.59899841 -180.6           3.52184257]
------
Step:39, Action:North
State  210
Old Q Values:  [1731.35711867 1631.59899841 -180.6           3.52184257]
New Q values:  [ 1.67359701e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072 12257.06013342  -180.00807518 53480.09081669]
------
Step:40, Action:West
State  130
Old Q Values:  [29415.41153072 12257.06013342  -180.00807518 53480.09081669]
New Q values:  [29415.41153072 12257.06013342  -180.00807518 55879.87076554]
Reward: -1  Episode Reward:  20
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.14961448e+05]
------
Step:41, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.14961448e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.16805372e+05]
Reward: 100009  Episode Reward:  100029
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  6.56227737e+03  5.11059650e+03  1.20371620e+03]
------
Step:1, Action:East
State  195
Old Q Values:  [   38.85388605 14927.26725394  3104.22596637  2546.60363946]
New Q values:  [   38.85388605 14927.26725394  6267.88141429  2546.60363946]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.67359701e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [-5198.84458822  2125.53725079 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  2125.53725079 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1675.97122536 -180.6         109.4061731 ]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6        1675.97122536 -180.6         109.4061731 ]
New Q values:  [-180.6        1675.97122536 -180.6         637.07916644]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  1456.38677277    55.65177422]
New Q values:  [ -281.736      -8877.87327254  1084.74607672    55.65177422]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1675.97122536 -180.6         637.07916644]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6        1675.97122536 -180.6         637.07916644]
New Q values:  [-180.6        5690.57951788 -180.6         637.07916644]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.67359701e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:6, Action:North
State  216
Old Q Values:  [  341.16898084  2727.28415251 -6170.35693855 -2387.54492731]
New Q values:  [ 1843.0414477   2727.28415251 -6170.35693855 -2387.54492731]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        5690.57951788 -180.6         637.07916644]
------
Step:7, Action:South
State  136
Old Q Values:  [ -724.71310357   837.29938402 -6245.61866138    94.39090975]
New Q values:  [ -724.71310357  1152.50499936 -6245.61866138    94.39090975]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x. ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1843.0414477   2727.28415251 -6170.35693855 -2387.54492731]
------
Step:8, Action:South
State  208
Old Q Values:  [-1571.34646768  2125.53725079 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  3094.94404042 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  32
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2641.43193019 -8656.02923281 -7525.7277781   7464.43046701]
------
Step:9, Action:West
State  288
Old Q Values:  [ 2641.43193019 -8656.02923281 -7525.7277781   7464.43046701]
New Q values:  [ 2641.43193019 -8656.02923281 -7525.7277781   7623.89061451]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3157.70598807 15462.39475903]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3157.70598807 15462.39475903]
New Q values:  [-2527.46239811 -8521.23367799  3157.70598807 10176.89713614]
Reward: 9  Episode Reward:  40
xxxxx
x.  x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[13288.46410843  3481.80752105   750.46010822  2937.02145253]
------
Step:11, Action:North
State  257
Old Q Values:  [13288.46410843  3481.80752105   750.46010822  2937.02145253]
New Q values:  [9756.87539497 3481.80752105  750.46010822 2937.02145253]
Reward: 9  Episode Reward:  49
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 14786.96583865     0.        ]
------
Step:12, Action:East
State  177
Old Q Values:  [   0.         5823.12448917 7552.17048191    0.        ]
New Q values:  [   0.         5823.12448917 4337.36193241    0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
------
Step:13, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  5.30053282e+03  2.29327585e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  5.17268227e+03  2.29327585e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3157.70598807 10176.89713614]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3157.70598807 10176.89713614]
New Q values:  [-2527.46239811 -8521.23367799  3157.70598807 11830.91719042]
Reward: -1  Episode Reward:  46
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[25869.19445322     0.          5006.11623637   644.94785455]
------
Step:15, Action:East
State  256
Old Q Values:  [25869.19445322     0.          5006.11623637   644.94785455]
New Q values:  [25869.19445322     0.          5551.12165167   644.94785455]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3157.70598807 11830.91719042]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3157.70598807 11830.91719042]
New Q values:  [-2527.46239811 -8521.23367799  3157.70598807 12492.52521214]
Reward: -1  Episode Reward:  44
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[25869.19445322     0.          5551.12165167   644.94785455]
------
Step:17, Action:East
State  256
Old Q Values:  [25869.19445322     0.          5551.12165167   644.94785455]
New Q values:  [25869.19445322     0.          5967.60622431   644.94785455]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3157.70598807 12492.52521214]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3157.70598807 12492.52521214]
New Q values:  [-2527.46239811 -8521.23367799  3157.70598807 12757.16842082]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[25869.19445322     0.          5967.60622431   644.94785455]
------
Step:19, Action:East
State  256
Old Q Values:  [25869.19445322     0.          5967.60622431   644.94785455]
New Q values:  [25869.19445322     0.          6213.59301597   644.94785455]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3157.70598807 12757.16842082]
------
Step:20, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3157.70598807 12757.16842082]
New Q values:  [-2527.46239811 -8521.23367799  3157.70598807 12863.0257043 ]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[25869.19445322     0.          6213.59301597   644.94785455]
------
Step:21, Action:East
State  256
Old Q Values:  [25869.19445322     0.          6213.59301597   644.94785455]
New Q values:  [25869.19445322     0.          6343.74491768   644.94785455]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3157.70598807 12863.0257043 ]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3157.70598807 12863.0257043 ]
New Q values:  [-2527.46239811 -8521.23367799  3549.64957958 12863.0257043 ]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2641.43193019 -8656.02923281 -7525.7277781   7623.89061451]
------
Step:23, Action:North
State  288
Old Q Values:  [ 2641.43193019 -8656.02923281 -7525.7277781   7623.89061451]
New Q values:  [ 6076.76379982 -8656.02923281 -7525.7277781   7623.89061451]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.67359701e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:24, Action:North
State  210
Old Q Values:  [ 1.67359701e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.34577493e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  36
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072 12257.06013342  -180.00807518 55879.87076554]
------
Step:25, Action:West
State  130
Old Q Values:  [29415.41153072 12257.06013342  -180.00807518 55879.87076554]
New Q values:  [29415.41153072 12257.06013342  -180.00807518 54034.30161934]
Reward: -1  Episode Reward:  35
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051    112.85376583 105609.84437707]
------
Step:26, Action:West
State  114
Old Q Values:  [  -180.6          4272.38349051    112.85376583 105609.84437707]
New Q values:  [-1.80600000e+02  4.27238349e+03  1.12853766e+02  1.13064731e+05]
Reward: 100009  Episode Reward:  100044
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    35.80700627 -1482.55814493  -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [   13.85659648    35.80700627 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   898.03013433 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  -200.37659742 2927.69110606 -251.53897752]
------
Step:2, Action:East
State  261
Old Q Values:  [  57.2108991  -200.37659742 2927.69110606 -251.53897752]
New Q values:  [  57.2108991  -200.37659742 1525.09410505 -251.53897752]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1162.05887541   35.60960053]
------
Step:3, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        1162.05887541   35.60960053]
New Q values:  [  16.82637525 -180.6        2757.39073452   35.60960053]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6076.76379982 -8656.02923281 -7525.7277781   7623.89061451]
------
Step:4, Action:West
State  288
Old Q Values:  [ 6076.76379982 -8656.02923281 -7525.7277781   7623.89061451]
New Q values:  [ 6076.76379982 -8656.02923281 -7525.7277781   6907.86395709]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3549.64957958 12863.0257043 ]
------
Step:5, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 3759.05186591 3756.93425069]
New Q values:  [  37.74111519 -168.92307549 3759.05186591 1959.70193179]
Reward: -1  Episode Reward:  25
xxxxx
x. gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  -200.37659742 1525.09410505 -251.53897752]
------
Step:6, Action:East
State  261
Old Q Values:  [  57.2108991  -200.37659742 1525.09410505 -251.53897752]
New Q values:  [  57.2108991  -200.37659742 4468.34535331 -251.53897752]
Reward: -1  Episode Reward:  24
xxxxx
x.g.x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3549.64957958 12863.0257043 ]
------
Step:7, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 3759.05186591 1959.70193179]
New Q values:  [  37.74111519 -168.92307549 3759.05186591 2123.78437871]
Reward: -1  Episode Reward:  23
xxxxx
x. gx
x ..x
xa  x
xxxxx
Step:8, Action:South
State  261
Old Q Values:  [  57.2108991  -200.37659742 4468.34535331 -251.53897752]
New Q values:  [  57.2108991  1079.75296702 4468.34535331 -251.53897752]
Reward: -301  Episode Reward:  -278
xxxxx
x. .x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  1079.75296702 4468.34535331 -251.53897752]
------
Step:9, Action:East
State  261
Old Q Values:  [  57.2108991  1079.75296702 4468.34535331 -251.53897752]
New Q values:  [  57.2108991  1079.75296702 2613.95536168 -251.53897752]
Reward: -1  Episode Reward:  -279
xxxxx
x. .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        2757.39073452   35.60960053]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3549.64957958 12863.0257043 ]
New Q values:  [-2527.46239811 -8521.23367799  3491.61901896 12863.0257043 ]
Reward: -1  Episode Reward:  -280
xxxxx
x.g.x
x ..x
x  ax
xxxxx
Step:11, Action:North
State  288
Old Q Values:  [ 6076.76379982 -8656.02923281 -7525.7277781   6907.86395709]
New Q values:  [ 3364.58873205 -8656.02923281 -7525.7277781   6907.86395709]
Reward: 9  Episode Reward:  -271
xxxxx
x. gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1571.34646768  3094.94404042 -1855.11188891 -3385.12952694]
------
Step:12, Action:South
State  208
Old Q Values:  [-1571.34646768  3094.94404042 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  3309.73680329 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -272
xxxxx
x. .x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3364.58873205 -8656.02923281 -7525.7277781   6907.86395709]
------
Step:13, Action:West
State  288
Old Q Values:  [ 3364.58873205 -8656.02923281 -7525.7277781   6907.86395709]
New Q values:  [ 3364.58873205 -8656.02923281 -7525.7277781   3890.26114261]
Reward: -1  Episode Reward:  -273
xxxxx
x. .x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3759.05186591 2123.78437871]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3491.61901896 12863.0257043 ]
New Q values:  [-2527.46239811 -8521.23367799  3491.61901896  5928.79689022]
Reward: -1  Episode Reward:  -274
xxxxx
x. .x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  1079.75296702 2613.95536168 -251.53897752]
------
Step:15, Action:South
State  261
Old Q Values:  [  57.2108991  1079.75296702 2613.95536168 -251.53897752]
New Q values:  [  57.2108991  1035.48779531 2613.95536168 -251.53897752]
Reward: -301  Episode Reward:  -575
xxxxx
x. .x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  1035.48779531 2613.95536168 -251.53897752]
------
Step:16, Action:East
State  261
Old Q Values:  [  57.2108991  1035.48779531 2613.95536168 -251.53897752]
New Q values:  [  57.2108991  1035.48779531 2172.69770444 -251.53897752]
Reward: -1  Episode Reward:  -576
xxxxx
x. .x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3759.05186591 2123.78437871]
------
Step:17, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 3759.05186591 2123.78437871]
New Q values:  [   37.74111519  -168.92307549 -3329.90091085  2123.78437871]
Reward: -10001  Episode Reward:  -10577
xxxxx
x. .x
x . x
x  gx
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   898.03013433 -1482.55814493  -180.6       ]
------
Step:1, Action:South
State  183
Old Q Values:  [  22.25138791   51.14898101 1755.27970652    0.        ]
New Q values:  [  22.25138791  677.66890374 1755.27970652    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  57.2108991  1035.48779531 2172.69770444 -251.53897752]
------
Step:2, Action:East
State  261
Old Q Values:  [  57.2108991  1035.48779531 2172.69770444 -251.53897752]
New Q values:  [   57.2108991   1035.48779531 -3346.88185116  -251.53897752]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
x . x
x g.x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  1084.74607672    55.65177422]
New Q values:  [ -281.736      -8877.87327254  2146.47228605    55.65177422]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        5690.57951788 -180.6         637.07916644]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        5690.57951788 -180.6         637.07916644]
New Q values:  [-180.6        3274.55284814 -180.6         637.07916644]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1571.34646768  3309.73680329 -1855.11188891 -3385.12952694]
------
Step:3, Action:South
State  208
Old Q Values:  [-1571.34646768  3309.73680329 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  2490.3730641  -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3364.58873205 -8656.02923281 -7525.7277781   3890.26114261]
------
Step:4, Action:West
State  288
Old Q Values:  [ 3364.58873205 -8656.02923281 -7525.7277781   3890.26114261]
New Q values:  [ 3364.58873205 -8656.02923281 -7525.7277781   2198.63977066]
Reward: 9  Episode Reward:  26
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -3329.90091085  2123.78437871]
------
Step:5, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -3329.90091085  2123.78437871]
New Q values:  [   37.74111519  -168.92307549 -3329.90091085  3781.97636997]
Reward: 9  Episode Reward:  35
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[9756.87539497 3481.80752105  750.46010822 2937.02145253]
------
Step:6, Action:North
State  257
Old Q Values:  [9756.87539497 3481.80752105  750.46010822 2937.02145253]
New Q values:  [5655.08750474 3481.80752105  750.46010822 2937.02145253]
Reward: 9  Episode Reward:  44
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         5823.12448917 4337.36193241    0.        ]
------
Step:7, Action:South
State  177
Old Q Values:  [   0.         5823.12448917 4337.36193241    0.        ]
New Q values:  [   0.         4025.17604709 4337.36193241    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[5655.08750474 3481.80752105  750.46010822 2937.02145253]
------
Step:8, Action:North
State  257
Old Q Values:  [5655.08750474 3481.80752105  750.46010822 2937.02145253]
New Q values:  [3562.64358162 3481.80752105  750.46010822 2937.02145253]
Reward: -1  Episode Reward:  42
xxxxx
x. gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         4025.17604709 4337.36193241    0.        ]
------
Step:9, Action:East
State  177
Old Q Values:  [   0.         4025.17604709 4337.36193241    0.        ]
New Q values:  [   0.         4025.17604709 3057.4385126     0.        ]
Reward: 9  Episode Reward:  51
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
------
Step:10, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  5.17268227e+03  2.29327585e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  3.84711198e+03  2.29327585e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  50
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3491.61901896  5928.79689022]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3491.61901896  5928.79689022]
New Q values:  [-2527.46239811 -8521.23367799  3491.61901896 10131.67709206]
Reward: -1  Episode Reward:  49
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[25869.19445322     0.          6343.74491768   644.94785455]
------
Step:12, Action:East
State  256
Old Q Values:  [25869.19445322     0.          6343.74491768   644.94785455]
New Q values:  [25869.19445322     0.          5576.40109469   644.94785455]
Reward: -1  Episode Reward:  48
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3491.61901896 10131.67709206]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3491.61901896 10131.67709206]
New Q values:  [-2527.46239811 -8521.23367799  2405.4242272  10131.67709206]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3364.58873205 -8656.02923281 -7525.7277781   2198.63977066]
------
Step:14, Action:North
State  288
Old Q Values:  [ 3364.58873205 -8656.02923281 -7525.7277781   2198.63977066]
New Q values:  [ 8382.56027282 -8656.02923281 -7525.7277781   2198.63977066]
Reward: -1  Episode Reward:  46
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.34577493e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:15, Action:North
State  210
Old Q Values:  [ 2.34577493e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.55927902e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  45
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072 12257.06013342  -180.00807518 54034.30161934]
------
Step:16, Action:West
State  130
Old Q Values:  [29415.41153072 12257.06013342  -180.00807518 54034.30161934]
New Q values:  [29415.41153072 12257.06013342  -180.00807518 55532.53985026]
Reward: -1  Episode Reward:  44
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02  4.27238349e+03  1.12853766e+02  1.13064731e+05]
------
Step:17, Action:West
State  114
Old Q Values:  [-1.80600000e+02  4.27238349e+03  1.12853766e+02  1.13064731e+05]
New Q values:  [-1.80600000e+02  4.27238349e+03  1.12853766e+02  1.14735998e+05]
Reward: 100009  Episode Reward:  100053
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8382.56027282 -8656.02923281 -7525.7277781   2198.63977066]
------
Step:1, Action:North
State  288
Old Q Values:  [ 8382.56027282 -8656.02923281 -7525.7277781   2198.63977066]
New Q values:  [ 4105.53602836 -8656.02923281 -7525.7277781   2198.63977066]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1571.34646768  2490.3730641  -1855.11188891 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [-1571.34646768  2490.3730641  -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  2227.21003415 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4105.53602836 -8656.02923281 -7525.7277781   2198.63977066]
------
Step:3, Action:North
State  288
Old Q Values:  [ 4105.53602836 -8656.02923281 -7525.7277781   2198.63977066]
New Q values:  [ 2309.77742159 -8656.02923281 -7525.7277781   2198.63977066]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1571.34646768  2227.21003415 -1855.11188891 -3385.12952694]
------
Step:4, Action:South
State  208
Old Q Values:  [-1571.34646768  2227.21003415 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  1583.21724014 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2309.77742159 -8656.02923281 -7525.7277781   2198.63977066]
------
Step:5, Action:North
State  288
Old Q Values:  [ 2309.77742159 -8656.02923281 -7525.7277781   2198.63977066]
New Q values:  [ 1398.27614068 -8656.02923281 -7525.7277781   2198.63977066]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1571.34646768  1583.21724014 -1855.11188891 -3385.12952694]
------
Step:6, Action:South
State  208
Old Q Values:  [-1571.34646768  1583.21724014 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  1292.27882725 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1398.27614068 -8656.02923281 -7525.7277781   2198.63977066]
------
Step:7, Action:West
State  288
Old Q Values:  [ 1398.27614068 -8656.02923281 -7525.7277781   2198.63977066]
New Q values:  [ 1398.27614068 -8656.02923281 -7525.7277781   3924.35903588]
Reward: 9  Episode Reward:  13
xxxxx
x.g.x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2405.4242272  10131.67709206]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2405.4242272  10131.67709206]
New Q values:  [-2527.46239811 -8521.23367799  2405.4242272   5120.86391131]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[3562.64358162 3481.80752105  750.46010822 2937.02145253]
------
Step:9, Action:North
State  261
Old Q Values:  [   57.2108991   1035.48779531 -3346.88185116  -251.53897752]
New Q values:  [  554.8682716   1035.48779531 -3346.88185116  -251.53897752]
Reward: 9  Episode Reward:  21
xxxxx
x...x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  677.66890374 1755.27970652    0.        ]
------
Step:10, Action:East
State  183
Old Q Values:  [  22.25138791  677.66890374 1755.27970652    0.        ]
New Q values:  [  22.25138791  677.66890374 5185.69205879    0.        ]
Reward: 9  Episode Reward:  30
xxxxx
x...x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 14927.26725394  6267.88141429  2546.60363946]
------
Step:11, Action:South
State  194
Old Q Values:  [-6.00000000e-01  6.56227737e+03  5.11059650e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -1.83942988e+03  5.11059650e+03  1.20371620e+03]
Reward: -10001  Episode Reward:  -9971
xxxxx
x...x
x   x
x g x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2405.4242272   5120.86391131]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2405.4242272   5120.86391131]
New Q values:  [-2527.46239811 -8521.23367799  2405.4242272   2364.39190312]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  554.8682716   1035.48779531 -3346.88185116  -251.53897752]
------
Step:2, Action:South
State  260
Old Q Values:  [ -761.40536588 -6457.4598       227.14724233 -7094.93143822]
New Q values:  [ -761.40536588 -8695.4397473    227.14724233 -7094.93143822]
Reward: -10301  Episode Reward:  -10292
xxxxx
x...x
x. .x
xg .x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   898.03013433 -1482.55814493  -180.6       ]
------
Step:1, Action:South
State  183
Old Q Values:  [  22.25138791  677.66890374 5185.69205879    0.        ]
New Q values:  [  22.25138791  587.11390009 5185.69205879    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  554.8682716   1035.48779531 -3346.88185116  -251.53897752]
------
Step:2, Action:South
State  261
Old Q Values:  [  554.8682716   1035.48779531 -3346.88185116  -251.53897752]
New Q values:  [  554.8682716    544.24145672 -3346.88185116  -251.53897752]
Reward: -301  Episode Reward:  -292
xxxxx
x...x
x  .x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  554.8682716    544.24145672 -3346.88185116  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  554.8682716    544.24145672 -3346.88185116  -251.53897752]
New Q values:  [  490.75634894   544.24145672 -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  -293
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   898.03013433 -1482.55814493  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648   898.03013433 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   521.88449075 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  -294
xxxxx
x..gx
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  490.75634894   544.24145672 -3346.88185116  -251.53897752]
------
Step:5, Action:South
State  261
Old Q Values:  [  490.75634894   544.24145672 -3346.88185116  -251.53897752]
New Q values:  [  490.75634894   200.3690197  -3346.88185116  -251.53897752]
Reward: -301  Episode Reward:  -595
xxxxx
x...x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  490.75634894   200.3690197  -3346.88185116  -251.53897752]
------
Step:6, Action:North
State  261
Old Q Values:  [  490.75634894   200.3690197  -3346.88185116  -251.53897752]
New Q values:  [  352.2678868    200.3690197  -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  -596
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   521.88449075 -1482.55814493  -180.6       ]
------
Step:7, Action:South
State  183
Old Q Values:  [  22.25138791  587.11390009 5185.69205879    0.        ]
New Q values:  [  22.25138791  339.92592608 5185.69205879    0.        ]
Reward: -1  Episode Reward:  -597
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  352.2678868    200.3690197  -3346.88185116  -251.53897752]
------
Step:8, Action:North
State  260
Old Q Values:  [ -761.40536588 -8695.4397473    227.14724233 -7094.93143822]
New Q values:  [  -56.74272828 -8695.4397473    227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  -598
xxxxx
x...x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   828.06472689     0.        ]
------
Step:9, Action:East
State  180
Old Q Values:  [-1367.02476015   274.38816599  3196.74268572     0.        ]
New Q values:  [-1367.02476015   274.38816599  2432.23066679     0.        ]
Reward: -1  Episode Reward:  -599
xxxxx
x...x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  3.84711198e+03  2.29327585e+03 -9.78728177e+01]
------
Step:10, Action:South
State  196
Old Q Values:  [-2469.90645144  1493.50470505   174.55451539     0.        ]
New Q values:  [-2469.90645144  1430.01910237   174.55451539     0.        ]
Reward: 9  Episode Reward:  -590
xxxxx
xg..x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        2757.39073452   35.60960053]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2405.4242272   2364.39190312]
New Q values:  [-2527.46239811 -8521.23367799  2144.87740164  2364.39190312]
Reward: 9  Episode Reward:  -581
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1398.27614068 -8656.02923281 -7525.7277781   3924.35903588]
------
Step:12, Action:West
State  288
Old Q Values:  [ 1398.27614068 -8656.02923281 -7525.7277781   3924.35903588]
New Q values:  [ 1398.27614068 -8656.02923281 -7525.7277781   2396.36083471]
Reward: -1  Episode Reward:  -582
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        2757.39073452   35.60960053]
------
Step:13, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        2757.39073452   35.60960053]
New Q values:  [  16.82637525 -180.6        1821.26454422   35.60960053]
Reward: -1  Episode Reward:  -583
xxxxx
xg..x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1398.27614068 -8656.02923281 -7525.7277781   2396.36083471]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1398.27614068 -8656.02923281 -7525.7277781   2396.36083471]
New Q values:  [ 1398.27614068 -8656.02923281 -7525.7277781   1667.26190482]
Reward: -1  Episode Reward:  -584
xxxxx
x...x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2144.87740164  2364.39190312]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2144.87740164  2364.39190312]
New Q values:  [-2527.46239811 -8521.23367799  2144.87740164 -4986.69906605]
Reward: -10001  Episode Reward:  -10585
xxxxx
x...x
x  .x
xg  x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  22.25138791  339.92592608 5185.69205879    0.        ]
------
Step:1, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534   828.06472689     0.        ]
New Q values:  [    0.         -5969.29177534  1869.80484129     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -1.83942988e+03  5.11059650e+03  1.20371620e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -1.83942988e+03  5.11059650e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -1.83942988e+03  9.72747566e+03  1.20371620e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.55927902e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [ 2.55927902e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.69022780e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072 12257.06013342  -180.00807518 55532.53985026]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        3274.55284814 -180.6         637.07916644]
New Q values:  [ -180.6         3274.55284814  -180.6        34681.03114577]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02  4.27238349e+03  1.12853766e+02  1.14735998e+05]
------
Step:5, Action:West
State  126
Old Q Values:  [   0.         2047.43645092    5.9800666   494.15810438]
New Q values:  [   0.         2047.43645092    5.9800666   233.92161362]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:6, Action:East
State  106
Old Q Values:  [ -180.6        -8399.5673214     81.02047099  -180.6       ]
New Q values:  [ -180.6        -8399.5673214    675.74987421  -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254  2146.47228605    55.65177422]
------
Step:7, Action:East
State  126
Old Q Values:  [   0.         2047.43645092    5.9800666   233.92161362]
New Q values:  [    0.          2047.43645092 10406.10137037   233.92161362]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6         3274.55284814  -180.6        34681.03114577]
------
Step:8, Action:West
State  138
Old Q Values:  [ -180.6         3274.55284814  -180.6        34681.03114577]
New Q values:  [ -180.6         3274.55284814  -180.6        14515.75414412]
Reward: -1  Episode Reward:  42
xxxxx
x a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254  2146.47228605    55.65177422]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  2146.47228605    55.65177422]
New Q values:  [ -281.736      -8877.87327254  5212.71515766    55.65177422]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6         3274.55284814  -180.6        14515.75414412]
------
Step:10, Action:West
State  138
Old Q Values:  [ -180.6         3274.55284814  -180.6        14515.75414412]
New Q values:  [-180.6        3274.55284814 -180.6        7369.51620495]
Reward: -1  Episode Reward:  40
xxxxx
x a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254  5212.71515766    55.65177422]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  5212.71515766    55.65177422]
New Q values:  [ -281.736      -8877.87327254  4295.34092455    55.65177422]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        3274.55284814 -180.6        7369.51620495]
------
Step:12, Action:West
State  138
Old Q Values:  [-180.6        3274.55284814 -180.6        7369.51620495]
New Q values:  [-180.6        3274.55284814 -180.6        3535.12317918]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  4295.34092455    55.65177422]
New Q values:  [ -281.736      -8877.87327254  2778.07332357    55.65177422]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        3274.55284814 -180.6        3535.12317918]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6        3274.55284814 -180.6        3535.12317918]
New Q values:  [-180.6        3274.55284814 -180.6        2246.87126874]
Reward: -1  Episode Reward:  36
xxxxx
x a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254  2778.07332357    55.65177422]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  2778.07332357    55.65177422]
New Q values:  [ -281.736      -8877.87327254  2092.99518387    55.65177422]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        3274.55284814 -180.6        2246.87126874]
------
Step:16, Action:South
State  138
Old Q Values:  [-180.6        3274.55284814 -180.6        2246.87126874]
New Q values:  [-180.6        1544.71010611 -180.6        2246.87126874]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 297.42391963 -610.30346672    0.          784.96322284]
------
Step:17, Action:West
State  216
Old Q Values:  [ 1843.0414477   2727.28415251 -6170.35693855 -2387.54492731]
New Q values:  [ 1843.0414477   2727.28415251 -6170.35693855 -5742.4595223 ]
Reward: -10001  Episode Reward:  -9967
xxxxx
x   x
x g x
x. .x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -1.83942988e+03  9.72747566e+03  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  3.84711198e+03  2.29327585e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  3.84711198e+03  1.31039399e+03 -9.78728177e+01]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1571.34646768  1292.27882725 -1855.11188891 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [-1571.34646768  1292.27882725 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  1022.49010235 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1398.27614068 -8656.02923281 -7525.7277781   1667.26190482]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1398.27614068 -8656.02923281 -7525.7277781   1667.26190482]
New Q values:  [ 1398.27614068 -8656.02923281 -7525.7277781   1315.76798242]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2144.87740164 -4986.69906605]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2144.87740164 -4986.69906605]
New Q values:  [-2527.46239811 -8521.23367799  1276.83380286 -4986.69906605]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x.g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1398.27614068 -8656.02923281 -7525.7277781   1315.76798242]
------
Step:5, Action:North
State  288
Old Q Values:  [ 1398.27614068 -8656.02923281 -7525.7277781   1315.76798242]
New Q values:  [ 8629.39386589 -8656.02923281 -7525.7277781   1315.76798242]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.69022780e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [ 2.69022780e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 2.74260732e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  34
xxxxx
x..ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072 12257.06013342  -180.00807518 55532.53985026]
------
Step:7, Action:West
State  130
Old Q Values:  [29415.41153072 12257.06013342  -180.00807518 55532.53985026]
New Q values:  [29415.41153072 12257.06013342  -180.00807518 57260.02759293]
Reward: 9  Episode Reward:  43
xxxxx
x.a x
x. gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.16805372e+05]
------
Step:8, Action:West
State  121
Old Q Values:  [    0.             0.         -9726.26759903    56.10465519]
New Q values:  [    0.             0.         -9726.26759903    76.59116932]
Reward: 9  Episode Reward:  52
xxxxx
xa gx
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          162.4976908  -5851.25726525     0.        ]
------
Step:9, Action:South
State  99
Old Q Values:  [    0.          5102.60835331 36051.30974755     0.        ]
New Q values:  [    0.         63253.99615545 36051.30974755     0.        ]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
xa gx
x   x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8629.39386589 -8656.02923281 -7525.7277781   1315.76798242]
------
Step:1, Action:North
State  288
Old Q Values:  [ 8629.39386589 -8656.02923281 -7525.7277781   1315.76798242]
New Q values:  [ 3763.90457706 -8656.02923281 -7525.7277781   1315.76798242]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1571.34646768  1022.49010235 -1855.11188891 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [-1571.34646768  1022.49010235 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  1537.56741406 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3763.90457706 -8656.02923281 -7525.7277781   1315.76798242]
------
Step:3, Action:North
State  288
Old Q Values:  [ 3763.90457706 -8656.02923281 -7525.7277781   1315.76798242]
New Q values:  [ 1966.23205504 -8656.02923281 -7525.7277781   1315.76798242]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1571.34646768  1537.56741406 -1855.11188891 -3385.12952694]
------
Step:4, Action:South
State  208
Old Q Values:  [-1571.34646768  1537.56741406 -1855.11188891 -3385.12952694]
New Q values:  [-1571.34646768  1204.29658213 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  6
xxxxx
x..gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1966.23205504 -8656.02923281 -7525.7277781   1315.76798242]
------
Step:5, Action:North
State  288
Old Q Values:  [ 1966.23205504 -8656.02923281 -7525.7277781   1315.76798242]
New Q values:  [-4852.81820334 -8656.02923281 -7525.7277781   1315.76798242]
Reward: -10001  Episode Reward:  -9995
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.74260732e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:1, Action:North
State  208
Old Q Values:  [-1571.34646768  1204.29658213 -1855.11188891 -3385.12952694]
New Q values:  [   50.92279355  1204.29658213 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1544.71010611 -180.6        2246.87126874]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        1544.71010611 -180.6        2246.87126874]
New Q values:  [-180.6        1544.71010611 -180.6        1532.04706266]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254  2092.99518387    55.65177422]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  2092.99518387    55.65177422]
New Q values:  [ -281.736      -8877.87327254  1300.01110538    55.65177422]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg. x
x ..x
xxxxx
Step:4, Action:South
State  136
Old Q Values:  [ -724.71310357  1152.50499936 -6245.61866138    94.39090975]
New Q values:  [ -724.71310357   821.69097439 -6245.61866138    94.39090975]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   50.92279355  1204.29658213 -1855.11188891 -3385.12952694]
------
Step:5, Action:South
State  208
Old Q Values:  [   50.92279355  1204.29658213 -1855.11188891 -3385.12952694]
New Q values:  [   50.92279355   881.84902758 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  25
xxxxx
x.g x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4852.81820334 -8656.02923281 -7525.7277781   1315.76798242]
------
Step:6, Action:West
State  288
Old Q Values:  [-4852.81820334 -8656.02923281 -7525.7277781   1315.76798242]
New Q values:  [-4852.81820334 -8656.02923281 -7525.7277781    914.75733383]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1276.83380286 -4986.69906605]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1276.83380286 -4986.69906605]
New Q values:  [-2527.46239811 -8521.23367799   784.56072129 -4986.69906605]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4852.81820334 -8656.02923281 -7525.7277781    914.75733383]
------
Step:8, Action:North
State  288
Old Q Values:  [-4852.81820334 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [ 6286.09466903 -8656.02923281 -7525.7277781    914.75733383]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2.74260732e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:9, Action:North
State  210
Old Q Values:  [ 2.74260732e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.14332423e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1544.71010611 -180.6        1532.04706266]
------
Step:10, Action:South
State  138
Old Q Values:  [-180.6        1544.71010611 -180.6        1532.04706266]
New Q values:  [-180.6        4047.25673214 -180.6        1532.04706266]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.14332423e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
------
Step:11, Action:North
State  210
Old Q Values:  [ 1.14332423e+04  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 5.78687394e+03  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  29
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4047.25673214 -180.6        1532.04706266]
------
Step:12, Action:South
State  130
Old Q Values:  [29415.41153072 12257.06013342  -180.00807518 57260.02759293]
New Q values:  [29415.41153072  5166.77876164  -180.00807518 57260.02759293]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   50.92279355   881.84902758 -1855.11188891 -3385.12952694]
------
Step:13, Action:South
State  210
Old Q Values:  [ 5.78687394e+03  1.63159900e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 5.78687394e+03  2.53786800e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6286.09466903 -8656.02923281 -7525.7277781    914.75733383]
------
Step:14, Action:North
State  288
Old Q Values:  [ 6286.09466903 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [ 4249.90004939 -8656.02923281 -7525.7277781    914.75733383]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.78687394e+03  2.53786800e+03 -1.80600000e+02  3.52184257e+00]
------
Step:15, Action:North
State  210
Old Q Values:  [ 5.78687394e+03  2.53786800e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 3.52832660e+03  2.53786800e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4047.25673214 -180.6        1532.04706266]
------
Step:16, Action:South
State  138
Old Q Values:  [-180.6        4047.25673214 -180.6        1532.04706266]
New Q values:  [-180.6        2676.80067146 -180.6        1532.04706266]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3.52832660e+03  2.53786800e+03 -1.80600000e+02  3.52184257e+00]
------
Step:17, Action:North
State  210
Old Q Values:  [ 3.52832660e+03  2.53786800e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [2213.77083957 2537.86800007 -180.6           3.52184257]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2676.80067146 -180.6        1532.04706266]
------
Step:18, Action:South
State  130
Old Q Values:  [29415.41153072  5166.77876164  -180.00807518 57260.02759293]
New Q values:  [29415.41153072  2330.66621293  -180.00807518 57260.02759293]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   50.92279355   881.84902758 -1855.11188891 -3385.12952694]
------
Step:19, Action:South
State  210
Old Q Values:  [2213.77083957 2537.86800007 -180.6           3.52184257]
New Q values:  [2213.77083957 2289.51721484 -180.6           3.52184257]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4249.90004939 -8656.02923281 -7525.7277781    914.75733383]
------
Step:20, Action:North
State  288
Old Q Values:  [ 4249.90004939 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [ 1963.91472803 -8656.02923281 -7525.7277781    914.75733383]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   50.92279355   881.84902758 -1855.11188891 -3385.12952694]
------
Step:21, Action:South
State  208
Old Q Values:  [   50.92279355   881.84902758 -1855.11188891 -3385.12952694]
New Q values:  [   50.92279355   941.31402944 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1963.91472803 -8656.02923281 -7525.7277781    914.75733383]
------
Step:22, Action:North
State  288
Old Q Values:  [ 1963.91472803 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [ 1067.36010004 -8656.02923281 -7525.7277781    914.75733383]
Reward: -1  Episode Reward:  18
xxxxx
xg  x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[   50.92279355   941.31402944 -1855.11188891 -3385.12952694]
------
Step:23, Action:South
State  208
Old Q Values:  [   50.92279355   941.31402944 -1855.11188891 -3385.12952694]
New Q values:  [   50.92279355   696.13364179 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1067.36010004 -8656.02923281 -7525.7277781    914.75733383]
------
Step:24, Action:North
State  288
Old Q Values:  [ 1067.36010004 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [ 1113.19920447 -8656.02923281 -7525.7277781    914.75733383]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2213.77083957 2289.51721484 -180.6           3.52184257]
------
Step:25, Action:South
State  210
Old Q Values:  [2213.77083957 2289.51721484 -180.6           3.52184257]
New Q values:  [2213.77083957 1249.16664728 -180.6           3.52184257]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1113.19920447 -8656.02923281 -7525.7277781    914.75733383]
------
Step:26, Action:North
State  288
Old Q Values:  [ 1113.19920447 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [ 1108.81093366 -8656.02923281 -7525.7277781    914.75733383]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2213.77083957 1249.16664728 -180.6           3.52184257]
------
Step:27, Action:North
State  208
Old Q Values:  [   50.92279355   696.13364179 -1855.11188891 -3385.12952694]
New Q values:  [  822.80931886   696.13364179 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2676.80067146 -180.6        1532.04706266]
------
Step:28, Action:West
State  138
Old Q Values:  [-180.6        2676.80067146 -180.6        1532.04706266]
New Q values:  [-180.6        2676.80067146 -180.6        1200.13552226]
Reward: -1  Episode Reward:  12
xxxxx
x.a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:29, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  1300.01110538    55.65177422]
New Q values:  [ -281.736      -8877.87327254  1322.44464359    55.65177422]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2676.80067146 -180.6        1200.13552226]
------
Step:30, Action:South
State  138
Old Q Values:  [-180.6        2676.80067146 -180.6        1200.13552226]
New Q values:  [-180.6        1734.25152046 -180.6        1200.13552226]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2213.77083957 1249.16664728 -180.6           3.52184257]
------
Step:31, Action:North
State  208
Old Q Values:  [  822.80931886   696.13364179 -1855.11188891 -3385.12952694]
New Q values:  [17506.53200542   696.13364179 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2330.66621293  -180.00807518 57260.02759293]
------
Step:32, Action:West
State  130
Old Q Values:  [29415.41153072  2330.66621293  -180.00807518 57260.02759293]
New Q values:  [29415.41153072  2330.66621293  -180.00807518 57324.21051636]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02  4.27238349e+03  1.12853766e+02  1.14735998e+05]
------
Step:33, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         800.5360257   -2375.90118319]
New Q values:  [-10156.11771313  -5995.686         800.5360257    -896.21116603]
Reward: 9  Episode Reward:  17
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          162.4976908  -5851.25726525     0.        ]
------
Step:34, Action:South
State  99
Old Q Values:  [    0.         63253.99615545 36051.30974755     0.        ]
New Q values:  [    0.         26514.55127631 36051.30974755     0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         4025.17604709 3057.4385126     0.        ]
------
Step:35, Action:South
State  183
Old Q Values:  [  22.25138791  339.92592608 5185.69205879    0.        ]
New Q values:  [  22.25138791  241.05073647 5185.69205879    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  352.2678868    200.3690197  -3346.88185116  -251.53897752]
------
Step:36, Action:North
State  257
Old Q Values:  [3562.64358162 3481.80752105  750.46010822 2937.02145253]
New Q values:  [2632.01024677 3481.80752105  750.46010822 2937.02145253]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         4025.17604709 3057.4385126     0.        ]
------
Step:37, Action:South
State  180
Old Q Values:  [-1367.02476015   274.38816599  2432.23066679     0.        ]
New Q values:  [-1367.02476015   177.2994391   2432.23066679     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  -56.74272828 -8695.4397473    227.14724233 -7094.93143822]
------
Step:38, Action:East
State  257
Old Q Values:  [2632.01024677 3481.80752105  750.46010822 2937.02145253]
New Q values:  [2632.01024677 3481.80752105  534.95225968 2937.02145253]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   784.56072129 -4986.69906605]
------
Step:39, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   784.56072129 -4986.69906605]
New Q values:  [-2527.46239811 -8521.23367799   645.86756861 -4986.69906605]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1108.81093366 -8656.02923281 -7525.7277781    914.75733383]
------
Step:40, Action:North
State  288
Old Q Values:  [ 1108.81093366 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [ 5694.88397509 -8656.02923281 -7525.7277781    914.75733383]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17506.53200542   696.13364179 -1855.11188891 -3385.12952694]
------
Step:41, Action:North
State  208
Old Q Values:  [17506.53200542   696.13364179 -1855.11188891 -3385.12952694]
New Q values:  [ 7522.28825831   696.13364179 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1734.25152046 -180.6        1200.13552226]
------
Step:42, Action:West
State  138
Old Q Values:  [-180.6        1734.25152046 -180.6        1200.13552226]
New Q values:  [-180.6        1734.25152046 -180.6        1067.3709061 ]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:43, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  1322.44464359    55.65177422]
New Q values:  [ -281.736      -8877.87327254  1048.65331357    55.65177422]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1734.25152046 -180.6        1067.3709061 ]
------
Step:44, Action:South
State  138
Old Q Values:  [-180.6        1734.25152046 -180.6        1067.3709061 ]
New Q values:  [-180.6        1357.23186005 -180.6        1067.3709061 ]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2213.77083957 1249.16664728 -180.6           3.52184257]
------
Step:45, Action:North
State  210
Old Q Values:  [2213.77083957 1249.16664728 -180.6           3.52184257]
New Q values:  [1292.07789385 1249.16664728 -180.6           3.52184257]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1357.23186005 -180.6        1067.3709061 ]
------
Step:46, Action:South
State  130
Old Q Values:  [29415.41153072  2330.66621293  -180.00807518 57324.21051636]
New Q values:  [29415.41153072  3188.35296266  -180.00807518 57324.21051636]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7522.28825831   696.13364179 -1855.11188891 -3385.12952694]
------
Step:47, Action:North
State  208
Old Q Values:  [ 7522.28825831   696.13364179 -1855.11188891 -3385.12952694]
New Q values:  [ 3415.48486134   696.13364179 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1357.23186005 -180.6        1067.3709061 ]
------
Step:48, Action:West
State  136
Old Q Values:  [ -724.71310357   821.69097439 -6245.61866138    94.39090975]
New Q values:  [ -724.71310357   821.69097439 -6245.61866138    60.13371469]
Reward: -1  Episode Reward:  12
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    76.59116932]
------
Step:49, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         800.5360257    -896.21116603]
New Q values:  [-10156.11771313  -5995.686         800.5360257    -274.94467856]
Reward: -1  Episode Reward:  11
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   280.46595952 -8213.80649336  -180.6       ]
------
Step:50, Action:South
State  108
Old Q Values:  [-8463.16477134   608.44407335   536.87315378     0.        ]
New Q values:  [-8463.16477134   972.44682938   536.87315378     0.        ]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   177.2994391   2432.23066679     0.        ]
------
Step:51, Action:East
State  176
Old Q Values:  [   0.         1327.79507613 2502.43137476    0.        ]
New Q values:  [    0.          1327.79507613 62160.50614241     0.        ]
Reward: 100009  Episode Reward:  100019
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  934.15671921  862.42400667 -272.09726687]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6         782.35058345    0.            0.        ]
New Q values:  [-180.6        474.9055806    0.           0.       ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xag.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   521.88449075 -1482.55814493  -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   13.85659648   521.88449075 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   319.83416234 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  352.2678868    200.3690197  -3346.88185116  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  352.2678868    200.3690197  -3346.88185116  -251.53897752]
New Q values:  [  236.25740342   200.3690197  -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   319.83416234 -1482.55814493  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648   319.83416234 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   198.21088596 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  236.25740342   200.3690197  -3346.88185116  -251.53897752]
------
Step:5, Action:North
State  261
Old Q Values:  [  236.25740342   200.3690197  -3346.88185116  -251.53897752]
New Q values:  [  153.36622716   200.3690197  -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  15
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   198.21088596 -1482.55814493  -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [   13.85659648   198.21088596 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648   138.7950603  -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  153.36622716   200.3690197  -3346.88185116  -251.53897752]
------
Step:7, Action:South
State  261
Old Q Values:  [  153.36622716   200.3690197  -3346.88185116  -251.53897752]
New Q values:  [  153.36622716   -40.34168621 -3346.88185116  -251.53897752]
Reward: -301  Episode Reward:  -287
xxxxx
x ..x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  153.36622716   -40.34168621 -3346.88185116  -251.53897752]
------
Step:8, Action:North
State  261
Old Q Values:  [  153.36622716   -40.34168621 -3346.88185116  -251.53897752]
New Q values:  [  102.38500895   -40.34168621 -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  -288
xxxxx
x ..x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   138.7950603  -1482.55814493  -180.6       ]
------
Step:9, Action:South
State  183
Old Q Values:  [  22.25138791  241.05073647 5185.69205879    0.        ]
New Q values:  [  22.25138791  126.53579727 5185.69205879    0.        ]
Reward: -1  Episode Reward:  -289
xxxxx
x ..x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  102.38500895   -40.34168621 -3346.88185116  -251.53897752]
------
Step:10, Action:North
State  261
Old Q Values:  [  102.38500895   -40.34168621 -3346.88185116  -251.53897752]
New Q values:  [   81.99252167   -40.34168621 -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  -290
xxxxx
x ..x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648   138.7950603  -1482.55814493  -180.6       ]
------
Step:11, Action:South
State  181
Old Q Values:  [   13.85659648   138.7950603  -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    79.51578062 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  -291
xxxxx
x .gx
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   81.99252167   -40.34168621 -3346.88185116  -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [   81.99252167   -40.34168621 -3346.88185116  -251.53897752]
New Q values:  [   56.05174285   -40.34168621 -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  -292
xxxxx
x g.x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    79.51578062 -1482.55814493  -180.6       ]
------
Step:13, Action:South
State  181
Old Q Values:  [   13.85659648    79.51578062 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    48.0218351  -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  -293
xxxxx
x ..x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   56.05174285   -40.34168621 -3346.88185116  -251.53897752]
------
Step:14, Action:North
State  261
Old Q Values:  [   56.05174285   -40.34168621 -3346.88185116  -251.53897752]
New Q values:  [   36.22724767   -40.34168621 -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  -294
xxxxx
x g.x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    48.0218351  -1482.55814493  -180.6       ]
------
Step:15, Action:South
State  181
Old Q Values:  [   13.85659648    48.0218351  -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    29.47690834 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  -295
xxxxx
x .gx
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[   36.22724767   -40.34168621 -3346.88185116  -251.53897752]
------
Step:16, Action:North
State  261
Old Q Values:  [   36.22724767   -40.34168621 -3346.88185116  -251.53897752]
New Q values:  [   22.73397157   -40.34168621 -3346.88185116  -251.53897752]
Reward: -1  Episode Reward:  -296
xxxxx
x g.x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    29.47690834 -1482.55814493  -180.6       ]
------
Step:17, Action:South
State  180
Old Q Values:  [-1367.02476015   177.2994391   2432.23066679     0.        ]
New Q values:  [-1367.02476015   138.46394834  2432.23066679     0.        ]
Reward: -1  Episode Reward:  -297
xxxxx
xg..x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  -56.74272828 -8695.4397473    227.14724233 -7094.93143822]
------
Step:18, Action:East
State  261
Old Q Values:  [   22.73397157   -40.34168621 -3346.88185116  -251.53897752]
New Q values:  [  22.73397157  -40.34168621 -792.9733772  -251.53897752]
Reward: -1  Episode Reward:  -298
xxxxx
x g.x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1821.26454422   35.60960053]
------
Step:19, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 -3329.90091085  3781.97636997]
New Q values:  [  37.74111519 -168.92307549  381.90482819 3781.97636997]
Reward: 9  Episode Reward:  -289
xxxxx
x .gx
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5694.88397509 -8656.02923281 -7525.7277781    914.75733383]
------
Step:20, Action:North
State  288
Old Q Values:  [ 5694.88397509 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [-2692.00095156 -8656.02923281 -7525.7277781    914.75733383]
Reward: -9991  Episode Reward:  -10280
xxxxx
x ..x
x .gx
x   x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   645.86756861 -4986.69906605]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   645.86756861 -4986.69906605]
New Q values:  [-2527.46239811 -8521.23367799   538.17422759 -4986.69906605]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2692.00095156 -8656.02923281 -7525.7277781    914.75733383]
------
Step:2, Action:North
State  288
Old Q Values:  [-2692.00095156 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [  -46.75492222 -8656.02923281 -7525.7277781    914.75733383]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3415.48486134   696.13364179 -1855.11188891 -3385.12952694]
------
Step:3, Action:North
State  208
Old Q Values:  [ 3415.48486134   696.13364179 -1855.11188891 -3385.12952694]
New Q values:  [18568.85709944   696.13364179 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3188.35296266  -180.00807518 57324.21051636]
------
Step:4, Action:West
State  136
Old Q Values:  [ -724.71310357   821.69097439 -6245.61866138    60.13371469]
New Q values:  [ -724.71310357   821.69097439 -6245.61866138   269.61429359]
Reward: 9  Episode Reward:  36
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         800.5360257    -274.94467856]
------
Step:5, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         800.5360257    -274.94467856]
New Q values:  [-10156.11771313  -5995.686         566.1217026    -274.94467856]
Reward: -1  Episode Reward:  35
xxxxx
x.gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   821.69097439 -6245.61866138   269.61429359]
------
Step:6, Action:South
State  136
Old Q Values:  [ -724.71310357   821.69097439 -6245.61866138   269.61429359]
New Q values:  [ -724.71310357  5898.73351959 -6245.61866138   269.61429359]
Reward: -1  Episode Reward:  34
xxxxx
x. gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18568.85709944   696.13364179 -1855.11188891 -3385.12952694]
------
Step:7, Action:South
State  208
Old Q Values:  [18568.85709944   696.13364179 -1855.11188891 -3385.12952694]
New Q values:  [18568.85709944   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  -46.75492222 -8656.02923281 -7525.7277781    914.75733383]
------
Step:8, Action:West
State  288
Old Q Values:  [  -46.75492222 -8656.02923281 -7525.7277781    914.75733383]
New Q values:  [  -46.75492222 -8656.02923281 -7525.7277781   1499.89584452]
Reward: -1  Episode Reward:  32
xxxxx
x. gx
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  381.90482819 3781.97636997]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   538.17422759 -4986.69906605]
New Q values:  [-2527.46239811 -8521.23367799   538.17422759  -950.73737011]
Reward: -1  Episode Reward:  31
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[2632.01024677 3481.80752105  534.95225968 2937.02145253]
------
Step:10, Action:South
State  256
Old Q Values:  [25869.19445322     0.          5576.40109469   644.94785455]
New Q values:  [25869.19445322  7580.15833597  5576.40109469   644.94785455]
Reward: -301  Episode Reward:  -270
xxxxx
xg  x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[25869.19445322  7580.15833597  5576.40109469   644.94785455]
------
Step:11, Action:North
State  256
Old Q Values:  [25869.19445322  7580.15833597  5576.40109469   644.94785455]
New Q values:  [23001.22962401  7580.15833597  5576.40109469   644.94785455]
Reward: -9991  Episode Reward:  -10261
xxxxx
x.  x
xg. x
x   x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  22.73397157  -40.34168621 -792.9733772  -251.53897752]
------
Step:1, Action:North
State  261
Old Q Values:  [  22.73397157  -40.34168621 -792.9733772  -251.53897752]
New Q values:  [  23.33666113  -40.34168621 -792.9733772  -251.53897752]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    29.47690834 -1482.55814493  -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   13.85659648    29.47690834 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    18.19176168 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  23.33666113  -40.34168621 -792.9733772  -251.53897752]
------
Step:3, Action:North
State  261
Old Q Values:  [  23.33666113  -40.34168621 -792.9733772  -251.53897752]
New Q values:  [  14.19219296  -40.34168621 -792.9733772  -251.53897752]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    18.19176168 -1482.55814493  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   13.85659648    18.19176168 -1482.55814493  -180.6       ]
New Q values:  [   13.85659648    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  14.19219296  -40.34168621 -792.9733772  -251.53897752]
------
Step:5, Action:North
State  260
Old Q Values:  [  -56.74272828 -8695.4397473    227.14724233 -7094.93143822]
New Q values:  [  706.37210872 -8695.4397473    227.14724233 -7094.93143822]
Reward: -1  Episode Reward:  5
xxxxx
xg..x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1367.02476015   138.46394834  2432.23066679     0.        ]
------
Step:6, Action:East
State  180
Old Q Values:  [-1367.02476015   138.46394834  2432.23066679     0.        ]
New Q values:  [-1367.02476015   138.46394834  2132.42585922     0.        ]
Reward: 9  Episode Reward:  14
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  3.84711198e+03  1.31039399e+03 -9.78728177e+01]
------
Step:7, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -1.83942988e+03  9.72747566e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -1.89992589e+02  9.72747566e+03  1.20371620e+03]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1821.26454422   35.60960053]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   538.17422759  -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799   670.63844439  -950.73737011]
Reward: 9  Episode Reward:  22
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  -46.75492222 -8656.02923281 -7525.7277781   1499.89584452]
------
Step:9, Action:West
State  288
Old Q Values:  [  -46.75492222 -8656.02923281 -7525.7277781   1499.89584452]
New Q values:  [  -46.75492222 -8656.02923281 -7525.7277781   1145.73770107]
Reward: -1  Episode Reward:  21
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1821.26454422   35.60960053]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   670.63844439  -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799   611.37668808  -950.73737011]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  -46.75492222 -8656.02923281 -7525.7277781   1145.73770107]
------
Step:11, Action:West
State  288
Old Q Values:  [  -46.75492222 -8656.02923281 -7525.7277781   1145.73770107]
New Q values:  [  -46.75492222 -8656.02923281 -7525.7277781    641.10808685]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   611.37668808  -950.73737011]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   611.37668808  -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799   436.28310129  -950.73737011]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  -46.75492222 -8656.02923281 -7525.7277781    641.10808685]
------
Step:13, Action:North
State  288
Old Q Values:  [  -46.75492222 -8656.02923281 -7525.7277781    641.10808685]
New Q values:  [ 5557.35516094 -8656.02923281 -7525.7277781    641.10808685]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18568.85709944   552.28065686 -1855.11188891 -3385.12952694]
------
Step:14, Action:North
State  208
Old Q Values:  [18568.85709944   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [24630.20599469   552.28065686 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3188.35296266  -180.00807518 57324.21051636]
------
Step:15, Action:West
State  130
Old Q Values:  [29415.41153072  3188.35296266  -180.00807518 57324.21051636]
New Q values:  [29415.41153072  3188.35296266  -180.00807518 57355.88368574]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[-1.80600000e+02  4.27238349e+03  1.12853766e+02  1.14735998e+05]
------
Step:16, Action:West
State  112
Old Q Values:  [    0.         18809.06432124 13004.59207411 93608.424     ]
New Q values:  [    0.         18809.06432124 13004.59207411 97448.7696    ]
Reward: 100009  Episode Reward:  100054
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   13.85659648    10.93436256 -1482.55814493  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [   13.85659648    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [   95.08242645    10.93436256 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   280.46595952 -8213.80649336  -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  934.15671921  862.42400667 -272.09726687]
New Q values:  [-177.44732869  401.58741562  862.42400667 -272.09726687]
Reward: -1  Episode Reward:  8
xxxxx
x . x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   95.08242645    10.93436256 -1482.55814493  -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [   95.08242645    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  121.57275843    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   280.46595952 -8213.80649336  -180.6       ]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869  401.58741562  862.42400667 -272.09726687]
New Q values:  [-177.44732869  196.50679378  862.42400667 -272.09726687]
Reward: -1  Episode Reward:  6
xxxxx
x . x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  121.57275843    10.93436256 -1482.55814493  -180.6       ]
------
Step:5, Action:North
State  181
Old Q Values:  [  121.57275843    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  306.75630537    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
xa. x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  862.42400667 -272.09726687]
------
Step:6, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  862.42400667 -272.09726687]
New Q values:  [-177.44732869  196.50679378  938.28629986 -272.09726687]
Reward: 9  Episode Reward:  14
xxxxx
x a x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  1048.65331357    55.65177422]
New Q values:  [ -281.736      -8877.87327254   826.03088345    55.65177422]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1357.23186005 -180.6        1067.3709061 ]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6        1357.23186005 -180.6        1067.3709061 ]
New Q values:  [-180.6        1366.47798978 -180.6        1067.3709061 ]
Reward: 9  Episode Reward:  22
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1843.0414477   2727.28415251 -6170.35693855 -5742.4595223 ]
------
Step:9, Action:South
State  216
Old Q Values:  [ 1843.0414477   2727.28415251 -6170.35693855 -5742.4595223 ]
New Q values:  [ 1843.0414477   2763.52020929 -6170.35693855 -5742.4595223 ]
Reward: 9  Episode Reward:  31
xxxxx
x   x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5557.35516094 -8656.02923281 -7525.7277781    641.10808685]
------
Step:10, Action:West
State  288
Old Q Values:  [ 5557.35516094 -8656.02923281 -7525.7277781    641.10808685]
New Q values:  [ 5557.35516094 -8656.02923281 -7525.7277781   1396.43614573]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x . x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  381.90482819 3781.97636997]
------
Step:11, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  381.90482819 3781.97636997]
New Q values:  [  37.74111519 -168.92307549  381.90482819 1522.44820588]
Reward: 9  Episode Reward:  49
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  14.19219296  -40.34168621 -792.9733772  -251.53897752]
------
Step:12, Action:North
State  261
Old Q Values:  [  14.19219296  -40.34168621 -792.9733772  -251.53897752]
New Q values:  [  97.10376879  -40.34168621 -792.9733772  -251.53897752]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  306.75630537    10.93436256 -1482.55814493  -180.6       ]
------
Step:13, Action:North
State  181
Old Q Values:  [  306.75630537    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  403.58841211    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  938.28629986 -272.09726687]
------
Step:14, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  938.28629986 -272.09726687]
New Q values:  [-177.44732869  196.50679378  962.63121714 -272.09726687]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   826.03088345    55.65177422]
New Q values:  [ -281.736      -8877.87327254   739.75575031    55.65177422]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1366.47798978 -180.6        1067.3709061 ]
------
Step:16, Action:South
State  138
Old Q Values:  [-180.6        1366.47798978 -180.6        1067.3709061 ]
New Q values:  [-180.6         933.61456406 -180.6        1067.3709061 ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1292.07789385 1249.16664728 -180.6           3.52184257]
------
Step:17, Action:North
State  208
Old Q Values:  [24630.20599469   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [10171.69366971   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         933.61456406 -180.6        1067.3709061 ]
------
Step:18, Action:West
State  136
Old Q Values:  [ -724.71310357  5898.73351959 -6245.61866138   269.61429359]
New Q values:  [ -724.71310357  5898.73351959 -6245.61866138   277.08222821]
Reward: -1  Episode Reward:  42
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         566.1217026    -274.94467856]
------
Step:19, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         566.1217026    -274.94467856]
New Q values:  [-10156.11771313  -5995.686        1995.46873691   -274.94467856]
Reward: -1  Episode Reward:  41
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  5898.73351959 -6245.61866138   277.08222821]
------
Step:20, Action:South
State  130
Old Q Values:  [29415.41153072  3188.35296266  -180.00807518 57355.88368574]
New Q values:  [29415.41153072  4326.24928598  -180.00807518 57355.88368574]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10171.69366971   552.28065686 -1855.11188891 -3385.12952694]
------
Step:21, Action:North
State  208
Old Q Values:  [10171.69366971   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 4388.28873971   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         933.61456406 -180.6        1067.3709061 ]
------
Step:22, Action:West
State  138
Old Q Values:  [-180.6         933.61456406 -180.6        1067.3709061 ]
New Q values:  [-180.6         933.61456406 -180.6         648.27508753]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   739.75575031    55.65177422]
------
Step:23, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   739.75575031    55.65177422]
New Q values:  [ -281.736      -8877.87327254   575.38666934    55.65177422]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         933.61456406 -180.6         648.27508753]
------
Step:24, Action:South
State  130
Old Q Values:  [29415.41153072  4326.24928598  -180.00807518 57355.88368574]
New Q values:  [29415.41153072  3046.3863363   -180.00807518 57355.88368574]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4388.28873971   552.28065686 -1855.11188891 -3385.12952694]
------
Step:25, Action:North
State  208
Old Q Values:  [ 4388.28873971   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 2034.7998651    552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         933.61456406 -180.6         648.27508753]
------
Step:26, Action:South
State  138
Old Q Values:  [-180.6         933.61456406 -180.6         648.27508753]
New Q values:  [-180.6         760.46919378 -180.6         648.27508753]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1292.07789385 1249.16664728 -180.6           3.52184257]
------
Step:27, Action:North
State  208
Old Q Values:  [ 2034.7998651    552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 1041.46070418   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         760.46919378 -180.6         648.27508753]
------
Step:28, Action:South
State  138
Old Q Values:  [-180.6         760.46919378 -180.6         648.27508753]
New Q values:  [-180.6         691.21104567 -180.6         648.27508753]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1292.07789385 1249.16664728 -180.6           3.52184257]
------
Step:29, Action:North
State  208
Old Q Values:  [ 1041.46070418   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [  623.34759537   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         691.21104567 -180.6         648.27508753]
------
Step:30, Action:South
State  136
Old Q Values:  [ -724.71310357  5898.73351959 -6245.61866138   277.08222821]
New Q values:  [ -724.71310357  2545.89768645 -6245.61866138   277.08222821]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  623.34759537   552.28065686 -1855.11188891 -3385.12952694]
------
Step:31, Action:North
State  208
Old Q Values:  [  623.34759537   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 1012.50834408   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  29
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2545.89768645 -6245.61866138   277.08222821]
------
Step:32, Action:South
State  136
Old Q Values:  [ -724.71310357  2545.89768645 -6245.61866138   277.08222821]
New Q values:  [ -724.71310357  1321.5115778  -6245.61866138   277.08222821]
Reward: -1  Episode Reward:  28
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1012.50834408   552.28065686 -1855.11188891 -3385.12952694]
------
Step:33, Action:North
State  208
Old Q Values:  [ 1012.50834408   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [17611.16844335   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3046.3863363   -180.00807518 57355.88368574]
------
Step:34, Action:West
State  138
Old Q Values:  [-180.6         691.21104567 -180.6         648.27508753]
New Q values:  [-180.6         691.21104567 -180.6         431.32603582]
Reward: -1  Episode Reward:  26
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   575.38666934    55.65177422]
------
Step:35, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   575.38666934    55.65177422]
New Q values:  [ -281.736      -8877.87327254   436.91798144    55.65177422]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         691.21104567 -180.6         431.32603582]
------
Step:36, Action:South
State  138
Old Q Values:  [-180.6         691.21104567 -180.6         431.32603582]
New Q values:  [-180.6        5559.23495127 -180.6         431.32603582]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17611.16844335   552.28065686 -1855.11188891 -3385.12952694]
------
Step:37, Action:North
State  210
Old Q Values:  [1292.07789385 1249.16664728 -180.6           3.52184257]
New Q values:  [2184.00164292 1249.16664728 -180.6           3.52184257]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        5559.23495127 -180.6         431.32603582]
------
Step:38, Action:South
State  138
Old Q Values:  [-180.6        5559.23495127 -180.6         431.32603582]
New Q values:  [-180.6        2878.29447338 -180.6         431.32603582]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2184.00164292 1249.16664728 -180.6           3.52184257]
------
Step:39, Action:North
State  208
Old Q Values:  [17611.16844335   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [24250.63248306   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3046.3863363   -180.00807518 57355.88368574]
------
Step:40, Action:West
State  138
Old Q Values:  [-180.6        2878.29447338 -180.6         431.32603582]
New Q values:  [-180.6        2878.29447338 -180.6         303.00580876]
Reward: -1  Episode Reward:  20
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   436.91798144    55.65177422]
------
Step:41, Action:East
State  114
Old Q Values:  [-1.80600000e+02  4.27238349e+03  1.12853766e+02  1.14735998e+05]
New Q values:  [  -180.6          4272.38349051  17251.30661205 114735.99826397]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3046.3863363   -180.00807518 57355.88368574]
------
Step:42, Action:West
State  138
Old Q Values:  [-180.6        2878.29447338 -180.6         303.00580876]
New Q values:  [-180.6        2878.29447338 -180.6         251.67771793]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   436.91798144    55.65177422]
------
Step:43, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  17251.30661205 114735.99826397]
New Q values:  [  -180.6          4272.38349051  24106.68775054 114735.99826397]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3046.3863363   -180.00807518 57355.88368574]
------
Step:44, Action:West
State  138
Old Q Values:  [-180.6        2878.29447338 -180.6         251.67771793]
New Q values:  [-180.6        2878.29447338 -180.6         687.98778437]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
------
Step:45, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1959.72232399   178.97765173]
New Q values:  [ -253.44886264 -1902.20915811  1646.77727161   178.97765173]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2878.29447338 -180.6         687.98778437]
------
Step:46, Action:South
State  138
Old Q Values:  [-180.6        2878.29447338 -180.6         687.98778437]
New Q values:  [-180.6        2425.90753427 -180.6         687.98778437]
Reward: -10001  Episode Reward:  -9986
xxxxx
x   x
x .gx
x   x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24250.63248306   552.28065686 -1855.11188891 -3385.12952694]
------
Step:1, Action:North
State  210
Old Q Values:  [2184.00164292 1249.16664728 -180.6           3.52184257]
New Q values:  [1606.77291745 1249.16664728 -180.6           3.52184257]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2425.90753427 -180.6         687.98778437]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        2425.90753427 -180.6         687.98778437]
New Q values:  [-180.6        1451.79488894 -180.6         687.98778437]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1606.77291745 1249.16664728 -180.6           3.52184257]
------
Step:3, Action:North
State  210
Old Q Values:  [1606.77291745 1249.16664728 -180.6           3.52184257]
New Q values:  [1077.64763366 1249.16664728 -180.6           3.52184257]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1451.79488894 -180.6         687.98778437]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6        1451.79488894 -180.6         687.98778437]
New Q values:  [-180.6        1855.3077005  -180.6         687.98778437]
Reward: -10001  Episode Reward:  -9994
xxxxx
x.. x
x .gx
x...x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
------
Step:1, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  3.84711198e+03  1.31039399e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  1.67512972e+03  1.31039399e+03 -9.78728177e+01]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   436.28310129  -950.73737011]
------
Step:2, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  381.90482819 1522.44820588]
New Q values:  [  37.74111519 -168.92307549 1825.36847956 1522.44820588]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5557.35516094 -8656.02923281 -7525.7277781   1396.43614573]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5557.35516094 -8656.02923281 -7525.7277781   1396.43614573]
New Q values:  [ 9503.5318093  -8656.02923281 -7525.7277781   1396.43614573]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24250.63248306   552.28065686 -1855.11188891 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [24250.63248306   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [12837.43415695   552.28065686 -1855.11188891 -3385.12952694]
Reward: -10001  Episode Reward:  -9974
xxxxx
x..gx
x.  x
x.  x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   436.28310129  -950.73737011]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   436.28310129  -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799  3030.9727833   -950.73737011]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9503.5318093  -8656.02923281 -7525.7277781   1396.43614573]
------
Step:2, Action:North
State  288
Old Q Values:  [ 9503.5318093  -8656.02923281 -7525.7277781   1396.43614573]
New Q values:  [ 7658.0429708  -8656.02923281 -7525.7277781   1396.43614573]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12837.43415695   552.28065686 -1855.11188891 -3385.12952694]
------
Step:3, Action:North
State  208
Old Q Values:  [12837.43415695   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [22347.1387685    552.28065686 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3046.3863363   -180.00807518 57355.88368574]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        1855.3077005  -180.6         687.98778437]
New Q values:  [-180.6        1855.3077005  -180.6         774.62829523]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1646.77727161   178.97765173]
------
Step:5, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1646.77727161   178.97765173]
New Q values:  [ -253.44886264 -1902.20915811  1214.70321879   178.97765173]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1855.3077005  -180.6         774.62829523]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6        1855.3077005  -180.6         774.62829523]
New Q values:  [-180.6        1855.3077005  -180.6         440.32671252]
Reward: -1  Episode Reward:  34
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   436.91798144    55.65177422]
------
Step:7, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        1995.46873691   -274.94467856]
New Q values:  [-10156.11771313  -5995.686        1194.04096811   -274.94467856]
Reward: -1  Episode Reward:  33
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1321.5115778  -6245.61866138   277.08222821]
------
Step:8, Action:South
State  136
Old Q Values:  [ -724.71310357  1321.5115778  -6245.61866138   277.08222821]
New Q values:  [ -724.71310357  7232.14626167 -6245.61866138   277.08222821]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22347.1387685    552.28065686 -1855.11188891 -3385.12952694]
------
Step:9, Action:North
State  208
Old Q Values:  [22347.1387685    552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [26145.02061312   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  3046.3863363   -180.00807518 57355.88368574]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        1855.3077005  -180.6         440.32671252]
New Q values:  [-180.6        1855.3077005  -180.6         306.60607944]
Reward: -1  Episode Reward:  30
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   436.91798144    55.65177422]
------
Step:11, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1214.70321879   178.97765173]
New Q values:  [ -253.44886264 -1902.20915811  1041.87359767   178.97765173]
Reward: -1  Episode Reward:  29
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1855.3077005  -180.6         306.60607944]
------
Step:12, Action:West
State  138
Old Q Values:  [-180.6        1855.3077005  -180.6         306.60607944]
New Q values:  [-180.6        1855.3077005  -180.6         434.60451108]
Reward: -1  Episode Reward:  28
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1041.87359767   178.97765173]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   436.91798144    55.65177422]
New Q values:  [ -281.736      -8877.87327254   730.75950272    55.65177422]
Reward: -1  Episode Reward:  27
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1855.3077005  -180.6         434.60451108]
------
Step:14, Action:South
State  138
Old Q Values:  [-180.6        1855.3077005  -180.6         434.60451108]
New Q values:  [-180.6        1116.27307438 -180.6         434.60451108]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1077.64763366 1249.16664728 -180.6           3.52184257]
------
Step:15, Action:North
State  208
Old Q Values:  [26145.02061312   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [10792.29016756   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1116.27307438 -180.6         434.60451108]
------
Step:16, Action:West
State  138
Old Q Values:  [-180.6        1116.27307438 -180.6         434.60451108]
New Q values:  [-180.6        1116.27307438 -180.6         485.80388373]
Reward: -1  Episode Reward:  24
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1041.87359767   178.97765173]
------
Step:17, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   730.75950272    55.65177422]
New Q values:  [ -281.736      -8877.87327254   626.5857234     55.65177422]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1116.27307438 -180.6         485.80388373]
------
Step:18, Action:South
State  138
Old Q Values:  [-180.6        1116.27307438 -180.6         485.80388373]
New Q values:  [-180.6         820.65922394 -180.6         485.80388373]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1077.64763366 1249.16664728 -180.6           3.52184257]
------
Step:19, Action:North
State  210
Old Q Values:  [1077.64763366 1249.16664728 -180.6           3.52184257]
New Q values:  [ 676.65682065 1249.16664728 -180.6           3.52184257]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         820.65922394 -180.6         485.80388373]
------
Step:20, Action:South
State  130
Old Q Values:  [29415.41153072  3046.3863363   -180.00807518 57355.88368574]
New Q values:  [29415.41153072  1592.70452871  -180.00807518 57355.88368574]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 676.65682065 1249.16664728 -180.6           3.52184257]
------
Step:21, Action:South
State  210
Old Q Values:  [ 676.65682065 1249.16664728 -180.6           3.52184257]
New Q values:  [ 676.65682065 2796.47955015 -180.6           3.52184257]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7658.0429708  -8656.02923281 -7525.7277781   1396.43614573]
------
Step:22, Action:North
State  288
Old Q Values:  [ 7658.0429708  -8656.02923281 -7525.7277781   1396.43614573]
New Q values:  [ 3901.56105337 -8656.02923281 -7525.7277781   1396.43614573]
Reward: -1  Episode Reward:  18
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 676.65682065 2796.47955015 -180.6           3.52184257]
------
Step:23, Action:South
State  210
Old Q Values:  [ 676.65682065 2796.47955015 -180.6           3.52184257]
New Q values:  [ 676.65682065 2288.46013607 -180.6           3.52184257]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3901.56105337 -8656.02923281 -7525.7277781   1396.43614573]
------
Step:24, Action:North
State  288
Old Q Values:  [ 3901.56105337 -8656.02923281 -7525.7277781   1396.43614573]
New Q values:  [ 4797.71147162 -8656.02923281 -7525.7277781   1396.43614573]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10792.29016756   552.28065686 -1855.11188891 -3385.12952694]
------
Step:25, Action:North
State  208
Old Q Values:  [10792.29016756   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 6485.95994553   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  15
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  7232.14626167 -6245.61866138   277.08222821]
------
Step:26, Action:South
State  138
Old Q Values:  [-180.6         820.65922394 -180.6         485.80388373]
New Q values:  [-180.6        2273.45167323 -180.6         485.80388373]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6485.95994553   552.28065686 -1855.11188891 -3385.12952694]
------
Step:27, Action:North
State  210
Old Q Values:  [ 676.65682065 2288.46013607 -180.6           3.52184257]
New Q values:  [ 952.09823023 2288.46013607 -180.6           3.52184257]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2273.45167323 -180.6         485.80388373]
------
Step:28, Action:South
State  138
Old Q Values:  [-180.6        2273.45167323 -180.6         485.80388373]
New Q values:  [-180.6        2854.56865295 -180.6         485.80388373]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6485.95994553   552.28065686 -1855.11188891 -3385.12952694]
------
Step:29, Action:North
State  208
Old Q Values:  [ 6485.95994553   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 3450.1545741    552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2854.56865295 -180.6         485.80388373]
------
Step:30, Action:West
State  138
Old Q Values:  [-180.6        2854.56865295 -180.6         485.80388373]
New Q values:  [-180.6        2854.56865295 -180.6         381.69727051]
Reward: -1  Episode Reward:  10
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   626.5857234     55.65177422]
------
Step:31, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  24106.68775054 114735.99826397]
New Q values:  [  -180.6          4272.38349051  26848.84020594 114735.99826397]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  1592.70452871  -180.00807518 57355.88368574]
------
Step:32, Action:West
State  130
Old Q Values:  [29415.41153072  1592.70452871  -180.00807518 57355.88368574]
New Q values:  [29415.41153072  1592.70452871  -180.00807518 57362.55295349]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  26848.84020594 114735.99826397]
------
Step:33, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   626.5857234     55.65177422]
New Q values:  [ -281.736      -8877.87327254   626.5857234    230.38567195]
Reward: 9  Episode Reward:  17
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8399.5673214    675.74987421  -180.6       ]
------
Step:34, Action:East
State  104
Old Q Values:  [-8.65284000e+03  4.32678599e-01  6.06322547e+02 -8.65284000e+03]
New Q values:  [-8.65284000e+03  4.32678599e-01  6.00141309e+02 -8.65284000e+03]
Reward: -1  Episode Reward:  16
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        1194.04096811   -274.94467856]
------
Step:35, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   626.5857234    230.38567195]
New Q values:  [ -281.736      -8877.87327254  1106.40488525   230.38567195]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2854.56865295 -180.6         381.69727051]
------
Step:36, Action:South
State  138
Old Q Values:  [-180.6        2854.56865295 -180.6         381.69727051]
New Q values:  [-180.6        1827.765502   -180.6         381.69727051]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 952.09823023 2288.46013607 -180.6           3.52184257]
------
Step:37, Action:South
State  216
Old Q Values:  [ 1843.0414477   2763.52020929 -6170.35693855 -5742.4595223 ]
New Q values:  [ 1843.0414477   2544.1215252  -6170.35693855 -5742.4595223 ]
Reward: -1  Episode Reward:  13
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4797.71147162 -8656.02923281 -7525.7277781   1396.43614573]
------
Step:38, Action:North
State  288
Old Q Values:  [ 4797.71147162 -8656.02923281 -7525.7277781   1396.43614573]
New Q values:  [ 2953.53096088 -8656.02923281 -7525.7277781   1396.43614573]
Reward: -1  Episode Reward:  12
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3450.1545741    552.28065686 -1855.11188891 -3385.12952694]
------
Step:39, Action:North
State  208
Old Q Values:  [ 3450.1545741    552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 3549.10570814   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  11
xxxxx
x gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  7232.14626167 -6245.61866138   277.08222821]
------
Step:40, Action:South
State  136
Old Q Values:  [ -724.71310357  7232.14626167 -6245.61866138   277.08222821]
New Q values:  [ -724.71310357  3956.99021711 -6245.61866138   277.08222821]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3549.10570814   552.28065686 -1855.11188891 -3385.12952694]
------
Step:41, Action:North
State  216
Old Q Values:  [ 1843.0414477   2544.1215252  -6170.35693855 -5742.4595223 ]
New Q values:  [ 1284.94622968  2544.1215252  -6170.35693855 -5742.4595223 ]
Reward: -1  Episode Reward:  9
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1827.765502   -180.6         381.69727051]
------
Step:42, Action:South
State  136
Old Q Values:  [ -724.71310357  3956.99021711 -6245.61866138   277.08222821]
New Q values:  [ -724.71310357  2646.92779929 -6245.61866138   277.08222821]
Reward: -1  Episode Reward:  8
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3549.10570814   552.28065686 -1855.11188891 -3385.12952694]
------
Step:43, Action:North
State  208
Old Q Values:  [ 3549.10570814   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 2213.12062304   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  7
xxxxx
x gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2646.92779929 -6245.61866138   277.08222821]
------
Step:44, Action:South
State  138
Old Q Values:  [-180.6        1827.765502   -180.6         381.69727051]
New Q values:  [-180.6        1394.44238771 -180.6         381.69727051]
Reward: -1  Episode Reward:  6
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2213.12062304   552.28065686 -1855.11188891 -3385.12952694]
------
Step:45, Action:North
State  210
Old Q Values:  [ 952.09823023 2288.46013607 -180.6           3.52184257]
New Q values:  [ 798.57200841 2288.46013607 -180.6           3.52184257]
Reward: -1  Episode Reward:  5
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1394.44238771 -180.6         381.69727051]
------
Step:46, Action:South
State  138
Old Q Values:  [-180.6        1394.44238771 -180.6         381.69727051]
New Q values:  [-180.6        1243.71499591 -180.6         381.69727051]
Reward: -1  Episode Reward:  4
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 798.57200841 2288.46013607 -180.6           3.52184257]
------
Step:47, Action:North
State  210
Old Q Values:  [ 798.57200841 2288.46013607 -180.6           3.52184257]
New Q values:  [ 691.94330213 2288.46013607 -180.6           3.52184257]
Reward: -1  Episode Reward:  3
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1243.71499591 -180.6         381.69727051]
------
Step:48, Action:South
State  138
Old Q Values:  [-180.6        1243.71499591 -180.6         381.69727051]
New Q values:  [-180.6        1183.42403918 -180.6         381.69727051]
Reward: -1  Episode Reward:  2
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 691.94330213 2288.46013607 -180.6           3.52184257]
------
Step:49, Action:North
State  210
Old Q Values:  [ 691.94330213 2288.46013607 -180.6           3.52184257]
New Q values:  [ 631.20453261 2288.46013607 -180.6           3.52184257]
Reward: -1  Episode Reward:  1
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1183.42403918 -180.6         381.69727051]
------
Step:50, Action:South
State  138
Old Q Values:  [-180.6        1183.42403918 -180.6         381.69727051]
New Q values:  [-180.6        1159.3076565  -180.6         381.69727051]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 631.20453261 2288.46013607 -180.6           3.52184257]
------
Step:51, Action:North
State  208
Old Q Values:  [ 2213.12062304   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 1232.44054617   552.28065686 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -1
xxxxx
x  ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1159.3076565  -180.6         381.69727051]
------
Step:52, Action:West
State  136
Old Q Values:  [ -724.71310357  2646.92779929 -6245.61866138   277.08222821]
New Q values:  [ -724.71310357  2646.92779929 -6245.61866138   133.21024208]
Reward: -1  Episode Reward:  -2
xxxxx
x agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    76.59116932]
------
Step:53, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686        1194.04096811   -274.94467856]
New Q values:  [-10156.11771313  -5995.686        1194.04096811    -61.82856418]
Reward: -1  Episode Reward:  -3
xxxxx
xag x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          162.4976908  -5851.25726525     0.        ]
------
Step:54, Action:South
State  104
Old Q Values:  [-8.65284000e+03  4.32678599e-01  6.00141309e+02 -8.65284000e+03]
New Q values:  [-8.65284000e+03  6.84575004e+00  6.00141309e+02 -8.65284000e+03]
Reward: 9  Episode Reward:  6
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[0.       0.       4.242262 0.      ]
------
Step:55, Action:East
State  184
Old Q Values:  [0.       0.       4.242262 0.      ]
New Q values:  [   0.            0.         1214.25535342    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  4043.86149542  715.61136736  181.20343395]
------
Step:56, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -1.89992589e+02  9.72747566e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  8.32694800e+02  9.72747566e+03  1.20371620e+03]
Reward: -1  Episode Reward:  4
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3030.9727833   -950.73737011]
------
Step:57, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3030.9727833   -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799  2097.84840158  -950.73737011]
Reward: -1  Episode Reward:  3
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2953.53096088 -8656.02923281 -7525.7277781   1396.43614573]
------
Step:58, Action:North
State  288
Old Q Values:  [ 2953.53096088 -8656.02923281 -7525.7277781   1396.43614573]
New Q values:  [ 1944.04884191 -8656.02923281 -7525.7277781   1396.43614573]
Reward: -1  Episode Reward:  2
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1284.94622968  2544.1215252  -6170.35693855 -5742.4595223 ]
------
Step:59, Action:South
State  216
Old Q Values:  [ 1284.94622968  2544.1215252  -6170.35693855 -5742.4595223 ]
New Q values:  [ 1284.94622968  1600.26326265 -6170.35693855 -5742.4595223 ]
Reward: -1  Episode Reward:  1
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1944.04884191 -8656.02923281 -7525.7277781   1396.43614573]
------
Step:60, Action:North
State  288
Old Q Values:  [ 1944.04884191 -8656.02923281 -7525.7277781   1396.43614573]
New Q values:  [ 1257.09851556 -8656.02923281 -7525.7277781   1396.43614573]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1284.94622968  1600.26326265 -6170.35693855 -5742.4595223 ]
------
Step:61, Action:South
State  216
Old Q Values:  [ 1284.94622968  1600.26326265 -6170.35693855 -5742.4595223 ]
New Q values:  [ 1284.94622968  1058.43614878 -6170.35693855 -5742.4595223 ]
Reward: -1  Episode Reward:  -1
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1257.09851556 -8656.02923281 -7525.7277781   1396.43614573]
------
Step:62, Action:West
State  288
Old Q Values:  [ 1257.09851556 -8656.02923281 -7525.7277781   1396.43614573]
New Q values:  [ 1257.09851556 -8656.02923281 -7525.7277781   1187.32897877]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2097.84840158  -950.73737011]
------
Step:63, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2097.84840158  -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799  1215.6689153   -950.73737011]
Reward: -1  Episode Reward:  -3
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1257.09851556 -8656.02923281 -7525.7277781   1187.32897877]
------
Step:64, Action:North
State  288
Old Q Values:  [ 1257.09851556 -8656.02923281 -7525.7277781   1187.32897877]
New Q values:  [ 1188.77744705 -8656.02923281 -7525.7277781   1187.32897877]
Reward: -1  Episode Reward:  -4
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 631.20453261 2288.46013607 -180.6           3.52184257]
------
Step:65, Action:South
State  218
Old Q Values:  [ 297.42391963 -610.30346672    0.          784.96322284]
New Q values:  [297.42391963 111.91184742   0.         784.96322284]
Reward: -1  Episode Reward:  -5
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1188.77744705 -8656.02923281 -7525.7277781   1187.32897877]
------
Step:66, Action:North
State  288
Old Q Values:  [ 1188.77744705 -8656.02923281 -7525.7277781   1187.32897877]
New Q values:  [  710.39994567 -8656.02923281 -7525.7277781   1187.32897877]
Reward: -1  Episode Reward:  -6
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[297.42391963 111.91184742   0.         784.96322284]
------
Step:67, Action:West
State  216
Old Q Values:  [ 1284.94622968  1058.43614878 -6170.35693855 -5742.4595223 ]
New Q values:  [ 1284.94622968  1058.43614878 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  -7
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.66210281e+03 -9.35708462e+03  2.40000000e-02]
------
Step:68, Action:South
State  200
Old Q Values:  [  62.8218634  4043.86149542  715.61136736  181.20343395]
New Q values:  [  62.8218634  1981.64527276  715.61136736  181.20343395]
Reward: -1  Episode Reward:  -8
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1215.6689153   -950.73737011]
------
Step:69, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1825.36847956 1522.44820588]
New Q values:  [  37.74111519 -168.92307549 1085.74608545 1522.44820588]
Reward: -1  Episode Reward:  -9
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  710.39994567 -8656.02923281 -7525.7277781   1187.32897877]
------
Step:70, Action:West
State  288
Old Q Values:  [  710.39994567 -8656.02923281 -7525.7277781   1187.32897877]
New Q values:  [  710.39994567 -8656.02923281 -7525.7277781    931.06605327]
Reward: -1  Episode Reward:  -10
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1085.74608545 1522.44820588]
------
Step:71, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1085.74608545 1522.44820588]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  1.08574609e+03  6.16589215e+04]
Reward: 100009  Episode Reward:  99999
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        1194.04096811    -61.82856418]
------
Step:1, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        1194.04096811    -61.82856418]
New Q values:  [-10156.11771313  -5995.686        1277.09472703    -61.82856418]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2646.92779929 -6245.61866138   133.21024208]
------
Step:2, Action:South
State  136
Old Q Values:  [ -724.71310357  2646.92779929 -6245.61866138   133.21024208]
New Q values:  [ -724.71310357  1449.65498862 -6245.61866138   133.21024208]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1284.94622968  1058.43614878 -6170.35693855 -1798.95296703]
------
Step:3, Action:North
State  216
Old Q Values:  [ 1284.94622968  1058.43614878 -6170.35693855 -1798.95296703]
New Q values:  [  861.17078882  1058.43614878 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1159.3076565  -180.6         381.69727051]
------
Step:4, Action:South
State  136
Old Q Values:  [ -724.71310357  1449.65498862 -6245.61866138   133.21024208]
New Q values:  [ -724.71310357   896.79284008 -6245.61866138   133.21024208]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  861.17078882  1058.43614878 -6170.35693855 -1798.95296703]
------
Step:5, Action:South
State  208
Old Q Values:  [ 1232.44054617   552.28065686 -1855.11188891 -3385.12952694]
New Q values:  [ 1232.44054617   505.63207873 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  25
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  710.39994567 -8656.02923281 -7525.7277781    931.06605327]
------
Step:6, Action:West
State  288
Old Q Values:  [  710.39994567 -8656.02923281 -7525.7277781    931.06605327]
New Q values:  [  710.39994567 -8656.02923281 -7525.7277781    742.5270959 ]
Reward: 9  Episode Reward:  34
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1215.6689153   -950.73737011]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1215.6689153   -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799   708.42569489  -950.73737011]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  710.39994567 -8656.02923281 -7525.7277781    742.5270959 ]
------
Step:8, Action:West
State  288
Old Q Values:  [  710.39994567 -8656.02923281 -7525.7277781    742.5270959 ]
New Q values:  [  710.39994567 -8656.02923281 -7525.7277781    508.93854683]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   708.42569489  -950.73737011]
------
Step:9, Action:East
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  1.08574609e+03  6.16589215e+04]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  6.46818418e+02  6.16589215e+04]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  710.39994567 -8656.02923281 -7525.7277781    508.93854683]
------
Step:10, Action:West
State  288
Old Q Values:  [  710.39994567 -8656.02923281 -7525.7277781    508.93854683]
New Q values:  [  710.39994567 -8656.02923281 -7525.7277781  18700.65188033]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  6.46818418e+02  6.16589215e+04]
------
Step:11, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  6.46818418e+02  6.16589215e+04]
New Q values:  [   37.74111519  -168.92307549   646.81841788 25713.51087178]
Reward: 9  Episode Reward:  39
xxxxx
x   x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[2632.01024677 3481.80752105  534.95225968 2937.02145253]
------
Step:12, Action:South
State  257
Old Q Values:  [2632.01024677 3481.80752105  534.95225968 2937.02145253]
New Q values:  [2632.01024677 2256.66526474  534.95225968 2937.02145253]
Reward: -301  Episode Reward:  -262
xxxxx
x   x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[2632.01024677 2256.66526474  534.95225968 2937.02145253]
------
Step:13, Action:West
State  257
Old Q Values:  [2632.01024677 2256.66526474  534.95225968 2937.02145253]
New Q values:  [2632.01024677 2256.66526474  534.95225968 1875.31501677]
Reward: -301  Episode Reward:  -563
xxxxx
x g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[2632.01024677 2256.66526474  534.95225968 1875.31501677]
------
Step:14, Action:North
State  257
Old Q Values:  [2632.01024677 2256.66526474  534.95225968 1875.31501677]
New Q values:  [2265.75691284 2256.66526474  534.95225968 1875.31501677]
Reward: 9  Episode Reward:  -554
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[   0.         4025.17604709 3057.4385126     0.        ]
------
Step:15, Action:South
State  180
Old Q Values:  [-1367.02476015   138.46394834  2132.42585922     0.        ]
New Q values:  [-1367.02476015   266.69721195  2132.42585922     0.        ]
Reward: -1  Episode Reward:  -555
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  706.37210872 -8695.4397473    227.14724233 -7094.93143822]
------
Step:16, Action:East
State  257
Old Q Values:  [2265.75691284 2256.66526474  534.95225968 1875.31501677]
New Q values:  [2265.75691284 2256.66526474  425.90861234 1875.31501677]
Reward: -1  Episode Reward:  -556
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   708.42569489  -950.73737011]
------
Step:17, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   708.42569489  -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799  5892.96584206  -950.73737011]
Reward: -1  Episode Reward:  -557
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  710.39994567 -8656.02923281 -7525.7277781  18700.65188033]
------
Step:18, Action:West
State  288
Old Q Values:  [  710.39994567 -8656.02923281 -7525.7277781  18700.65188033]
New Q values:  [  710.39994567 -8656.02923281 -7525.7277781  15193.71401367]
Reward: -1  Episode Reward:  -558
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549   646.81841788 25713.51087178]
------
Step:19, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549   646.81841788 25713.51087178]
New Q values:  [   37.74111519  -168.92307549   646.81841788 10313.93547935]
Reward: -1  Episode Reward:  -559
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  97.10376879  -40.34168621 -792.9733772  -251.53897752]
------
Step:20, Action:North
State  261
Old Q Values:  [  97.10376879  -40.34168621 -792.9733772  -251.53897752]
New Q values:  [ 159.31803115  -40.34168621 -792.9733772  -251.53897752]
Reward: -1  Episode Reward:  -560
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  403.58841211    10.93436256 -1482.55814493  -180.6       ]
------
Step:21, Action:North
State  181
Old Q Values:  [  403.58841211    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  449.62472999    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  -561
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  962.63121714 -272.09726687]
------
Step:22, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  962.63121714 -272.09726687]
New Q values:  [-177.44732869  196.50679378  697.01456616 -272.09726687]
Reward: -1  Episode Reward:  -562
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1041.87359767   178.97765173]
------
Step:23, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254  1106.40488525   230.38567195]
New Q values:  [ -281.736      -8877.87327254   789.75425105   230.38567195]
Reward: -1  Episode Reward:  -563
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1159.3076565  -180.6         381.69727051]
------
Step:24, Action:South
State  138
Old Q Values:  [-180.6        1159.3076565  -180.6         381.69727051]
New Q values:  [-180.6        1149.66110342 -180.6         381.69727051]
Reward: -1  Episode Reward:  -564
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 631.20453261 2288.46013607 -180.6           3.52184257]
------
Step:25, Action:North
State  208
Old Q Values:  [ 1232.44054617   505.63207873 -1855.11188891 -3385.12952694]
New Q values:  [  837.27454949   505.63207873 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -565
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1149.66110342 -180.6         381.69727051]
------
Step:26, Action:West
State  136
Old Q Values:  [ -724.71310357   896.79284008 -6245.61866138   133.21024208]
New Q values:  [ -724.71310357   896.79284008 -6245.61866138    75.66144763]
Reward: -1  Episode Reward:  -566
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    76.59116932]
------
Step:27, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1041.87359767   178.97765173]
New Q values:  [ -253.44886264 -1902.20915811  1041.87359767   280.09543054]
Reward: -1  Episode Reward:  -567
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  697.01456616 -272.09726687]
------
Step:28, Action:East
State  99
Old Q Values:  [    0.         26514.55127631 36051.30974755     0.        ]
New Q values:  [    0.         26514.55127631 48840.72337821     0.        ]
Reward: -1  Episode Reward:  -568
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  26848.84020594 114735.99826397]
------
Step:29, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1041.87359767   280.09543054]
New Q values:  [ -253.44886264 -1902.20915811  1041.87359767   320.54254206]
Reward: -1  Episode Reward:  -569
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  697.01456616 -272.09726687]
------
Step:30, Action:East
State  109
Old Q Values:  [ -241.10880094   280.46595952 -8213.80649336  -180.6       ]
New Q values:  [ -241.10880094   280.46595952 -3263.14524655  -180.6       ]
Reward: -1  Episode Reward:  -570
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    76.59116932]
------
Step:31, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1041.87359767   320.54254206]
New Q values:  [ -253.44886264 -1902.20915811  1041.87359767   336.72138667]
Reward: -1  Episode Reward:  -571
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  697.01456616 -272.09726687]
------
Step:32, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  697.01456616 -272.09726687]
New Q values:  [-177.44732869  196.50679378  590.76790576 -272.09726687]
Reward: -1  Episode Reward:  -572
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1041.87359767   336.72138667]
------
Step:33, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1041.87359767   336.72138667]
New Q values:  [ -253.44886264 -1902.20915811   761.04777009   336.72138667]
Reward: -1  Episode Reward:  -573
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1149.66110342 -180.6         381.69727051]
------
Step:34, Action:West
State  130
Old Q Values:  [29415.41153072  1592.70452871  -180.00807518 57362.55295349]
New Q values:  [29415.41153072  1592.70452871  -180.00807518 57365.22066059]
Reward: -1  Episode Reward:  -574
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  26848.84020594 114735.99826397]
------
Step:35, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   761.04777009   336.72138667]
New Q values:  [ -253.44886264 -1902.20915811   761.04777009   311.3189264 ]
Reward: -1  Episode Reward:  -575
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  590.76790576 -272.09726687]
------
Step:36, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  590.76790576 -272.09726687]
New Q values:  [-177.44732869  196.50679378  464.02149333 -272.09726687]
Reward: -1  Episode Reward:  -576
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   761.04777009   311.3189264 ]
------
Step:37, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   761.04777009   311.3189264 ]
New Q values:  [ -253.44886264 -1902.20915811   648.71743906   311.3189264 ]
Reward: -1  Episode Reward:  -577
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1149.66110342 -180.6         381.69727051]
------
Step:38, Action:West
State  130
Old Q Values:  [29415.41153072  1592.70452871  -180.00807518 57365.22066059]
New Q values:  [29415.41153072  1592.70452871  -180.00807518 57366.28774343]
Reward: -1  Episode Reward:  -578
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  26848.84020594 114735.99826397]
------
Step:39, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   789.75425105   230.38567195]
New Q values:  [ -281.736      -8877.87327254   789.75425105   122.41264065]
Reward: -1  Episode Reward:  -579
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:40, Action:East
State  99
Old Q Values:  [    0.         26514.55127631 48840.72337821     0.        ]
New Q values:  [    0.         26514.55127631 53956.48883048     0.        ]
Reward: -1  Episode Reward:  -580
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  26848.84020594 114735.99826397]
------
Step:41, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   648.71743906   311.3189264 ]
New Q values:  [ -253.44886264 -1902.20915811   648.71743906   263.13401856]
Reward: -1  Episode Reward:  -581
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  464.02149333 -272.09726687]
------
Step:42, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  464.02149333 -272.09726687]
New Q values:  [-177.44732869  196.50679378  379.62382905 -272.09726687]
Reward: -1  Episode Reward:  -582
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   648.71743906   263.13401856]
------
Step:43, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   789.75425105   122.41264065]
New Q values:  [ -281.736      -8877.87327254   660.20003144   122.41264065]
Reward: -1  Episode Reward:  -583
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1149.66110342 -180.6         381.69727051]
------
Step:44, Action:South
State  138
Old Q Values:  [-180.6        1149.66110342 -180.6         381.69727051]
New Q values:  [-180.6        1145.80248219 -180.6         381.69727051]
Reward: -1  Episode Reward:  -584
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 631.20453261 2288.46013607 -180.6           3.52184257]
------
Step:45, Action:South
State  210
Old Q Values:  [ 631.20453261 2288.46013607 -180.6           3.52184257]
New Q values:  [ 6.31204533e+02  5.47289826e+03 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  -585
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  710.39994567 -8656.02923281 -7525.7277781  15193.71401367]
------
Step:46, Action:North
State  288
Old Q Values:  [  710.39994567 -8656.02923281 -7525.7277781  15193.71401367]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781  15193.71401367]
Reward: -1  Episode Reward:  -586
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  837.27454949   505.63207873 -1855.11188891 -3385.12952694]
------
Step:47, Action:North
State  208
Old Q Values:  [  837.27454949   505.63207873 -1855.11188891 -3385.12952694]
New Q values:  [  678.05056445   505.63207873 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -587
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1145.80248219 -180.6         381.69727051]
------
Step:48, Action:West
State  136
Old Q Values:  [ -724.71310357   896.79284008 -6245.61866138    75.66144763]
New Q values:  [ -724.71310357   896.79284008 -6245.61866138    52.64192985]
Reward: -1  Episode Reward:  -588
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    76.59116932]
------
Step:49, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   648.71743906   263.13401856]
New Q values:  [ -253.44886264 -1902.20915811   648.71743906   218.54075614]
Reward: -1  Episode Reward:  -589
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  379.62382905 -272.09726687]
------
Step:50, Action:East
State  109
Old Q Values:  [ -241.10880094   280.46595952 -3263.14524655  -180.6       ]
New Q values:  [ -241.10880094   280.46595952 -1282.88074782  -180.6       ]
Reward: -1  Episode Reward:  -590
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    76.59116932]
------
Step:51, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686        1277.09472703    -61.82856418]
New Q values:  [-10156.11771313  -5995.686        1277.09472703     58.80836218]
Reward: -1  Episode Reward:  -591
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   280.46595952 -1282.88074782  -180.6       ]
------
Step:52, Action:South
State  109
Old Q Values:  [ -241.10880094   280.46595952 -1282.88074782  -180.6       ]
New Q values:  [ -241.10880094   246.4738028  -1282.88074782  -180.6       ]
Reward: -1  Episode Reward:  -592
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  449.62472999    10.93436256 -1482.55814493  -180.6       ]
------
Step:53, Action:North
State  181
Old Q Values:  [  449.62472999    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  293.13704071    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  -593
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  379.62382905 -272.09726687]
------
Step:54, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  379.62382905 -272.09726687]
New Q values:  [-177.44732869  196.50679378  345.86476334 -272.09726687]
Reward: -1  Episode Reward:  -594
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   648.71743906   218.54075614]
------
Step:55, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   648.71743906   218.54075614]
New Q values:  [ -253.44886264 -1902.20915811   602.62772028   218.54075614]
Reward: -1  Episode Reward:  -595
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1145.80248219 -180.6         381.69727051]
------
Step:56, Action:West
State  130
Old Q Values:  [29415.41153072  1592.70452871  -180.00807518 57366.28774343]
New Q values:  [29415.41153072  1592.70452871  -180.00807518 57366.71457656]
Reward: -1  Episode Reward:  -596
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  26848.84020594 114735.99826397]
------
Step:57, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   660.20003144   122.41264065]
New Q values:  [ -281.736      -8877.87327254   660.20003144    79.22342813]
Reward: -1  Episode Reward:  -597
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:58, Action:East
State  99
Old Q Values:  [    0.         26514.55127631 53956.48883048     0.        ]
New Q values:  [    0.         26514.55127631 56002.79501138     0.        ]
Reward: -1  Episode Reward:  -598
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  26848.84020594 114735.99826397]
------
Step:59, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   602.62772028   218.54075614]
New Q values:  [ -253.44886264 -1902.20915811   602.62772028   190.57573146]
Reward: -1  Episode Reward:  -599
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  345.86476334 -272.09726687]
------
Step:60, Action:East
State  99
Old Q Values:  [    0.         26514.55127631 56002.79501138     0.        ]
New Q values:  [    0.         26514.55127631 56821.31748374     0.        ]
Reward: -1  Episode Reward:  -600
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  26848.84020594 114735.99826397]
------
Step:61, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254   660.20003144    79.22342813]
New Q values:  [ -281.736      -8877.87327254   660.20003144   134.84880026]
Reward: -1  Episode Reward:  -601
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  196.50679378  345.86476334 -272.09726687]
------
Step:62, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  345.86476334 -272.09726687]
New Q values:  [-177.44732869  196.50679378  318.53422142 -272.09726687]
Reward: -1  Episode Reward:  -602
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   602.62772028   190.57573146]
------
Step:63, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   602.62772028   190.57573146]
New Q values:  [ -253.44886264 -1902.20915811   584.19183277   190.57573146]
Reward: -1  Episode Reward:  -603
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1145.80248219 -180.6         381.69727051]
------
Step:64, Action:West
State  138
Old Q Values:  [-180.6        1145.80248219 -180.6         381.69727051]
New Q values:  [-180.6        1145.80248219 -180.6         327.33645804]
Reward: -1  Episode Reward:  -604
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   584.19183277   190.57573146]
------
Step:65, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   660.20003144   134.84880026]
New Q values:  [ -281.736      -8877.87327254   607.22075723   134.84880026]
Reward: -1  Episode Reward:  -605
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1145.80248219 -180.6         327.33645804]
------
Step:66, Action:South
State  130
Old Q Values:  [29415.41153072  1592.70452871  -180.00807518 57366.71457656]
New Q values:  [29415.41153072   839.89698082  -180.00807518 57366.71457656]
Reward: -1  Episode Reward:  -606
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  678.05056445   505.63207873 -1855.11188891 -3385.12952694]
------
Step:67, Action:North
State  208
Old Q Values:  [  678.05056445   505.63207873 -1855.11188891 -3385.12952694]
New Q values:  [  614.36097044   505.63207873 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -607
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1145.80248219 -180.6         327.33645804]
------
Step:68, Action:South
State  130
Old Q Values:  [29415.41153072   839.89698082  -180.00807518 57366.71457656]
New Q values:  [29415.41153072   519.66708346  -180.00807518 57366.71457656]
Reward: -1  Episode Reward:  -608
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  614.36097044   505.63207873 -1855.11188891 -3385.12952694]
------
Step:69, Action:North
State  208
Old Q Values:  [  614.36097044   505.63207873 -1855.11188891 -3385.12952694]
New Q values:  [  514.1822402    505.63207873 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -609
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   896.79284008 -6245.61866138    52.64192985]
------
Step:70, Action:South
State  136
Old Q Values:  [ -724.71310357   896.79284008 -6245.61866138    52.64192985]
New Q values:  [ -724.71310357   512.37180809 -6245.61866138    52.64192985]
Reward: -1  Episode Reward:  -610
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  514.1822402    505.63207873 -1855.11188891 -3385.12952694]
------
Step:71, Action:South
State  208
Old Q Values:  [  514.1822402    505.63207873 -1855.11188891 -3385.12952694]
New Q values:  [  514.1822402   4759.76703559 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -611
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781  15193.71401367]
------
Step:72, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781  15193.71401367]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781   9171.06624927]
Reward: -1  Episode Reward:  -612
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549   646.81841788 10313.93547935]
------
Step:73, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5892.96584206  -950.73737011]
New Q values:  [-2527.46239811 -8521.23367799  5892.96584206  -333.0995387 ]
Reward: -1  Episode Reward:  -613
xxxxx
x g x
x . x
xa  x
xxxxx
Step:74, Action:West
State  261
Old Q Values:  [ 159.31803115  -40.34168621 -792.9733772  -251.53897752]
New Q values:  [ 159.31803115  -40.34168621 -792.9733772  -233.42018166]
Reward: -301  Episode Reward:  -914
xxxxx
x  gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 159.31803115  -40.34168621 -792.9733772  -233.42018166]
------
Step:75, Action:North
State  261
Old Q Values:  [ 159.31803115  -40.34168621 -792.9733772  -233.42018166]
New Q values:  [ 151.06832467  -40.34168621 -792.9733772  -233.42018166]
Reward: -1  Episode Reward:  -915
xxxxx
x g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  293.13704071    10.93436256 -1482.55814493  -180.6       ]
------
Step:76, Action:North
State  180
Old Q Values:  [-1367.02476015   266.69721195  2132.42585922     0.        ]
New Q values:  [-6255.67585525   266.69721195  2132.42585922     0.        ]
Reward: -10001  Episode Reward:  -10916
xxxxx
xg  x
x . x
x   x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869  196.50679378  318.53422142 -272.09726687]
New Q values:  [-177.44732869  196.50679378  314.97991574 -272.09726687]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   607.22075723   134.84880026]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   607.22075723   134.84880026]
New Q values:  [ -281.736      -8877.87327254   592.02904755   134.84880026]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1145.80248219 -180.6         327.33645804]
------
Step:3, Action:South
State  138
Old Q Values:  [-180.6        1145.80248219 -180.6         327.33645804]
New Q values:  [-180.6         781.25183751 -180.6         327.33645804]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  861.17078882  1058.43614878 -6170.35693855 -1798.95296703]
------
Step:4, Action:South
State  208
Old Q Values:  [  514.1822402   4759.76703559 -1855.11188891 -3385.12952694]
New Q values:  [  514.1822402   4660.62668902 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  36
xxxxx
xg  x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781   9171.06624927]
------
Step:5, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781   9171.06624927]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781   5441.71625233]
Reward: 9  Episode Reward:  45
xxxxx
x g x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5892.96584206  -333.0995387 ]
------
Step:6, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549   646.81841788 10313.93547935]
New Q values:  [   37.74111519  -168.92307549  1890.64224285 10313.93547935]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781   5441.71625233]
------
Step:7, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781   5441.71625233]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781   5270.26714474]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  1890.64224285 10313.93547935]
------
Step:8, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  1890.64224285 10313.93547935]
New Q values:  [  37.74111519 -168.92307549 1890.64224285 4804.70126559]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x.. x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[2265.75691284 2256.66526474  425.90861234 1875.31501677]
------
Step:9, Action:North
State  261
Old Q Values:  [ 151.06832467  -40.34168621 -792.9733772  -233.42018166]
New Q values:  [ 153.76844208  -40.34168621 -792.9733772  -233.42018166]
Reward: 9  Episode Reward:  51
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  293.13704071    10.93436256 -1482.55814493  -180.6       ]
------
Step:10, Action:North
State  181
Old Q Values:  [  293.13704071    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  190.59695712    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  50
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   246.4738028  -1282.88074782  -180.6       ]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869  196.50679378  314.97991574 -272.09726687]
New Q values:  [-177.44732869  135.18180465  314.97991574 -272.09726687]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  190.59695712    10.93436256 -1482.55814493  -180.6       ]
------
Step:12, Action:North
State  177
Old Q Values:  [   0.         4025.17604709 3057.4385126     0.        ]
New Q values:  [17045.79524512  4025.17604709  3057.4385126      0.        ]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[    0.         26514.55127631 56821.31748374     0.        ]
------
Step:13, Action:East
State  111
Old Q Values:  [-177.44732869  135.18180465  314.97991574 -272.09726687]
New Q values:  [-177.44732869  135.18180465  303.00068056 -272.09726687]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   592.02904755   134.84880026]
------
Step:14, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   584.19183277   190.57573146]
New Q values:  [ -253.44886264 -1902.20915811   467.45228436   190.57573146]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         781.25183751 -180.6         327.33645804]
------
Step:15, Action:South
State  138
Old Q Values:  [-180.6         781.25183751 -180.6         327.33645804]
New Q values:  [-180.6        1953.77021256 -180.6         327.33645804]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6.31204533e+02  5.47289826e+03 -1.80600000e+02  3.52184257e+00]
------
Step:16, Action:South
State  210
Old Q Values:  [ 6.31204533e+02  5.47289826e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [  631.20453261 -2230.36055317  -180.6            3.52184257]
Reward: -10001  Episode Reward:  -9956
xxxxx
x   x
x . x
x  gx
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869  135.18180465  303.00068056 -272.09726687]
New Q values:  [-177.44732869  135.18180465  304.20898649 -272.09726687]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   592.02904755   134.84880026]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   592.02904755   134.84880026]
New Q values:  [ -281.736      -8877.87327254   828.34268279   134.84880026]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1953.77021256 -180.6         327.33645804]
------
Step:3, Action:South
State  136
Old Q Values:  [ -724.71310357   512.37180809 -6245.61866138    52.64192985]
New Q values:  [ -724.71310357   527.87956787 -6245.61866138    52.64192985]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  861.17078882  1058.43614878 -6170.35693855 -1798.95296703]
------
Step:4, Action:South
State  216
Old Q Values:  [  861.17078882  1058.43614878 -6170.35693855 -1798.95296703]
New Q values:  [  861.17078882  2009.85460293 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781   5270.26714474]
------
Step:5, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781   5270.26714474]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781   3554.91723757]
Reward: 9  Episode Reward:  45
xxxxx
x  gx
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1890.64224285 4804.70126559]
------
Step:6, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1890.64224285 4804.70126559]
New Q values:  [  37.74111519 -168.92307549 1890.64224285 1973.41103886]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 153.76844208  -40.34168621 -792.9733772  -233.42018166]
------
Step:7, Action:North
State  261
Old Q Values:  [ 153.76844208  -40.34168621 -792.9733772  -233.42018166]
New Q values:  [ 118.08646397  -40.34168621 -792.9733772  -233.42018166]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  190.59695712    10.93436256 -1482.55814493  -180.6       ]
------
Step:8, Action:North
State  183
Old Q Values:  [  22.25138791  126.53579727 5185.69205879    0.        ]
New Q values:  [  99.56325111  126.53579727 5185.69205879    0.        ]
Reward: -1  Episode Reward:  52
xxxxx
xa  x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  135.18180465  304.20898649 -272.09726687]
------
Step:9, Action:East
State  111
Old Q Values:  [-177.44732869  135.18180465  304.20898649 -272.09726687]
New Q values:  [-177.44732869  135.18180465  369.58639943 -272.09726687]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   828.34268279   134.84880026]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   828.34268279   134.84880026]
New Q values:  [ -281.736      -8877.87327254   916.86813688   134.84880026]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1953.77021256 -180.6         327.33645804]
------
Step:11, Action:South
State  138
Old Q Values:  [-180.6        1953.77021256 -180.6         327.33645804]
New Q values:  [-180.6         970.26944481 -180.6         327.33645804]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  631.20453261 -2230.36055317  -180.6            3.52184257]
------
Step:12, Action:North
State  208
Old Q Values:  [  514.1822402   4660.62668902 -1855.11188891 -3385.12952694]
New Q values:  [17415.08726905  4660.62668902 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072   519.66708346  -180.00807518 57366.71457656]
------
Step:13, Action:West
State  138
Old Q Values:  [-180.6         970.26944481 -180.6         327.33645804]
New Q values:  [-180.6         970.26944481 -180.6         270.57026852]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x .gx
x   x
xxxxx
Step:14, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  26848.84020594 114735.99826397]
New Q values:  [  -180.6          4272.38349051  27948.95045534 114735.99826397]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072   519.66708346  -180.00807518 57366.71457656]
------
Step:15, Action:West
State  136
Old Q Values:  [ -724.71310357   527.87956787 -6245.61866138    52.64192985]
New Q values:  [ -724.71310357   527.87956787 -6245.61866138 -5596.41480995]
Reward: -10001  Episode Reward:  -9955
xxxxx
x g x
x . x
x   x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  1.67512972e+03  1.31039399e+03 -9.78728177e+01]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144  1430.01910237   174.55451539     0.        ]
New Q values:  [-2469.90645144  1123.78700421   174.55451539     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1821.26454422   35.60960053]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5892.96584206  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799  3429.06150809  -333.0995387 ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781   3554.91723757]
------
Step:3, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781   3554.91723757]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781   2450.08534746]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3429.06150809  -333.0995387 ]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3429.06150809  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799  2106.05020747  -333.0995387 ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781   2450.08534746]
------
Step:5, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781   2450.08534746]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781   1611.24920123]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2106.05020747  -333.0995387 ]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2106.05020747  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799  1325.19484336  -333.0995387 ]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781   1611.24920123]
------
Step:7, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781   1611.24920123]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781   1041.4581335 ]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1325.19484336  -333.0995387 ]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1325.19484336  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799   841.91537739  -333.0995387 ]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781   1041.4581335 ]
------
Step:9, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781   1041.4581335 ]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781    668.55786662]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   841.91537739  -333.0995387 ]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   841.91537739  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799   536.73351094  -333.0995387 ]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
x. .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781    668.55786662]
------
Step:11, Action:West
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781    668.55786662]
New Q values:  [  534.74234312 -8656.02923281 -7525.7277781    427.84319993]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   536.73351094  -333.0995387 ]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   536.73351094  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799   374.51610731  -333.0995387 ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  534.74234312 -8656.02923281 -7525.7277781    427.84319993]
------
Step:13, Action:North
State  288
Old Q Values:  [  534.74234312 -8656.02923281 -7525.7277781    427.84319993]
New Q values:  [ 5443.82311796 -8656.02923281 -7525.7277781    427.84319993]
Reward: 9  Episode Reward:  17
xxxxx
x g.x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17415.08726905  4660.62668902 -1855.11188891 -3385.12952694]
------
Step:14, Action:North
State  208
Old Q Values:  [17415.08726905  4660.62668902 -1855.11188891 -3385.12952694]
New Q values:  [ 7129.79877798  4660.62668902 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  26
xxxxx
xg.ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   527.87956787 -6245.61866138 -5596.41480995]
------
Step:15, Action:South
State  130
Old Q Values:  [29415.41153072   519.66708346  -180.00807518 57366.71457656]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 57366.71457656]
Reward: -1  Episode Reward:  25
xxxxx
x . x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7129.79877798  4660.62668902 -1855.11188891 -3385.12952694]
------
Step:16, Action:North
State  208
Old Q Values:  [ 7129.79877798  4660.62668902 -1855.11188891 -3385.12952694]
New Q values:  [ 3142.40034463  4660.62668902 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  24
xxxxx
x .ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         970.26944481 -180.6         270.57026852]
------
Step:17, Action:South
State  138
Old Q Values:  [-180.6         970.26944481 -180.6         270.57026852]
New Q values:  [ -180.6        -4214.30421537  -180.6          270.57026852]
Reward: -10001  Episode Reward:  -9977
xxxxx
x . x
x. gx
x.  x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  1869.80484129     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [-6255.67585525   266.69721195  2132.42585922     0.        ]
New Q values:  [-6255.67585525   266.69721195  1360.9092598      0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  1.67512972e+03  1.31039399e+03 -9.78728177e+01]
------
Step:2, Action:South
State  196
Old Q Values:  [-2469.90645144  1123.78700421   174.55451539     0.        ]
New Q values:  [-2469.90645144  1001.29416495   174.55451539     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1821.26454422   35.60960053]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   374.51610731  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799  1788.35337831  -333.0995387 ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5443.82311796 -8656.02923281 -7525.7277781    427.84319993]
------
Step:4, Action:North
State  288
Old Q Values:  [ 5443.82311796 -8656.02923281 -7525.7277781    427.84319993]
New Q values:  [ 3581.11725389 -8656.02923281 -7525.7277781    427.84319993]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3142.40034463  4660.62668902 -1855.11188891 -3385.12952694]
------
Step:5, Action:South
State  208
Old Q Values:  [ 3142.40034463  4660.62668902 -1855.11188891 -3385.12952694]
New Q values:  [ 3142.40034463  2937.98585177 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  35
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3581.11725389 -8656.02923281 -7525.7277781    427.84319993]
------
Step:6, Action:West
State  288
Old Q Values:  [ 3581.11725389 -8656.02923281 -7525.7277781    427.84319993]
New Q values:  [ 3581.11725389 -8656.02923281 -7525.7277781    707.04329347]
Reward: -1  Episode Reward:  34
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1788.35337831  -333.0995387 ]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1788.35337831  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799  1789.07652749  -333.0995387 ]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3581.11725389 -8656.02923281 -7525.7277781    707.04329347]
------
Step:8, Action:North
State  288
Old Q Values:  [ 3581.11725389 -8656.02923281 -7525.7277781    707.04329347]
New Q values:  [ 1621.20826134 -8656.02923281 -7525.7277781    707.04329347]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  631.20453261 -2230.36055317  -180.6            3.52184257]
------
Step:9, Action:North
State  208
Old Q Values:  [ 3142.40034463  2937.98585177 -1855.11188891 -3385.12952694]
New Q values:  [18472.37451082  2937.98585177 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  41
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 57366.71457656]
------
Step:10, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 57366.71457656]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 57372.88530982]
Reward: 9  Episode Reward:  50
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  27948.95045534 114735.99826397]
------
Step:11, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.16805372e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.23773944e+05]
Reward: 100009  Episode Reward:  100059
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  190.59695712    10.93436256 -1482.55814493  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [  190.59695712    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  155.58092369    10.93436256 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xa.gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   246.4738028  -1282.88074782  -180.6       ]
------
Step:2, Action:South
State  109
Old Q Values:  [ -241.10880094   246.4738028  -1282.88074782  -180.6       ]
New Q values:  [ -241.10880094   144.66379823 -1282.88074782  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  155.58092369    10.93436256 -1482.55814493  -180.6       ]
------
Step:3, Action:North
State  180
Old Q Values:  [-6255.67585525   266.69721195  1360.9092598      0.        ]
New Q values:  [-8211.13629329   266.69721195  1360.9092598      0.        ]
Reward: -10001  Episode Reward:  -9993
xxxxx
xg..x
x . x
x...x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 14927.26725394  6267.88141429  2546.60363946]
------
Step:1, Action:South
State  194
Old Q Values:  [-6.00000000e-01  8.32694800e+02  9.72747566e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -5.12479912e+03  9.72747566e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x.g x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  135.18180465  369.58639943 -272.09726687]
------
Step:1, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   89.87869734 -252.78192178]
New Q values:  [-252.35169558   11.28108573  181.58716424 -252.78192178]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   467.45228436   190.57573146]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   916.86813688   134.84880026]
New Q values:  [ -281.736      -8877.87327254   453.31833531   134.84880026]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6          270.57026852]
------
Step:3, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6          270.57026852]
New Q values:  [ -180.6        -4214.30421537  -180.6          247.86379272]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   467.45228436   190.57573146]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   467.45228436   190.57573146]
New Q values:  [ -253.44886264 -1902.20915811   260.74005156   190.57573146]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6          247.86379272]
------
Step:5, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6          247.86379272]
New Q values:  [ -180.6        -4214.30421537  -180.6          234.54101768]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   453.31833531   134.84880026]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   453.31833531   134.84880026]
New Q values:  [ -281.736      -8877.87327254   251.08963943   134.84880026]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6          234.54101768]
------
Step:7, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6          234.54101768]
New Q values:  [ -180.6        -4214.30421537  -180.6          168.5432989 ]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   251.08963943   134.84880026]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   251.08963943   134.84880026]
New Q values:  [ -281.736      -8877.87327254   150.39884544   134.84880026]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6          168.5432989 ]
------
Step:9, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6          168.5432989 ]
New Q values:  [ -180.6        -4214.30421537  -180.6          111.93697319]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254   150.39884544   134.84880026]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254   150.39884544   134.84880026]
New Q values:  [ -281.736      -8877.87327254    93.14063013   134.84880026]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6          111.93697319]
------
Step:11, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6          111.93697319]
New Q values:  [ -180.6        -4214.30421537  -180.6          122.39680474]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   260.74005156   190.57573146]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    93.14063013   134.84880026]
New Q values:  [ -281.736      -8877.87327254    73.37529348   134.84880026]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6          122.39680474]
------
Step:13, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6          122.39680474]
New Q values:  [ -180.6        -4214.30421537  -180.6           88.81336197]
Reward: -1  Episode Reward:  7
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    73.37529348   134.84880026]
------
Step:14, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686        1277.09472703     58.80836218]
New Q values:  [-10156.11771313  -5995.686        1277.09472703     66.32248434]
Reward: -1  Episode Reward:  6
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   144.66379823 -1282.88074782  -180.6       ]
------
Step:15, Action:South
State  111
Old Q Values:  [-177.44732869  135.18180465  369.58639943 -272.09726687]
New Q values:  [-177.44732869  156.16647704  369.58639943 -272.09726687]
Reward: 9  Episode Reward:  15
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  322.31251728 -549.80333104 -244.98066897]
------
Step:16, Action:South
State  188
Old Q Values:  [-6523.78898263   103.59077741  1687.41680659     0.        ]
New Q values:  [-6523.78898263   258.74794358  1687.41680659     0.        ]
Reward: 9  Episode Reward:  24
xxxxx
x   x
xg. x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  706.37210872 -8695.4397473    227.14724233 -7094.93143822]
------
Step:17, Action:East
State  260
Old Q Values:  [  706.37210872 -8695.4397473    227.14724233 -7094.93143822]
New Q values:  [  706.37210872 -8695.4397473    632.98185518 -7094.93143822]
Reward: 9  Episode Reward:  33
xxxxx
x   x
x . x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1789.07652749  -333.0995387 ]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1789.07652749  -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799  1207.3930894   -333.0995387 ]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1621.20826134 -8656.02923281 -7525.7277781    707.04329347]
------
Step:19, Action:North
State  288
Old Q Values:  [ 1621.20826134 -8656.02923281 -7525.7277781    707.04329347]
New Q values:  [ 6189.59565778 -8656.02923281 -7525.7277781    707.04329347]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18472.37451082  2937.98585177 -1855.11188891 -3385.12952694]
------
Step:20, Action:North
State  208
Old Q Values:  [18472.37451082  2937.98585177 -1855.11188891 -3385.12952694]
New Q values:  [ 7414.99381292  2937.98585177 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           88.81336197]
------
Step:21, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 57372.88530982]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 57369.35360312]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  27948.95045534 114735.99826397]
------
Step:22, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    73.37529348   134.84880026]
New Q values:  [ -281.736      -8877.87327254    73.37529348    84.19789198]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:23, Action:East
State  99
Old Q Values:  [    0.         26514.55127631 56821.31748374     0.        ]
New Q values:  [    0.         26514.55127631 57148.72647269     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  27948.95045534 114735.99826397]
------
Step:24, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    73.37529348    84.19789198]
New Q values:  [ -281.736      -8877.87327254    73.37529348    63.93752866]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   102.86123958  -180.6       ]
------
Step:25, Action:East
State  108
Old Q Values:  [-8463.16477134   972.44682938   536.87315378     0.        ]
New Q values:  [-8463.16477134   972.44682938   597.27767962     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686        1277.09472703     66.32248434]
------
Step:26, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        1277.09472703     66.32248434]
New Q values:  [-10156.11771313  -5995.686         668.60176117     66.32248434]
Reward: -1  Episode Reward:  34
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   527.87956787 -6245.61866138 -5596.41480995]
------
Step:27, Action:South
State  136
Old Q Values:  [ -724.71310357   527.87956787 -6245.61866138 -5596.41480995]
New Q values:  [ -724.71310357  2435.04997103 -6245.61866138 -5596.41480995]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7414.99381292  2937.98585177 -1855.11188891 -3385.12952694]
------
Step:28, Action:South
State  208
Old Q Values:  [ 7414.99381292  2937.98585177 -1855.11188891 -3385.12952694]
New Q values:  [ 7414.99381292  3031.47303804 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6189.59565778 -8656.02923281 -7525.7277781    707.04329347]
------
Step:29, Action:West
State  288
Old Q Values:  [ 6189.59565778 -8656.02923281 -7525.7277781    707.04329347]
New Q values:  [ 6189.59565778 -8656.02923281 -7525.7277781    874.24062904]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1890.64224285 1973.41103886]
------
Step:30, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1207.3930894   -333.0995387 ]
New Q values:  [-2527.46239811 -8521.23367799  1207.3930894    -98.41387629]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 118.08646397  -40.34168621 -792.9733772  -233.42018166]
------
Step:31, Action:North
State  261
Old Q Values:  [ 118.08646397  -40.34168621 -792.9733772  -233.42018166]
New Q values:  [1602.34220323  -40.34168621 -792.9733772  -233.42018166]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  99.56325111  126.53579727 5185.69205879    0.        ]
------
Step:32, Action:East
State  177
Old Q Values:  [17045.79524512  4025.17604709  3057.4385126      0.        ]
New Q values:  [17045.79524512  4025.17604709 62545.46914468     0.        ]
Reward: 100009  Episode Reward:  100038
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  156.16647704  369.58639943 -272.09726687]
------
Step:1, Action:East
State  109
Old Q Values:  [ -241.10880094   144.66379823 -1282.88074782  -180.6       ]
New Q values:  [-241.10880094  144.66379823 -484.77494833 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    76.59116932]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   260.74005156   190.57573146]
New Q values:  [ -253.44886264 -1902.20915811   260.74005156   186.50621241]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  156.16647704  369.58639943 -272.09726687]
------
Step:3, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573  181.58716424 -252.78192178]
New Q values:  [-252.35169558   11.28108573  150.25688117 -252.78192178]
Reward: -1  Episode Reward:  7
xxxxx
x a.x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   260.74005156   186.50621241]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   260.74005156   186.50621241]
New Q values:  [ -253.44886264 -1902.20915811   136.34002922   186.50621241]
Reward: 9  Episode Reward:  16
xxxxx
x  ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           88.81336197]
------
Step:5, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           88.81336197]
New Q values:  [ -180.6        -4214.30421537  -180.6           90.87720851]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   136.34002922   186.50621241]
------
Step:6, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   136.34002922   186.50621241]
New Q values:  [ -253.44886264 -1902.20915811   136.34002922   184.87840479]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  156.16647704  369.58639943 -272.09726687]
------
Step:7, Action:East
State  109
Old Q Values:  [-241.10880094  144.66379823 -484.77494833 -180.6       ]
New Q values:  [-241.10880094  144.66379823 -171.53262854 -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
x agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9726.26759903    76.59116932]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   136.34002922   184.87840479]
New Q values:  [ -253.44886264 -1902.20915811   136.34002922   184.22728175]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  156.16647704  369.58639943 -272.09726687]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573  150.25688117 -252.78192178]
New Q values:  [-252.35169558   11.28108573   81.51534051 -252.78192178]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    73.37529348    63.93752866]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    73.37529348    63.93752866]
New Q values:  [ -281.736      -8877.87327254    56.01327995    63.93752866]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           90.87720851]
------
Step:11, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           90.87720851]
New Q values:  [ -180.6        -4214.30421537  -180.6           54.932142  ]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    56.01327995    63.93752866]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    56.01327995    63.93752866]
New Q values:  [ -281.736      -8877.87327254    56.01327995    49.42961362]
Reward: -1  Episode Reward:  8
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   81.51534051 -252.78192178]
------
Step:13, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921   102.86123958  -180.6       ]
New Q values:  [ -180.6        -1554.81716921    57.34847981  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    56.01327995    49.42961362]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    56.01327995    49.42961362]
New Q values:  [ -281.736      -8877.87327254    38.28495458    49.42961362]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           54.932142  ]
------
Step:15, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           54.932142  ]
New Q values:  [ -180.6        -4214.30421537  -180.6           36.20174089]
Reward: -1  Episode Reward:  5
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    38.28495458    49.42961362]
------
Step:16, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    38.28495458    49.42961362]
New Q values:  [ -281.736      -8877.87327254    38.28495458    36.37638939]
Reward: -1  Episode Reward:  4
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921    57.34847981  -180.6       ]
------
Step:17, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921    57.34847981  -180.6       ]
New Q values:  [ -180.6        -1554.81716921    33.8248783   -180.6       ]
Reward: -1  Episode Reward:  3
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    38.28495458    36.37638939]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    38.28495458    36.37638939]
New Q values:  [ -281.736      -8877.87327254    25.5745041     36.37638939]
Reward: -1  Episode Reward:  2
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           36.20174089]
------
Step:19, Action:West
State  136
Old Q Values:  [ -724.71310357  2435.04997103 -6245.61866138 -5596.41480995]
New Q values:  [ -724.71310357  2435.04997103 -6245.61866138 -2038.58539563]
Reward: -1  Episode Reward:  1
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         668.60176117     66.32248434]
------
Step:20, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    25.5745041     36.37638939]
New Q values:  [ -281.736      -8877.87327254    20.49032391    36.37638939]
Reward: -1  Episode Reward:  0
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           36.20174089]
------
Step:21, Action:West
State  136
Old Q Values:  [ -724.71310357  2435.04997103 -6245.61866138 -2038.58539563]
New Q values:  [ -724.71310357  2435.04997103 -6245.61866138  -615.4536299 ]
Reward: -1  Episode Reward:  -1
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         668.60176117     66.32248434]
------
Step:22, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.49032391    36.37638939]
New Q values:  [ -281.736      -8877.87327254    18.45665183    36.37638939]
Reward: -1  Episode Reward:  -2
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           36.20174089]
------
Step:23, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           36.20174089]
New Q values:  [ -180.6        -4214.30421537  -180.6           24.79361317]
Reward: -1  Episode Reward:  -3
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    18.45665183    36.37638939]
------
Step:24, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    18.45665183    36.37638939]
New Q values:  [ -281.736      -8877.87327254    18.45665183    38.40515791]
Reward: -1  Episode Reward:  -4
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   81.51534051 -252.78192178]
------
Step:25, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921    33.8248783   -180.6       ]
New Q values:  [ -180.6        -1554.81716921    24.45149869  -180.6       ]
Reward: -1  Episode Reward:  -5
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    18.45665183    38.40515791]
------
Step:26, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    18.45665183    38.40515791]
New Q values:  [ -281.736      -8877.87327254    18.45665183    22.09751277]
Reward: -1  Episode Reward:  -6
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921    24.45149869  -180.6       ]
------
Step:27, Action:East
State  108
Old Q Values:  [-8463.16477134   972.44682938   597.27767962     0.        ]
New Q values:  [-8463.16477134   972.44682938   438.8916002      0.        ]
Reward: -1  Episode Reward:  -7
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         668.60176117     66.32248434]
------
Step:28, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         668.60176117     66.32248434]
New Q values:  [-10156.11771313  -5995.686         997.35569578     66.32248434]
Reward: -1  Episode Reward:  -8
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2435.04997103 -6245.61866138  -615.4536299 ]
------
Step:29, Action:South
State  136
Old Q Values:  [ -724.71310357  2435.04997103 -6245.61866138  -615.4536299 ]
New Q values:  [ -724.71310357  1576.37636929 -6245.61866138  -615.4536299 ]
Reward: -1  Episode Reward:  -9
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  861.17078882  2009.85460293 -6170.35693855 -1798.95296703]
------
Step:30, Action:South
State  216
Old Q Values:  [  861.17078882  2009.85460293 -6170.35693855 -1798.95296703]
New Q values:  [  861.17078882  2666.22053851 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  0
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6189.59565778 -8656.02923281 -7525.7277781    874.24062904]
------
Step:31, Action:North
State  288
Old Q Values:  [ 6189.59565778 -8656.02923281 -7525.7277781    874.24062904]
New Q values:  [ 4699.73640699 -8656.02923281 -7525.7277781    874.24062904]
Reward: -1  Episode Reward:  -1
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7414.99381292  3031.47303804 -1855.11188891 -3385.12952694]
------
Step:32, Action:North
State  216
Old Q Values:  [  861.17078882  2666.22053851 -6170.35693855 -1798.95296703]
New Q values:  [  351.30639948  2666.22053851 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  -2
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           24.79361317]
------
Step:33, Action:West
State  136
Old Q Values:  [ -724.71310357  1576.37636929 -6245.61866138  -615.4536299 ]
New Q values:  [ -724.71310357  1576.37636929 -6245.61866138    52.42525677]
Reward: -1  Episode Reward:  -3
xxxxx
xga x
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -5995.686         997.35569578     66.32248434]
------
Step:34, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         997.35569578     66.32248434]
New Q values:  [-10156.11771313  -5995.686         871.2551891      66.32248434]
Reward: -1  Episode Reward:  -4
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1576.37636929 -6245.61866138    52.42525677]
------
Step:35, Action:South
State  136
Old Q Values:  [ -724.71310357  1576.37636929 -6245.61866138    52.42525677]
New Q values:  [ -724.71310357  2854.44869159 -6245.61866138    52.42525677]
Reward: -1  Episode Reward:  -5
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7414.99381292  3031.47303804 -1855.11188891 -3385.12952694]
------
Step:36, Action:North
State  208
Old Q Values:  [ 7414.99381292  3031.47303804 -1855.11188891 -3385.12952694]
New Q values:  [ 3821.73213265  3031.47303804 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -6
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2854.44869159 -6245.61866138    52.42525677]
------
Step:37, Action:South
State  136
Old Q Values:  [ -724.71310357  2854.44869159 -6245.61866138    52.42525677]
New Q values:  [ -724.71310357  2287.69911643 -6245.61866138    52.42525677]
Reward: -1  Episode Reward:  -7
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3821.73213265  3031.47303804 -1855.11188891 -3385.12952694]
------
Step:38, Action:South
State  208
Old Q Values:  [ 3821.73213265  3031.47303804 -1855.11188891 -3385.12952694]
New Q values:  [ 3821.73213265  2621.91013731 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -8
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4699.73640699 -8656.02923281 -7525.7277781    874.24062904]
------
Step:39, Action:West
State  288
Old Q Values:  [ 4699.73640699 -8656.02923281 -7525.7277781    874.24062904]
New Q values:  [ 4699.73640699 -8656.02923281 -7525.7277781    717.31417844]
Reward: 9  Episode Reward:  1
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1207.3930894    -98.41387629]
------
Step:40, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1207.3930894    -98.41387629]
New Q values:  [-2527.46239811 -8521.23367799  1892.27815786   -98.41387629]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4699.73640699 -8656.02923281 -7525.7277781    717.31417844]
------
Step:41, Action:North
State  288
Old Q Values:  [ 4699.73640699 -8656.02923281 -7525.7277781    717.31417844]
New Q values:  [ 2068.65592258 -8656.02923281 -7525.7277781    717.31417844]
Reward: -1  Episode Reward:  -1
xxxxx
x   x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  631.20453261 -2230.36055317  -180.6            3.52184257]
------
Step:42, Action:North
State  210
Old Q Values:  [  631.20453261 -2230.36055317  -180.6            3.52184257]
New Q values:  [  259.319897   -2230.36055317  -180.6            3.52184257]
Reward: -1  Episode Reward:  -2
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           24.79361317]
------
Step:43, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           24.79361317]
New Q values:  [ -180.6        -4214.30421537  -180.6           64.58562979]
Reward: -1  Episode Reward:  -3
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   136.34002922   184.22728175]
------
Step:44, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   136.34002922   184.22728175]
New Q values:  [ -253.44886264 -1902.20915811   136.34002922    97.54551485]
Reward: -1  Episode Reward:  -4
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   81.51534051 -252.78192178]
------
Step:45, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   81.51534051 -252.78192178]
New Q values:  [-252.35169558   11.28108573   38.63539004 -252.78192178]
Reward: -1  Episode Reward:  -5
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    18.45665183    22.09751277]
------
Step:46, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   136.34002922    97.54551485]
New Q values:  [ -253.44886264 -1902.20915811   136.34002922    50.00882295]
Reward: -1  Episode Reward:  -6
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   38.63539004 -252.78192178]
------
Step:47, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   38.63539004 -252.78192178]
New Q values:  [-252.35169558   11.28108573   55.75616478 -252.78192178]
Reward: -1  Episode Reward:  -7
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   136.34002922    50.00882295]
------
Step:48, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   136.34002922    50.00882295]
New Q values:  [ -253.44886264 -1902.20915811    73.31170062    50.00882295]
Reward: -1  Episode Reward:  -8
xxxxx
x  ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           64.58562979]
------
Step:49, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           64.58562979]
New Q values:  [ -180.6        -4214.30421537  -180.6           47.2277621 ]
Reward: -1  Episode Reward:  -9
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    73.31170062    50.00882295]
------
Step:50, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    18.45665183    22.09751277]
New Q values:  [ -281.736      -8877.87327254    20.95098936    22.09751277]
Reward: -1  Episode Reward:  -10
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           47.2277621 ]
------
Step:51, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           47.2277621 ]
New Q values:  [ -180.6        -4214.30421537  -180.6           24.92035867]
Reward: -1  Episode Reward:  -11
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936    22.09751277]
------
Step:52, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.95098936    22.09751277]
New Q values:  [ -281.736      -8877.87327254    20.95098936   210.96396737]
Reward: -1  Episode Reward:  -12
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8399.5673214    675.74987421  -180.6       ]
------
Step:53, Action:East
State  106
Old Q Values:  [ -180.6        -8399.5673214    675.74987421  -180.6       ]
New Q values:  [ -180.6       -8399.5673214   332.9891399  -180.6      ]
Reward: -1  Episode Reward:  -13
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936   210.96396737]
------
Step:54, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.95098936   210.96396737]
New Q values:  [ -281.736      -8877.87327254    20.95098936   100.51243638]
Reward: -1  Episode Reward:  -14
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   55.75616478 -252.78192178]
------
Step:55, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   55.75616478 -252.78192178]
New Q values:  [-252.35169558   11.28108573   43.6959761  -252.78192178]
Reward: -1  Episode Reward:  -15
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    73.31170062    50.00882295]
------
Step:56, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    73.31170062    50.00882295]
New Q values:  [ -253.44886264 -1902.20915811    36.20078785    50.00882295]
Reward: -1  Episode Reward:  -16
xxxxx
x  ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           24.92035867]
------
Step:57, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           24.92035867]
New Q values:  [ -180.6        -4214.30421537  -180.6           39.52187438]
Reward: -1  Episode Reward:  -17
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936   100.51243638]
------
Step:58, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    36.20078785    50.00882295]
New Q values:  [ -253.44886264 -1902.20915811    36.20078785    32.51232201]
Reward: -1  Episode Reward:  -18
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   43.6959761  -252.78192178]
------
Step:59, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   43.6959761  -252.78192178]
New Q values:  [-252.35169558   11.28108573   47.03212135 -252.78192178]
Reward: -1  Episode Reward:  -19
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936   100.51243638]
------
Step:60, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.95098936   100.51243638]
New Q values:  [ -281.736      -8877.87327254    20.95098936   139.50171652]
Reward: -1  Episode Reward:  -20
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6       -8399.5673214   332.9891399  -180.6      ]
------
Step:61, Action:East
State  106
Old Q Values:  [ -180.6       -8399.5673214   332.9891399  -180.6      ]
New Q values:  [ -180.6        -8399.5673214    174.44617091  -180.6       ]
Reward: -1  Episode Reward:  -21
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936   139.50171652]
------
Step:62, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.95098936   139.50171652]
New Q values:  [ -281.736      -8877.87327254    20.95098936    69.31032301]
Reward: -1  Episode Reward:  -22
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   47.03212135 -252.78192178]
------
Step:63, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   47.03212135 -252.78192178]
New Q values:  [-252.35169558   11.28108573   29.0730849  -252.78192178]
Reward: -1  Episode Reward:  -23
xxxxx
x a x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    36.20078785    32.51232201]
------
Step:64, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    36.20078785    32.51232201]
New Q values:  [ -253.44886264 -1902.20915811    25.73687746    32.51232201]
Reward: -1  Episode Reward:  -24
xxxxx
x  ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           39.52187438]
------
Step:65, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           39.52187438]
New Q values:  [ -180.6        -4214.30421537  -180.6           24.96244636]
Reward: -1  Episode Reward:  -25
xxxxx
x a x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    25.73687746    32.51232201]
------
Step:66, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    25.73687746    32.51232201]
New Q values:  [ -253.44886264 -1902.20915811    25.73687746    21.12685427]
Reward: -1  Episode Reward:  -26
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   29.0730849  -252.78192178]
------
Step:67, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   29.0730849  -252.78192178]
New Q values:  [-252.35169558   11.28108573   31.82233086 -252.78192178]
Reward: -1  Episode Reward:  -27
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936    69.31032301]
------
Step:68, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    25.73687746    21.12685427]
New Q values:  [ -253.44886264 -1902.20915811    25.73687746    17.39744097]
Reward: -1  Episode Reward:  -28
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   31.82233086 -252.78192178]
------
Step:69, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   31.82233086 -252.78192178]
New Q values:  [-252.35169558   11.28108573   19.84999558 -252.78192178]
Reward: -1  Episode Reward:  -29
xxxxx
x a x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    25.73687746    17.39744097]
------
Step:70, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    25.73687746    17.39744097]
New Q values:  [ -253.44886264 -1902.20915811    17.18348489    17.39744097]
Reward: -1  Episode Reward:  -30
xxxxx
x  ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           24.96244636]
------
Step:71, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           24.96244636]
New Q values:  [ -180.6        -4214.30421537  -180.6           14.60421083]
Reward: -1  Episode Reward:  -31
xxxxx
x a x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    17.18348489    17.39744097]
------
Step:72, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.95098936    69.31032301]
New Q values:  [ -281.736      -8877.87327254    20.95098936    33.07912788]
Reward: -1  Episode Reward:  -32
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   19.84999558 -252.78192178]
------
Step:73, Action:East
State  106
Old Q Values:  [ -180.6        -8399.5673214    174.44617091  -180.6       ]
New Q values:  [ -180.6        -8399.5673214     79.10220673  -180.6       ]
Reward: -1  Episode Reward:  -33
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936    33.07912788]
------
Step:74, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.95098936    33.07912788]
New Q values:  [ -281.736      -8877.87327254    20.95098936    36.36231317]
Reward: -1  Episode Reward:  -34
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8399.5673214     79.10220673  -180.6       ]
------
Step:75, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   19.84999558 -252.78192178]
New Q values:  [-252.35169558   11.28108573   18.24869218 -252.78192178]
Reward: -1  Episode Reward:  -35
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936    36.36231317]
------
Step:76, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.95098936    36.36231317]
New Q values:  [ -281.736      -8877.87327254    20.95098936    37.67558729]
Reward: -1  Episode Reward:  -36
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8399.5673214     79.10220673  -180.6       ]
------
Step:77, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   18.24869218 -252.78192178]
New Q values:  [-252.35169558   11.28108573   18.00215306 -252.78192178]
Reward: -1  Episode Reward:  -37
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    20.95098936    37.67558729]
------
Step:78, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    17.18348489    17.39744097]
New Q values:  [ -253.44886264 -1902.20915811    17.18348489    11.75962231]
Reward: -1  Episode Reward:  -38
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   18.00215306 -252.78192178]
------
Step:79, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   18.00215306 -252.78192178]
New Q values:  [-252.35169558   11.28108573   11.75590669 -252.78192178]
Reward: -1  Episode Reward:  -39
xxxxx
x a x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    17.18348489    11.75962231]
------
Step:80, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    17.18348489    11.75962231]
New Q values:  [ -253.44886264 -1902.20915811    10.65465721    11.75962231]
Reward: -1  Episode Reward:  -40
xxxxx
x  ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           14.60421083]
------
Step:81, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           14.60421083]
New Q values:  [ -180.6        -4214.30421537  -180.6            8.76957102]
Reward: -1  Episode Reward:  -41
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    10.65465721    11.75962231]
------
Step:82, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    10.65465721    11.75962231]
New Q values:  [ -253.44886264 -1902.20915811    10.65465721     7.63062093]
Reward: -1  Episode Reward:  -42
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573   11.75590669 -252.78192178]
------
Step:83, Action:East
State  107
Old Q Values:  [-252.35169558   11.28108573   11.75590669 -252.78192178]
New Q values:  [-252.35169558   11.28108573    7.29875984 -252.78192178]
Reward: -1  Episode Reward:  -43
xxxxx
x a x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    10.65465721     7.63062093]
------
Step:84, Action:East
State  122
Old Q Values:  [ -281.736      -8877.87327254    20.95098936    37.67558729]
New Q values:  [ -281.736      -8877.87327254    10.41126705    37.67558729]
Reward: -1  Episode Reward:  -44
xxxxx
x  ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6            8.76957102]
------
Step:85, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6            8.76957102]
New Q values:  [ -180.6        -4214.30421537  -180.6           14.2105046 ]
Reward: -1  Episode Reward:  -45
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    10.41126705    37.67558729]
------
Step:86, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    10.41126705    37.67558729]
New Q values:  [ -281.736      -8877.87327254    10.41126705    38.20089693]
Reward: -1  Episode Reward:  -46
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8399.5673214     79.10220673  -180.6       ]
------
Step:87, Action:East
State  106
Old Q Values:  [ -180.6        -8399.5673214     79.10220673  -180.6       ]
New Q values:  [ -180.6        -8399.5673214     42.50115177  -180.6       ]
Reward: -1  Episode Reward:  -47
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    10.41126705    38.20089693]
------
Step:88, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    10.41126705    38.20089693]
New Q values:  [ -281.736      -8877.87327254    10.41126705    18.06468449]
Reward: -1  Episode Reward:  -48
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:89, Action:South
State  105
Old Q Values:  [ -180.6          162.4976908  -5851.25726525     0.        ]
New Q values:  [ -180.6          117.07335343 -5851.25726525     0.        ]
Reward: 9  Episode Reward:  -39
xxxxx
x g x
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  155.58092369    10.93436256 -1482.55814493  -180.6       ]
------
Step:90, Action:North
State  180
Old Q Values:  [-8211.13629329   266.69721195  1360.9092598      0.        ]
New Q values:  [-9105.01212451   266.69721195  1360.9092598      0.        ]
Reward: -10001  Episode Reward:  -10040
xxxxx
xg  x
x . x
x.  x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3821.73213265  2621.91013731 -1855.11188891 -3385.12952694]
------
Step:1, Action:North
State  216
Old Q Values:  [  351.30639948  2666.22053851 -6170.35693855 -1798.95296703]
New Q values:  [  150.18571117  2666.22053851 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           14.2105046 ]
------
Step:2, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           14.2105046 ]
New Q values:  [ -180.6        -4214.30421537  -180.6           16.50360719]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    10.41126705    18.06468449]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    10.41126705    18.06468449]
New Q values:  [ -281.736      -8877.87327254    10.41126705   123.50179363]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  156.16647704  369.58639943 -272.09726687]
------
Step:4, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921    24.45149869  -180.6       ]
New Q values:  [ -180.6        -1554.81716921    46.23113757  -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    10.41126705   123.50179363]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    10.41126705   123.50179363]
New Q values:  [ -281.736      -8877.87327254    10.41126705    62.67005872]
Reward: -1  Episode Reward:  25
xxxxx
xa  x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921    46.23113757  -180.6       ]
------
Step:6, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921    46.23113757  -180.6       ]
New Q values:  [ -180.6        -1554.81716921    36.69347264  -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    10.41126705    62.67005872]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    10.41126705    62.67005872]
New Q values:  [ -281.736      -8877.87327254    10.41126705    35.47606528]
Reward: -1  Episode Reward:  23
xxxxx
xa  x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921    36.69347264  -180.6       ]
------
Step:8, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921    36.69347264  -180.6       ]
New Q values:  [ -180.6        -1554.81716921    24.72020864  -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    10.41126705    35.47606528]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -8877.87327254    10.41126705    35.47606528]
New Q values:  [ -281.736      -8877.87327254    10.41126705    21.0064887 ]
Reward: -1  Episode Reward:  21
xxxxx
xa  x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921    24.72020864  -180.6       ]
------
Step:10, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921    24.72020864  -180.6       ]
New Q values:  [ -180.6        -1554.81716921    15.59003007  -180.6       ]
Reward: -1  Episode Reward:  20
xxxxx
x a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -8877.87327254    10.41126705    21.0064887 ]
------
Step:11, Action:West
State  120
Old Q Values:  [-10156.11771313  -5995.686         871.2551891      66.32248434]
New Q values:  [-10156.11771313  -5995.686         871.2551891   -5682.33695745]
Reward: -10001  Episode Reward:  -9981
xxxxx
xg  x
x . x
x...x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2068.65592258 -8656.02923281 -7525.7277781    717.31417844]
------
Step:1, Action:West
State  288
Old Q Values:  [ 2068.65592258 -8656.02923281 -7525.7277781    717.31417844]
New Q values:  [ 2068.65592258 -8656.02923281 -7525.7277781    860.00911873]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1892.27815786   -98.41387629]
------
Step:2, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1890.64224285 1973.41103886]
New Q values:  [  37.74111519 -168.92307549 1376.25367391 1973.41103886]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2068.65592258 -8656.02923281 -7525.7277781    860.00911873]
------
Step:3, Action:West
State  288
Old Q Values:  [ 2068.65592258 -8656.02923281 -7525.7277781    860.00911873]
New Q values:  [ 2068.65592258 -8656.02923281 -7525.7277781    911.08709485]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1892.27815786   -98.41387629]
------
Step:4, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1376.25367391 1973.41103886]
New Q values:  [  37.74111519 -168.92307549 1170.49824634 1973.41103886]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2068.65592258 -8656.02923281 -7525.7277781    911.08709485]
------
Step:5, Action:West
State  288
Old Q Values:  [ 2068.65592258 -8656.02923281 -7525.7277781    911.08709485]
New Q values:  [ 2068.65592258 -8656.02923281 -7525.7277781    955.8581496 ]
Reward: -1  Episode Reward:  5
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1170.49824634 1973.41103886]
------
Step:6, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1170.49824634 1973.41103886]
New Q values:  [  37.74111519 -168.92307549 1170.49824634 1474.4914894 ]
Reward: 9  Episode Reward:  14
xxxxx
x..gx
x.. x
xa  x
xxxxx
Step:7, Action:West
State  261
Old Q Values:  [1602.34220323  -40.34168621 -792.9733772  -233.42018166]
New Q values:  [1602.34220323  -40.34168621 -792.9733772   206.7345883 ]
Reward: -301  Episode Reward:  -287
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1602.34220323  -40.34168621 -792.9733772   206.7345883 ]
------
Step:8, Action:North
State  261
Old Q Values:  [1602.34220323  -40.34168621 -792.9733772   206.7345883 ]
New Q values:  [2202.04449893  -40.34168621 -792.9733772   206.7345883 ]
Reward: 9  Episode Reward:  -278
xxxxx
x...x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  99.56325111  126.53579727 5185.69205879    0.        ]
------
Step:9, Action:East
State  183
Old Q Values:  [  99.56325111  126.53579727 5185.69205879    0.        ]
New Q values:  [  99.56325111  126.53579727 4997.91952105    0.        ]
Reward: 9  Episode Reward:  -269
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -5.12479912e+03  9.72747566e+03  1.20371620e+03]
------
Step:10, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  1.67512972e+03  1.31039399e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  1.67512972e+03  1.67007723e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  -270
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3821.73213265  2621.91013731 -1855.11188891 -3385.12952694]
------
Step:11, Action:North
State  208
Old Q Values:  [ 3821.73213265  2621.91013731 -1855.11188891 -3385.12952694]
New Q values:  [18744.89893399  2621.91013731 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  -261
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 57369.35360312]
------
Step:12, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 57369.35360312]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 57373.94092044]
Reward: 9  Episode Reward:  -252
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  27948.95045534 114735.99826397]
------
Step:13, Action:West
State  114
Old Q Values:  [  -180.6          4272.38349051  27948.95045534 114735.99826397]
New Q values:  [  -180.6          4272.38349051  27948.95045534 123044.4172474 ]
Reward: 100009  Episode Reward:  99757
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  155.58092369    10.93436256 -1482.55814493  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [  155.58092369    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  111.03150894    10.93436256 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  144.66379823 -171.53262854 -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-8463.16477134   972.44682938   438.8916002      0.        ]
New Q values:  [-8463.16477134   796.65150969   438.8916002      0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-9105.01212451   266.69721195  1360.9092598      0.        ]
------
Step:3, Action:East
State  189
Old Q Values:  [   9.84673294  322.31251728 -549.80333104 -244.98066897]
New Q values:  [   9.84673294  322.31251728  379.97224941 -244.98066897]
Reward: 9  Episode Reward:  17
xxxxx
x g x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1981.64527276  715.61136736  181.20343395]
------
Step:4, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  1.67512972e+03  1.67007723e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  1.24313534e+03  1.67007723e+03 -9.78728177e+01]
Reward: 9  Episode Reward:  26
xxxxx
x . x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1892.27815786   -98.41387629]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1892.27815786   -98.41387629]
New Q values:  [-2527.46239811 -8521.23367799  1382.90803992   -98.41387629]
Reward: 9  Episode Reward:  35
xxxxx
x . x
x  .x
x.gax
xxxxx
Step:6, Action:South
State  288
Old Q Values:  [ 2068.65592258 -8656.02923281 -7525.7277781    955.8581496 ]
New Q values:  [ 2068.65592258 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: -10301  Episode Reward:  -10266
xxxxx
x . x
x  .x
x. gx
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    10.65465721     7.63062093]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    10.65465721     7.63062093]
New Q values:  [ -253.44886264 -1902.20915811    14.61294504     7.63062093]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           16.50360719]
------
Step:2, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           16.50360719]
New Q values:  [ -180.6        -4214.30421537  -180.6           12.30338949]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.g x
x...x
xxxxx
Step:3, Action:South
State  122
Old Q Values:  [ -281.736      -8877.87327254    10.41126705    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624     10.41126705    21.0064887 ]
Reward: -10001  Episode Reward:  -9993
xxxxx
x.a x
xg. x
x...x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2202.04449893  -40.34168621 -792.9733772   206.7345883 ]
------
Step:1, Action:North
State  261
Old Q Values:  [2202.04449893  -40.34168621 -792.9733772   206.7345883 ]
New Q values:  [ 919.52725225  -40.34168621 -792.9733772   206.7345883 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  111.03150894    10.93436256 -1482.55814493  -180.6       ]
------
Step:2, Action:North
State  183
Old Q Values:  [  99.56325111  126.53579727 4997.91952105    0.        ]
New Q values:  [ 156.10122027  126.53579727 4997.91952105    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xa. x
x ..x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  156.16647704  369.58639943 -272.09726687]
------
Step:3, Action:East
State  111
Old Q Values:  [-177.44732869  156.16647704  369.58639943 -272.09726687]
New Q values:  [-177.44732869  156.16647704  157.61844328 -272.09726687]
Reward: 9  Episode Reward:  27
xxxxx
x a x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    14.61294504     7.63062093]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    14.61294504     7.63062093]
New Q values:  [ -253.44886264 -1902.20915811     8.93619486     7.63062093]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x ..x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6           12.30338949]
------
Step:5, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6           12.30338949]
New Q values:  [ -180.6        -4214.30421537  -180.6         3126.15176691]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x ..x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[    0.          2047.43645092 10406.10137037   233.92161362]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     10.41126705    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624    941.41003689    21.0064887 ]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -4214.30421537  -180.6         3126.15176691]
------
Step:7, Action:West
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6         3126.15176691]
New Q values:  [ -180.6        -4214.30421537  -180.6         1532.28371783]
Reward: -1  Episode Reward:  23
xxxxx
x a x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    941.41003689    21.0064887 ]
------
Step:8, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686         871.2551891   -5682.33695745]
New Q values:  [-10156.11771313  -5995.686        1034.21181057  -5682.33695745]
Reward: -1  Episode Reward:  22
xxxxx
xg ax
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2287.69911643 -6245.61866138    52.42525677]
------
Step:9, Action:South
State  138
Old Q Values:  [ -180.6        -4214.30421537  -180.6         1532.28371783]
New Q values:  [-180.6        -880.4555246  -180.6        1532.28371783]
Reward: 9  Episode Reward:  31
xxxxx
x   x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  150.18571117  2666.22053851 -6170.35693855 -1798.95296703]
------
Step:10, Action:South
State  208
Old Q Values:  [18744.89893399  2621.91013731 -1855.11188891 -3385.12952694]
New Q values:  [18744.89893399  1674.7608317  -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  40
xxxxx
xg  x
x . x
x .ax
xxxxx
Step:11, Action:North
State  288
Old Q Values:  [ 2068.65592258 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [ 6450.33204923 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18744.89893399  1674.7608317  -1855.11188891 -3385.12952694]
------
Step:12, Action:North
State  216
Old Q Values:  [  150.18571117  2666.22053851 -6170.35693855 -1798.95296703]
New Q values:  [  519.15939982  2666.22053851 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        1532.28371783]
------
Step:13, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        1532.28371783]
New Q values:  [-180.6       -880.4555246 -180.6        894.7364982]
Reward: -1  Episode Reward:  37
xxxxx
x a x
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    941.41003689    21.0064887 ]
------
Step:14, Action:East
State  120
Old Q Values:  [-10156.11771313  -5995.686        1034.21181057  -5682.33695745]
New Q values:  [-10156.11771313  -5995.686        1099.39445916  -5682.33695745]
Reward: -1  Episode Reward:  36
xxxxx
xg ax
x . x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2287.69911643 -6245.61866138    52.42525677]
------
Step:15, Action:South
State  136
Old Q Values:  [ -724.71310357  2287.69911643 -6245.61866138    52.42525677]
New Q values:  [ -724.71310357  6537.94932677 -6245.61866138    52.42525677]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x .ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18744.89893399  1674.7608317  -1855.11188891 -3385.12952694]
------
Step:16, Action:North
State  216
Old Q Values:  [  519.15939982  2666.22053851 -6170.35693855 -1798.95296703]
New Q values:  [  475.48470939  2666.22053851 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        894.7364982]
------
Step:17, Action:West
State  136
Old Q Values:  [ -724.71310357  6537.94932677 -6245.61866138    52.42525677]
New Q values:  [ -724.71310357  6537.94932677 -6245.61866138 -5649.81155954]
Reward: -10001  Episode Reward:  -9967
xxxxx
x g x
x . x
x . x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  144.66379823 -171.53262854 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869  156.16647704  157.61844328 -272.09726687]
New Q values:  [-177.44732869  101.1760435   157.61844328 -272.09726687]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  111.03150894    10.93436256 -1482.55814493  -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [  111.03150894    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [   87.21174305    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
xa.gx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  144.66379823 -171.53262854 -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869  101.1760435   157.61844328 -272.09726687]
New Q values:  [-177.44732869   66.03394031  157.61844328 -272.09726687]
Reward: -1  Episode Reward:  7
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.21174305    10.93436256 -1482.55814493  -180.6       ]
------
Step:4, Action:North
State  183
Old Q Values:  [ 156.10122027  126.53579727 4997.91952105    0.        ]
New Q values:  [ 109.12602109  126.53579727 4997.91952105    0.        ]
Reward: -1  Episode Reward:  6
xxxxx
xa. x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   66.03394031  157.61844328 -272.09726687]
------
Step:5, Action:East
State  111
Old Q Values:  [-177.44732869   66.03394031  157.61844328 -272.09726687]
New Q values:  [-177.44732869   66.03394031  350.87038838 -272.09726687]
Reward: 9  Episode Reward:  15
xxxxx
x a x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    941.41003689    21.0064887 ]
------
Step:6, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811     8.93619486     7.63062093]
New Q values:  [ -253.44886264 -1902.20915811   271.3954274      7.63062093]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        894.7364982]
------
Step:7, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6        894.7364982]
New Q values:  [-180.6       -880.4555246 -180.6        438.7132275]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   271.3954274      7.63062093]
------
Step:8, Action:East
State  121
Old Q Values:  [    0.             0.         -9726.26759903    76.59116932]
New Q values:  [    0.             0.         -7929.72224158    76.59116932]
Reward: -10001  Episode Reward:  -9988
xxxxx
x  gx
x ..x
x...x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.21174305    10.93436256 -1482.55814493  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [   87.21174305    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [   83.68383669    10.93436256 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xa.gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  144.66379823 -171.53262854 -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869   66.03394031  350.87038838 -272.09726687]
New Q values:  [-177.44732869   50.91872713  350.87038838 -272.09726687]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   83.68383669    10.93436256 -1482.55814493  -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [   83.68383669    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  175.34520886    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xa..x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        474.9055806    0.           0.       ]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869   50.91872713  350.87038838 -272.09726687]
New Q values:  [-177.44732869   72.37105351  350.87038838 -272.09726687]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  175.34520886    10.93436256 -1482.55814493  -180.6       ]
------
Step:5, Action:North
State  183
Old Q Values:  [ 109.12602109  126.53579727 4997.91952105    0.        ]
New Q values:  [ 185.52208262  126.53579727 4997.91952105    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
xa..x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        474.9055806    0.           0.       ]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869   72.37105351  350.87038838 -272.09726687]
New Q values:  [-177.44732869   80.95198406  350.87038838 -272.09726687]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
xa.gx
x...x
xxxxx
Step:7, Action:South
State  183
Old Q Values:  [ 185.52208262  126.53579727 4997.91952105    0.        ]
New Q values:  [ 185.52208262  331.87249459 4997.91952105    0.        ]
Reward: 9  Episode Reward:  13
xxxxx
x ..x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 919.52725225  -40.34168621 -792.9733772   206.7345883 ]
------
Step:8, Action:North
State  261
Old Q Values:  [ 919.52725225  -40.34168621 -792.9733772   206.7345883 ]
New Q values:  [1866.58675722  -40.34168621 -792.9733772   206.7345883 ]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
xa. x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 185.52208262  331.87249459 4997.91952105    0.        ]
------
Step:9, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  1869.80484129     0.        ]
New Q values:  [    0.         -5969.29177534  3671.56463405     0.        ]
Reward: 9  Episode Reward:  21
xxxxx
x ..x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -5.12479912e+03  9.72747566e+03  1.20371620e+03]
------
Step:10, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  1.24313534e+03  1.67007723e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  1.24313534e+03  6.29090057e+03 -9.78728177e+01]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18744.89893399  1674.7608317  -1855.11188891 -3385.12952694]
------
Step:11, Action:North
State  216
Old Q Values:  [  475.48470939  2666.22053851 -6170.35693855 -1798.95296703]
New Q values:  [  327.20785201  2666.22053851 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  29
xxxxx
x .ax
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        438.7132275]
------
Step:12, Action:West
State  136
Old Q Values:  [ -724.71310357  6537.94932677 -6245.61866138 -5649.81155954]
New Q values:  [ -724.71310357  6537.94932677 -6245.61866138 -7924.70628607]
Reward: -9991  Episode Reward:  -9962
xxxxx
x g x
x   x
x ..x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18744.89893399  1674.7608317  -1855.11188891 -3385.12952694]
------
Step:1, Action:North
State  216
Old Q Values:  [  327.20785201  2666.22053851 -6170.35693855 -1798.95296703]
New Q values:  [ 2097.66793883  2666.22053851 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  6537.94932677 -6245.61866138 -7924.70628607]
------
Step:2, Action:South
State  136
Old Q Values:  [ -724.71310357  6537.94932677 -6245.61866138 -7924.70628607]
New Q values:  [ -724.71310357  8238.04941091 -6245.61866138 -7924.70628607]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18744.89893399  1674.7608317  -1855.11188891 -3385.12952694]
------
Step:3, Action:South
State  208
Old Q Values:  [18744.89893399  1674.7608317  -1855.11188891 -3385.12952694]
New Q values:  [18744.89893399  2610.40394745 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  17
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6450.33204923 -9022.41491635 -7525.7277781    955.8581496 ]
------
Step:4, Action:North
State  288
Old Q Values:  [ 6450.33204923 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [ 8203.00249989 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18744.89893399  2610.40394745 -1855.11188891 -3385.12952694]
------
Step:5, Action:North
State  210
Old Q Values:  [  259.319897   -2230.36055317  -180.6            3.52184257]
New Q values:  [  234.74192705 -2230.36055317  -180.6            3.52184257]
Reward: -1  Episode Reward:  15
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        438.7132275]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6        438.7132275]
New Q values:  [-180.6        -880.4555246  -180.6         463.30830207]
Reward: 9  Episode Reward:  24
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    941.41003689    21.0064887 ]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    941.41003689    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624    514.95650538    21.0064887 ]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         463.30830207]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         463.30830207]
New Q values:  [-180.6        -880.4555246  -180.6         339.21027244]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    514.95650538    21.0064887 ]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    514.95650538    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624    307.14568388    21.0064887 ]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         339.21027244]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         339.21027244]
New Q values:  [-180.6        -880.4555246  -180.6         227.22781414]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    307.14568388    21.0064887 ]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    307.14568388    21.0064887 ]
New Q values:  [ -281.736     -9545.4473624   190.4266178    21.0064887]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         227.22781414]
------
Step:12, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         227.22781414]
New Q values:  [-180.6       -880.4555246 -180.6        147.419111 ]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736     -9545.4473624   190.4266178    21.0064887]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736     -9545.4473624   190.4266178    21.0064887]
New Q values:  [ -281.736      -9545.4473624    119.79638042    21.0064887 ]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        147.419111 ]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6        147.419111 ]
New Q values:  [-180.6        -880.4555246  -180.6          94.30655852]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    119.79638042    21.0064887 ]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    119.79638042    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624     75.61051972    21.0064887 ]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          94.30655852]
------
Step:16, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          94.30655852]
New Q values:  [-180.6        -880.4555246  -180.6          59.80577933]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.g x
x.. x
xxxxx
Step:17, Action:South
State  120
Old Q Values:  [-10156.11771313  -5995.686        1099.39445916  -5682.33695745]
New Q values:  [-10156.11771313  -8069.05606225   1099.39445916  -5682.33695745]
Reward: -10001  Episode Reward:  -9987
xxxxx
x g x
x.. x
x.. x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 14927.26725394  6267.88141429  2546.60363946]
------
Step:1, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -5.12479912e+03  9.72747566e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.62964724e+03  9.72747566e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x.g x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1866.58675722  -40.34168621 -792.9733772   206.7345883 ]
------
Step:1, Action:North
State  260
Old Q Values:  [  706.37210872 -8695.4397473    632.98185518 -7094.93143822]
New Q values:  [ 1389.4182337  -8695.4397473    632.98185518 -7094.93143822]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3671.56463405     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 185.52208262  331.87249459 4997.91952105    0.        ]
New Q values:  [ 185.52208262  331.87249459 4922.81050595    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.62964724e+03  9.72747566e+03  1.20371620e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.62964724e+03  9.72747566e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.62964724e+03  3.96681284e+03  1.20371620e+03]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  234.74192705 -2230.36055317  -180.6            3.52184257]
------
Step:4, Action:North
State  208
Old Q Values:  [18744.89893399  2610.40394745 -1855.11188891 -3385.12952694]
New Q values:  [24715.54184973  2610.40394745 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 57373.94092044]
------
Step:5, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 57373.94092044]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 59868.30154239]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  27948.95045534 123044.4172474 ]
------
Step:6, Action:West
State  126
Old Q Values:  [    0.          2047.43645092 10406.10137037   233.92161362]
New Q values:  [    0.          2047.43645092 10406.10137037   204.22976196]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   80.95198406  350.87038838 -272.09726687]
------
Step:7, Action:East
State  111
Old Q Values:  [-177.44732869   80.95198406  350.87038838 -272.09726687]
New Q values:  [-177.44732869   80.95198406 3261.57856646 -272.09726687]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[    0.          2047.43645092 10406.10137037   204.22976196]
------
Step:8, Action:East
State  126
Old Q Values:  [    0.          2047.43645092 10406.10137037   204.22976196]
New Q values:  [   0.         2047.43645092 4179.78228195  204.22976196]
Reward: -1  Episode Reward:  52
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          59.80577933]
------
Step:9, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          59.80577933]
New Q values:  [-180.6        -880.4555246  -180.6        1277.25699631]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.         2047.43645092 4179.78228195  204.22976196]
------
Step:10, Action:East
State  127
Old Q Values:  [  0.           1.67014986   5.38992087 403.43324931]
New Q values:  [  0.           1.67014986 384.73306724 403.43324931]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        1277.25699631]
------
Step:11, Action:West
State  136
Old Q Values:  [ -724.71310357  8238.04941091 -6245.61866138 -7924.70628607]
New Q values:  [ -724.71310357  8238.04941091 -6245.61866138 -3170.15807104]
Reward: -1  Episode Reward:  49
xxxxx
x agx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         1.08147795]
------
Step:12, Action:West
State  127
Old Q Values:  [  0.           1.67014986 384.73306724 403.43324931]
New Q values:  [   0.            1.67014986  384.73306724 1139.24686966]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   80.95198406 3261.57856646 -272.09726687]
------
Step:13, Action:East
State  111
Old Q Values:  [-177.44732869   80.95198406 3261.57856646 -272.09726687]
New Q values:  [-177.44732869   80.95198406 2557.96611117 -272.09726687]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.         2047.43645092 4179.78228195  204.22976196]
------
Step:14, Action:East
State  124
Old Q Values:  [ 0.00000000e+00  5.40000000e+00 -5.98640012e+03  3.41121603e+02]
New Q values:  [  0.           5.4         76.25477662 341.12160345]
Reward: -1  Episode Reward:  46
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  8238.04941091 -6245.61866138 -3170.15807104]
------
Step:15, Action:South
State  136
Old Q Values:  [ -724.71310357  8238.04941091 -6245.61866138 -3170.15807104]
New Q values:  [ -724.71310357  4094.48592591 -6245.61866138 -3170.15807104]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2097.66793883  2666.22053851 -6170.35693855 -1798.95296703]
------
Step:16, Action:South
State  208
Old Q Values:  [24715.54184973  2610.40394745 -1855.11188891 -3385.12952694]
New Q values:  [24715.54184973 63510.46232895 -1855.11188891 -3385.12952694]
Reward: 100009  Episode Reward:  100054
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8203.00249989 -9022.41491635 -7525.7277781    955.8581496 ]
------
Step:1, Action:North
State  288
Old Q Values:  [ 8203.00249989 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [22339.73969864 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24715.54184973 63510.46232895 -1855.11188891 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [24715.54184973 63510.46232895 -1855.11188891 -3385.12952694]
New Q values:  [24715.54184973 32105.50684117 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[22339.73969864 -9022.41491635 -7525.7277781    955.8581496 ]
------
Step:3, Action:North
State  288
Old Q Values:  [22339.73969864 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [18566.94793181 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: -1  Episode Reward:  7
xxxxx
x .gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24715.54184973 32105.50684117 -1855.11188891 -3385.12952694]
------
Step:4, Action:South
State  208
Old Q Values:  [24715.54184973 32105.50684117 -1855.11188891 -3385.12952694]
New Q values:  [24715.54184973 18411.68711601 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  6
xxxxx
x .gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18566.94793181 -9022.41491635 -7525.7277781    955.8581496 ]
------
Step:5, Action:North
State  288
Old Q Values:  [18566.94793181 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [ 8840.84172764 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: -10001  Episode Reward:  -9995
xxxxx
x ..x
x..gx
x.. x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  175.34520886    10.93436256 -1482.55814493  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [  175.34520886    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [  118.93722301    10.93436256 -1482.55814493  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xa.gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  144.66379823 -171.53262854 -180.6       ]
------
Step:2, Action:South
State  109
Old Q Values:  [-241.10880094  144.66379823 -171.53262854 -180.6       ]
New Q values:  [-241.10880094   92.94668619 -171.53262854 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  118.93722301    10.93436256 -1482.55814493  -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [  118.93722301    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [   74.85889506    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xa.gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   92.94668619 -171.53262854 -180.6       ]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869   80.95198406 2557.96611117 -272.09726687]
New Q values:  [-177.44732869   54.23846214 2557.96611117 -272.09726687]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   74.85889506    10.93436256 -1482.55814493  -180.6       ]
------
Step:5, Action:North
State  183
Old Q Values:  [ 185.52208262  331.87249459 4922.81050595    0.        ]
New Q values:  [ 216.08050723  331.87249459 4922.81050595    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
xa..x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        474.9055806    0.           0.       ]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869   54.23846214 2557.96611117 -272.09726687]
New Q values:  [-177.44732869   43.55305338 2557.96611117 -272.09726687]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   74.85889506    10.93436256 -1482.55814493  -180.6       ]
------
Step:7, Action:North
State  181
Old Q Values:  [   74.85889506    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [   57.22756388    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  3
xxxxx
xa.gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   92.94668619 -171.53262854 -180.6       ]
------
Step:8, Action:South
State  109
Old Q Values:  [-241.10880094   92.94668619 -171.53262854 -180.6       ]
New Q values:  [-241.10880094   53.74694364 -171.53262854 -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   57.22756388    10.93436256 -1482.55814493  -180.6       ]
------
Step:9, Action:North
State  181
Old Q Values:  [   57.22756388    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [   38.41510865    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  1
xxxxx
xa.gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   53.74694364 -171.53262854 -180.6       ]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869   43.55305338 2557.96611117 -272.09726687]
New Q values:  [-177.44732869   28.34575394 2557.96611117 -272.09726687]
Reward: -1  Episode Reward:  0
xxxxx
x ..x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   38.41510865    10.93436256 -1482.55814493  -180.6       ]
------
Step:11, Action:North
State  183
Old Q Values:  [ 216.08050723  331.87249459 4922.81050595    0.        ]
New Q values:  [ 228.30387707  331.87249459 4922.81050595    0.        ]
Reward: -1  Episode Reward:  -1
xxxxx
xa..x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        474.9055806    0.           0.       ]
------
Step:12, Action:South
State  103
Old Q Values:  [-180.6        474.9055806    0.           0.       ]
New Q values:  [-180.6        1666.20538403    0.            0.        ]
Reward: -1  Episode Reward:  -2
xxxxx
x ..x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 228.30387707  331.87249459 4922.81050595    0.        ]
------
Step:13, Action:East
State  183
Old Q Values:  [ 228.30387707  331.87249459 4922.81050595    0.        ]
New Q values:  [ 228.30387707  331.87249459 2399.71824529    0.        ]
Reward: 9  Episode Reward:  7
xxxxx
x ..x
x a x
x..gx
xxxxx
Step:14, Action:East
State  201
Old Q Values:  [ 2.33354578e+00  1.66210281e+03 -9.35708462e+03  2.40000000e-02]
New Q values:  [ 2.33354578e+00  1.66210281e+03 -8.94356769e+03  2.40000000e-02]
Reward: -10001  Episode Reward:  -9994
xxxxx
x ..x
x  gx
x...x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  234.74192705 -2230.36055317  -180.6            3.52184257]
------
Step:1, Action:North
State  208
Old Q Values:  [24715.54184973 18411.68711601 -1855.11188891 -3385.12952694]
New Q values:  [10274.79383879 18411.68711601 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        1277.25699631]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        1277.25699631]
New Q values:  [-180.6        -880.4555246  -180.6         538.98595444]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     75.61051972    21.0064887 ]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     75.61051972    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624    191.33999422    21.0064887 ]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         538.98595444]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         538.98595444]
New Q values:  [-180.6       -880.4555246 -180.6        296.41301  ]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   271.3954274      7.63062093]
------
Step:5, Action:East
State  121
Old Q Values:  [    0.             0.         -7929.72224158    76.59116932]
New Q values:  [    0.             0.         -7944.14311886    76.59116932]
Reward: -10001  Episode Reward:  -9985
xxxxx
x. gx
x.. x
x. .x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   271.3954274      7.63062093]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    191.33999422    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624    170.85990069    21.0064887 ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        296.41301  ]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6        296.41301  ]
New Q values:  [-180.6        -880.4555246  -180.6         199.38383222]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   271.3954274      7.63062093]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   271.3954274      7.63062093]
New Q values:  [ -253.44886264 -1902.20915811   167.77332063     7.63062093]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         199.38383222]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         199.38383222]
New Q values:  [-180.6        -880.4555246  -180.6         130.41150309]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    170.85990069    21.0064887 ]
------
Step:5, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   1099.39445916  -5682.33695745]
New Q values:  [-10156.11771313  -8069.05606225   1667.50356144  -5682.33695745]
Reward: -1  Episode Reward:  5
xxxxx
x.gax
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  4094.48592591 -6245.61866138 -3170.15807104]
------
Step:6, Action:South
State  136
Old Q Values:  [ -724.71310357  4094.48592591 -6245.61866138 -3170.15807104]
New Q values:  [ -724.71310357  7166.70050517 -6245.61866138 -3170.15807104]
Reward: 9  Episode Reward:  14
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10274.79383879 18411.68711601 -1855.11188891 -3385.12952694]
------
Step:7, Action:South
State  208
Old Q Values:  [10274.79383879 18411.68711601 -1855.11188891 -3385.12952694]
New Q values:  [10274.79383879 10016.3273647  -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  13
xxxxx
xg  x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8840.84172764 -9022.41491635 -7525.7277781    955.8581496 ]
------
Step:8, Action:North
State  288
Old Q Values:  [ 8840.84172764 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [ 6618.17484269 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10274.79383879 10016.3273647  -1855.11188891 -3385.12952694]
------
Step:9, Action:North
State  208
Old Q Values:  [10274.79383879 10016.3273647  -1855.11188891 -3385.12952694]
New Q values:  [ 4148.44098644 10016.3273647  -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         130.41150309]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         130.41150309]
New Q values:  [-180.6        -880.4555246  -180.6         102.82257144]
Reward: -1  Episode Reward:  10
xxxxx
x.a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    170.85990069    21.0064887 ]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    170.85990069    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624     98.59073171    21.0064887 ]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         102.82257144]
------
Step:12, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         102.82257144]
New Q values:  [-180.6        -880.4555246  -180.6          90.86102477]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   167.77332063     7.63062093]
------
Step:13, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   167.77332063     7.63062093]
New Q values:  [ -253.44886264 -1902.20915811    93.76763568     7.63062093]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          90.86102477]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          90.86102477]
New Q values:  [-180.6        -880.4555246  -180.6          63.87470061]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    93.76763568     7.63062093]
------
Step:15, Action:East
State  121
Old Q Values:  [    0.             0.         -7944.14311886    76.59116932]
New Q values:  [    0.             0.         -7028.24709599    76.59116932]
Reward: -10001  Episode Reward:  -9995
xxxxx
x. gx
x.. x
x.. x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   38.41510865    10.93436256 -1482.55814493  -180.6       ]
------
Step:1, Action:North
State  183
Old Q Values:  [ 228.30387707  331.87249459 2399.71824529    0.        ]
New Q values:  [ 596.58316604  331.87249459 2399.71824529    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        1666.20538403    0.            0.        ]
------
Step:2, Action:South
State  103
Old Q Values:  [-180.6        1666.20538403    0.            0.        ]
New Q values:  [-180.6       1385.7976272    0.           0.       ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 596.58316604  331.87249459 2399.71824529    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 596.58316604  331.87249459 2399.71824529    0.        ]
New Q values:  [ 596.58316604  331.87249459 1390.48134102    0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[   3.60604218 1417.31347636   16.53929541    0.        ]
------
Step:4, Action:South
State  193
Old Q Values:  [-5922.26708831  4390.31246547 -3909.58186816  4353.80449455]
New Q values:  [-5922.26708831  2203.87243301 -3909.58186816  4353.80449455]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1170.49824634 1474.4914894 ]
------
Step:5, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1170.49824634 1474.4914894 ]
New Q values:  [  37.74111519 -168.92307549 1170.49824634 1155.17262292]
Reward: 9  Episode Reward:  35
xxxxx
x .gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1866.58675722  -40.34168621 -792.9733772   206.7345883 ]
------
Step:6, Action:North
State  261
Old Q Values:  [1866.58675722  -40.34168621 -792.9733772   206.7345883 ]
New Q values:  [ 757.55923548  -40.34168621 -792.9733772   206.7345883 ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   38.41510865    10.93436256 -1482.55814493  -180.6       ]
------
Step:7, Action:North
State  181
Old Q Values:  [   38.41510865    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [   34.62799388    10.93436256 -1482.55814493  -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
xa.gx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         66.20650142  0.          0.        ]
------
Step:8, Action:South
State  103
Old Q Values:  [-180.6       1385.7976272    0.           0.       ]
New Q values:  [-180.6         564.10744904    0.            0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   34.62799388    10.93436256 -1482.55814493  -180.6       ]
------
Step:9, Action:North
State  183
Old Q Values:  [ 596.58316604  331.87249459 1390.48134102    0.        ]
New Q values:  [ 407.26550113  331.87249459 1390.48134102    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
xa..x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         564.10744904    0.            0.        ]
------
Step:10, Action:South
State  103
Old Q Values:  [-180.6         564.10744904    0.            0.        ]
New Q values:  [-180.6         642.18738192    0.            0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 407.26550113  331.87249459 1390.48134102    0.        ]
------
Step:11, Action:East
State  181
Old Q Values:  [   34.62799388    10.93436256 -1482.55814493  -180.6       ]
New Q values:  [   34.62799388    10.93436256 -4706.35308576  -180.6       ]
Reward: -10001  Episode Reward:  -9971
xxxxx
x ..x
x g x
x  .x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.62964724e+03  3.96681284e+03  1.20371620e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.62964724e+03  3.96681284e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.62964724e+03  1.66254771e+03  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  234.74192705 -2230.36055317  -180.6            3.52184257]
------
Step:2, Action:North
State  210
Old Q Values:  [  234.74192705 -2230.36055317  -180.6            3.52184257]
New Q values:  [  118.459181   -2230.36055317  -180.6            3.52184257]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          63.87470061]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          63.87470061]
New Q values:  [ -180.6         -880.4555246   -180.6        36944.27505446]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  27948.95045534 123044.4172474 ]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     98.59073171    21.0064887 ]
New Q values:  [ -281.736      -9545.4473624     98.59073171    26.55294101]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8399.5673214     42.50115177  -180.6       ]
------
Step:5, Action:East
State  106
Old Q Values:  [ -180.6        -8399.5673214     42.50115177  -180.6       ]
New Q values:  [ -180.6        -8399.5673214     45.97768022  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     98.59073171    26.55294101]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     98.59073171    26.55294101]
New Q values:  [ -281.736      -9545.4473624  11122.11880902    26.55294101]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6         -880.4555246   -180.6        36944.27505446]
------
Step:7, Action:West
State  138
Old Q Values:  [ -180.6         -880.4555246   -180.6        36944.27505446]
New Q values:  [ -180.6         -880.4555246   -180.6        14805.24031249]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    93.76763568     7.63062093]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624  11122.11880902    26.55294101]
New Q values:  [ -281.736      -9545.4473624   8889.81961736    26.55294101]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6         -880.4555246   -180.6        14805.24031249]
------
Step:9, Action:West
State  138
Old Q Values:  [ -180.6         -880.4555246   -180.6        14805.24031249]
New Q values:  [-180.6       -880.4555246 -180.6       5949.6264157]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    93.76763568     7.63062093]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   8889.81961736    26.55294101]
New Q values:  [ -281.736      -9545.4473624   5340.21577165    26.55294101]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6       5949.6264157]
------
Step:11, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6       5949.6264157]
New Q values:  [-180.6        -880.4555246  -180.6        2407.38085698]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    93.76763568     7.63062093]
------
Step:12, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    93.76763568     7.63062093]
New Q values:  [ -253.44886264 -1902.20915811   759.12131137     7.63062093]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        2407.38085698]
------
Step:13, Action:West
State  136
Old Q Values:  [ -724.71310357  7166.70050517 -6245.61866138 -3170.15807104]
New Q values:  [ -724.71310357  7166.70050517 -6245.61866138 -1245.68587762]
Reward: -1  Episode Reward:  27
xxxxx
x agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7028.24709599    76.59116932]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   759.12131137     7.63062093]
New Q values:  [ -253.44886264 -1902.20915811   759.12131137     5.83657409]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:15, Action:South
State  105
Old Q Values:  [ -180.6          117.07335343 -5851.25726525     0.        ]
New Q values:  [ -180.6          265.81186233 -5851.25726525     0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x  gx
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00  7.11941737e+02 -1.78980000e+02]
------
Step:16, Action:East
State  185
Old Q Values:  [-6.00000000e-01  0.00000000e+00  7.11941737e+02 -1.78980000e+02]
New Q values:  [-6.00000000e-01  0.00000000e+00  8.78670276e+02 -1.78980000e+02]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1981.64527276  715.61136736  181.20343395]
------
Step:17, Action:South
State  200
Old Q Values:  [  62.8218634  1981.64527276  715.61136736  181.20343395]
New Q values:  [  62.8218634  1206.93052108  715.61136736  181.20343395]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1382.90803992   -98.41387629]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1382.90803992   -98.41387629]
New Q values:  [-2527.46239811 -8521.23367799  2544.01566877   -98.41387629]
Reward: 9  Episode Reward:  42
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6618.17484269 -9022.41491635 -7525.7277781    955.8581496 ]
------
Step:19, Action:North
State  288
Old Q Values:  [ 6618.17484269 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [ 3446.53609863 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2097.66793883  2666.22053851 -6170.35693855 -1798.95296703]
------
Step:20, Action:South
State  216
Old Q Values:  [ 2097.66793883  2666.22053851 -6170.35693855 -1798.95296703]
New Q values:  [ 2097.66793883  2099.84904499 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3446.53609863 -9022.41491635 -7525.7277781    955.8581496 ]
------
Step:21, Action:North
State  288
Old Q Values:  [ 3446.53609863 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [ 1413.55219375 -9022.41491635 -7525.7277781    955.8581496 ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  118.459181   -2230.36055317  -180.6            3.52184257]
------
Step:22, Action:North
State  218
Old Q Values:  [297.42391963 111.91184742   0.         784.96322284]
New Q values:  [840.58382495 111.91184742   0.         784.96322284]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        2407.38085698]
------
Step:23, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        2407.38085698]
New Q values:  [-180.6        -880.4555246  -180.6        2564.41707429]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   5340.21577165    26.55294101]
------
Step:24, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   759.12131137     5.83657409]
New Q values:  [ -253.44886264 -1902.20915811  1072.37364683     5.83657409]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        2564.41707429]
------
Step:25, Action:West
State  136
Old Q Values:  [ -724.71310357  7166.70050517 -6245.61866138 -1245.68587762]
New Q values:  [ -724.71310357  7166.70050517 -6245.61866138  -475.89700025]
Reward: -1  Episode Reward:  35
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7028.24709599    76.59116932]
------
Step:26, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   1667.50356144  -5682.33695745]
New Q values:  [-10156.11771313  -8069.05606225   1667.50356144  -2193.79122428]
Reward: -1  Episode Reward:  34
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          265.81186233 -5851.25726525     0.        ]
------
Step:27, Action:South
State  105
Old Q Values:  [ -180.6          265.81186233 -5851.25726525     0.        ]
New Q values:  [ -180.6          369.32582787 -5851.25726525     0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[-6.00000000e-01  0.00000000e+00  8.78670276e+02 -1.78980000e+02]
------
Step:28, Action:East
State  185
Old Q Values:  [-6.00000000e-01  0.00000000e+00  8.78670276e+02 -1.78980000e+02]
New Q values:  [-6.00000000e-01  0.00000000e+00  8.49498952e+02 -1.78980000e+02]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.66210281e+03 -8.94356769e+03  2.40000000e-02]
------
Step:29, Action:South
State  203
Old Q Values:  [   3.60604218 1417.31347636   16.53929541    0.        ]
New Q values:  [  3.60604218 917.47486444  16.53929541   0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1170.49824634 1155.17262292]
------
Step:30, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1170.49824634 1155.17262292]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  1.17049825e+03  6.11471961e+04]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  118.459181   -2230.36055317  -180.6            3.52184257]
------
Step:1, Action:North
State  210
Old Q Values:  [  118.459181   -2230.36055317  -180.6            3.52184257]
New Q values:  [  822.10879469 -2230.36055317  -180.6            3.52184257]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        2564.41707429]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        2564.41707429]
New Q values:  [-180.6        -880.4555246  -180.6        1352.87892377]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1072.37364683     5.83657409]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1072.37364683     5.83657409]
New Q values:  [ -253.44886264 -1902.20915811   834.21313586     5.83657409]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        1352.87892377]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        1352.87892377]
New Q values:  [-180.6        -880.4555246  -180.6         790.81551027]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   834.21313586     5.83657409]
------
Step:5, Action:East
State  121
Old Q Values:  [    0.             0.         -7028.24709599    76.59116932]
New Q values:  [    0.             0.         -6661.88868685    76.59116932]
Reward: -10001  Episode Reward:  -9985
xxxxx
x. gx
x.. x
x. .x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2544.01566877   -98.41387629]
------
Step:1, Action:East
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  1.17049825e+03  6.11471961e+04]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  8.97664957e+02  6.11471961e+04]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1413.55219375 -9022.41491635 -7525.7277781    955.8581496 ]
------
Step:2, Action:West
State  288
Old Q Values:  [ 1413.55219375 -9022.41491635 -7525.7277781    955.8581496 ]
New Q values:  [ 1413.55219375 -9022.41491635 -7525.7277781  18725.90209675]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  8.97664957e+02  6.11471961e+04]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2544.01566877   -98.41387629]
New Q values:  [-2527.46239811 -8521.23367799  2544.01566877   645.76152334]
Reward: 9  Episode Reward:  17
xxxxx
x.g.x
x. .x
xa  x
xxxxx
Step:4, Action:West
State  260
Old Q Values:  [ 1389.4182337  -8695.4397473    632.98185518 -7094.93143822]
New Q values:  [ 1389.4182337  -8695.4397473    632.98185518 -2601.74710518]
Reward: -301  Episode Reward:  -284
xxxxx
xg..x
x. .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1389.4182337  -8695.4397473    632.98185518 -2601.74710518]
------
Step:5, Action:North
State  260
Old Q Values:  [ 1389.4182337  -8695.4397473    632.98185518 -2601.74710518]
New Q values:  [-5030.55992858 -8695.4397473    632.98185518 -2601.74710518]
Reward: -9991  Episode Reward:  -10275
xxxxx
x...x
xg .x
x   x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2544.01566877   645.76152334]
------
Step:1, Action:East
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  8.97664957e+02  6.11471961e+04]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  5.98223661e+03  6.11471961e+04]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1413.55219375 -9022.41491635 -7525.7277781  18725.90209675]
------
Step:2, Action:West
State  288
Old Q Values:  [ 1413.55219375 -9022.41491635 -7525.7277781  18725.90209675]
New Q values:  [ 1413.55219375 -9022.41491635 -7525.7277781  25833.9196756 ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  5.98223661e+03  6.11471961e+04]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2544.01566877   645.76152334]
New Q values:  [-2527.46239811 -8521.23367799  2544.01566877   490.97237998]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x. .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 757.55923548  -40.34168621 -792.9733772   206.7345883 ]
------
Step:4, Action:North
State  261
Old Q Values:  [ 757.55923548  -40.34168621 -792.9733772   206.7345883 ]
New Q values:  [ 725.5680965   -40.34168621 -792.9733772   206.7345883 ]
Reward: 9  Episode Reward:  26
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 407.26550113  331.87249459 1390.48134102    0.        ]
------
Step:5, Action:East
State  181
Old Q Values:  [   34.62799388    10.93436256 -4706.35308576  -180.6       ]
New Q values:  [  34.62799388   10.93436256 -576.99988594 -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  4353.80449455]
------
Step:6, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  1.24313534e+03  6.29090057e+03 -9.78728177e+01]
New Q values:  [ 3.89777037e-01  1.24313534e+03  6.29090057e+03 -2.93607289e+01]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  34.62799388   10.93436256 -576.99988594 -180.6       ]
------
Step:7, Action:North
State  181
Old Q Values:  [  34.62799388   10.93436256 -576.99988594 -180.6       ]
New Q values:  [  39.11314798   10.93436256 -576.99988594 -180.6       ]
Reward: 9  Episode Reward:  33
xxxxx
xag.x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         66.20650142  0.          0.        ]
------
Step:8, Action:South
State  101
Old Q Values:  [ 0.         66.20650142  0.          0.        ]
New Q values:  [ 0.         37.61654496  0.          0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  39.11314798   10.93436256 -576.99988594 -180.6       ]
------
Step:9, Action:North
State  181
Old Q Values:  [  39.11314798   10.93436256 -576.99988594 -180.6       ]
New Q values:  [  26.33022268   10.93436256 -576.99988594 -180.6       ]
Reward: -1  Episode Reward:  31
xxxxx
xag.x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         37.61654496  0.          0.        ]
------
Step:10, Action:South
State  100
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [  0.         407.67277794   0.           0.        ]
Reward: -1  Episode Reward:  30
xxxxx
xg..x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-9105.01212451   266.69721195  1360.9092598      0.        ]
------
Step:11, Action:East
State  181
Old Q Values:  [  26.33022268   10.93436256 -576.99988594 -180.6       ]
New Q values:  [  26.33022268   10.93436256   68.98829511 -180.6       ]
Reward: -1  Episode Reward:  29
xxxxx
x g.x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1001.29416495   174.55451539     0.        ]
------
Step:12, Action:South
State  196
Old Q Values:  [-2469.90645144  1001.29416495   174.55451539     0.        ]
New Q values:  [-2469.90645144   946.29702925   174.55451539     0.        ]
Reward: -1  Episode Reward:  28
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1821.26454422   35.60960053]
------
Step:13, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        1821.26454422   35.60960053]
New Q values:  [  16.82637525 -180.6        8478.08172037   35.60960053]
Reward: -1  Episode Reward:  27
xxxxx
x g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1413.55219375 -9022.41491635 -7525.7277781  25833.9196756 ]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1413.55219375 -9022.41491635 -7525.7277781  25833.9196756 ]
New Q values:  [ 1413.55219375 -9022.41491635 -7525.7277781  12876.39238635]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        8478.08172037   35.60960053]
------
Step:15, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        8478.08172037   35.60960053]
New Q values:  [  16.82637525 -180.6        7253.55040405   35.60960053]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1413.55219375 -9022.41491635 -7525.7277781  12876.39238635]
------
Step:16, Action:North
State  288
Old Q Values:  [ 1413.55219375 -9022.41491635 -7525.7277781  12876.39238635]
New Q values:  [  817.45351591 -9022.41491635 -7525.7277781  12876.39238635]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  822.10879469 -2230.36055317  -180.6            3.52184257]
------
Step:17, Action:North
State  210
Old Q Values:  [  822.10879469 -2230.36055317  -180.6            3.52184257]
New Q values:  [ 1.82947340e+04 -2.23036055e+03 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  43
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 59868.30154239]
------
Step:18, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 59868.30154239]
New Q values:  [ 29415.41153072   2346.20646678   -180.00807518 121084.90385162]
Reward: 100009  Episode Reward:  100052
xxxxx
x a x
x   x
x  gx
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5030.55992858 -8695.4397473    632.98185518 -2601.74710518]
------
Step:1, Action:East
State  260
Old Q Values:  [-5030.55992858 -8695.4397473    632.98185518 -2601.74710518]
New Q values:  [-5030.55992858 -8695.4397473   2434.65786329 -2601.74710518]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        7253.55040405   35.60960053]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2544.01566877   490.97237998]
New Q values:  [-2527.46239811 -8521.23367799  4885.92398342   490.97237998]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  817.45351591 -9022.41491635 -7525.7277781  12876.39238635]
------
Step:3, Action:West
State  288
Old Q Values:  [  817.45351591 -9022.41491635 -7525.7277781  12876.39238635]
New Q values:  [  817.45351591 -9022.41491635 -7525.7277781   6615.73414957]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4885.92398342   490.97237998]
------
Step:4, Action:East
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  5.98223661e+03  6.11471961e+04]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  4.37701489e+03  6.11471961e+04]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  817.45351591 -9022.41491635 -7525.7277781   6615.73414957]
------
Step:5, Action:West
State  288
Old Q Values:  [  817.45351591 -9022.41491635 -7525.7277781   6615.73414957]
New Q values:  [  817.45351591 -9022.41491635 -7525.7277781  20989.85249673]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  4.37701489e+03  6.11471961e+04]
------
Step:6, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  4.37701489e+03  6.11471961e+04]
New Q values:  [   37.74111519  -168.92307549  4377.01488954 24675.94887816]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 725.5680965   -40.34168621 -792.9733772   206.7345883 ]
------
Step:7, Action:North
State  261
Old Q Values:  [ 725.5680965   -40.34168621 -792.9733772   206.7345883 ]
New Q values:  [ 310.32372713  -40.34168621 -792.9733772   206.7345883 ]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xag.x
x   x
xxxxx
Step:8, Action:South
State  181
Old Q Values:  [  26.33022268   10.93436256   68.98829511 -180.6       ]
New Q values:  [  26.33022268   96.87086316   68.98829511 -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 310.32372713  -40.34168621 -792.9733772   206.7345883 ]
------
Step:9, Action:North
State  261
Old Q Values:  [ 310.32372713  -40.34168621 -792.9733772   206.7345883 ]
New Q values:  [ 152.5907498   -40.34168621 -792.9733772   206.7345883 ]
Reward: -1  Episode Reward:  11
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  26.33022268   96.87086316   68.98829511 -180.6       ]
------
Step:10, Action:South
State  181
Old Q Values:  [  26.33022268   96.87086316   68.98829511 -180.6       ]
New Q values:  [  26.33022268  100.16872176   68.98829511 -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 152.5907498   -40.34168621 -792.9733772   206.7345883 ]
------
Step:11, Action:West
State  261
Old Q Values:  [ 152.5907498   -40.34168621 -792.9733772   206.7345883 ]
New Q values:  [ 152.5907498   -40.34168621 -792.9733772   -35.88578819]
Reward: -301  Episode Reward:  -291
xxxxx
x..gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 152.5907498   -40.34168621 -792.9733772   -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [ 152.5907498   -40.34168621 -792.9733772   -35.88578819]
New Q values:  [  90.48691645  -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  -292
xxxxx
x.g.x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  26.33022268  100.16872176   68.98829511 -180.6       ]
------
Step:13, Action:South
State  181
Old Q Values:  [  26.33022268  100.16872176   68.98829511 -180.6       ]
New Q values:  [  26.33022268   66.61356364   68.98829511 -180.6       ]
Reward: -1  Episode Reward:  -293
xxxxx
x..gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  90.48691645  -40.34168621 -792.9733772   -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [  90.48691645  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [  56.29125511  -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  -294
xxxxx
x...x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  26.33022268   66.61356364   68.98829511 -180.6       ]
------
Step:15, Action:East
State  181
Old Q Values:  [  26.33022268   66.61356364   68.98829511 -180.6       ]
New Q values:  [   26.33022268    66.61356364 -4079.73450974  -180.6       ]
Reward: -9991  Episode Reward:  -10285
xxxxx
x...x
x g.x
x   x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4148.44098644 10016.3273647  -1855.11188891 -3385.12952694]
------
Step:1, Action:South
State  208
Old Q Values:  [ 4148.44098644 10016.3273647  -1855.11188891 -3385.12952694]
New Q values:  [ 4148.44098644 10308.8866949  -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  817.45351591 -9022.41491635 -7525.7277781  20989.85249673]
------
Step:2, Action:West
State  288
Old Q Values:  [  817.45351591 -9022.41491635 -7525.7277781  20989.85249673]
New Q values:  [  817.45351591 -9022.41491635 -7525.7277781   9867.11819372]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4885.92398342   490.97237998]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4885.92398342   490.97237998]
New Q values:  [-2527.46239811 -8521.23367799  4913.90505148   490.97237998]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  817.45351591 -9022.41491635 -7525.7277781   9867.11819372]
------
Step:4, Action:West
State  288
Old Q Values:  [  817.45351591 -9022.41491635 -7525.7277781   9867.11819372]
New Q values:  [  817.45351591 -9022.41491635 -7525.7277781   5420.41879293]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4913.90505148   490.97237998]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4913.90505148   490.97237998]
New Q values:  [-2527.46239811 -8521.23367799  3591.08765847   490.97237998]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  817.45351591 -9022.41491635 -7525.7277781   5420.41879293]
------
Step:6, Action:West
State  288
Old Q Values:  [  817.45351591 -9022.41491635 -7525.7277781   5420.41879293]
New Q values:  [  817.45351591 -9022.41491635 -7525.7277781   3244.89381471]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3591.08765847   490.97237998]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3591.08765847   490.97237998]
New Q values:  [-2527.46239811 -8521.23367799  2409.3032078    490.97237998]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  817.45351591 -9022.41491635 -7525.7277781   3244.89381471]
------
Step:8, Action:West
State  288
Old Q Values:  [  817.45351591 -9022.41491635 -7525.7277781   3244.89381471]
New Q values:  [  817.45351591 -9022.41491635 -7525.7277781   2020.14848823]
Reward: -1  Episode Reward:  12
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2409.3032078    490.97237998]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2409.3032078    490.97237998]
New Q values:  [-2527.46239811 -8521.23367799  1569.16582959   490.97237998]
Reward: -1  Episode Reward:  11
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  817.45351591 -9022.41491635 -7525.7277781   2020.14848823]
------
Step:10, Action:West
State  288
Old Q Values:  [  817.45351591 -9022.41491635 -7525.7277781   2020.14848823]
New Q values:  [  817.45351591 -9022.41491635 -7525.7277781   1278.20914417]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1569.16582959   490.97237998]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1569.16582959   490.97237998]
New Q values:  [-2527.46239811 -8521.23367799  1010.52907509   490.97237998]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  817.45351591 -9022.41491635 -7525.7277781   1278.20914417]
------
Step:12, Action:North
State  288
Old Q Values:  [  817.45351591 -9022.41491635 -7525.7277781   1278.20914417]
New Q values:  [ 3419.04741483 -9022.41491635 -7525.7277781   1278.20914417]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4148.44098644 10308.8866949  -1855.11188891 -3385.12952694]
------
Step:13, Action:South
State  208
Old Q Values:  [ 4148.44098644 10308.8866949  -1855.11188891 -3385.12952694]
New Q values:  [ 4148.44098644  5148.66890241 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3419.04741483 -9022.41491635 -7525.7277781   1278.20914417]
------
Step:14, Action:North
State  288
Old Q Values:  [ 3419.04741483 -9022.41491635 -7525.7277781   1278.20914417]
New Q values:  [ 2911.61963666 -9022.41491635 -7525.7277781   1278.20914417]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4148.44098644  5148.66890241 -1855.11188891 -3385.12952694]
------
Step:15, Action:South
State  210
Old Q Values:  [ 1.82947340e+04 -2.23036055e+03 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 1.82947340e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2911.61963666 -9022.41491635 -7525.7277781   1278.20914417]
------
Step:16, Action:North
State  288
Old Q Values:  [ 2911.61963666 -9022.41491635 -7525.7277781   1278.20914417]
New Q values:  [ 6652.46804884 -9022.41491635 -7525.7277781   1278.20914417]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.82947340e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
------
Step:17, Action:North
State  210
Old Q Values:  [ 1.82947340e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 4.36487647e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  13
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 121084.90385162]
------
Step:18, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         790.81551027]
New Q values:  [-180.6       -880.4555246 -180.6       1923.7909356]
Reward: 9  Episode Reward:  22
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   5340.21577165    26.55294101]
------
Step:19, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  27948.95045534 123044.4172474 ]
New Q values:  [  -180.6          4272.38349051  47504.45133763 123044.4172474 ]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 121084.90385162]
------
Step:20, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6       1923.7909356]
New Q values:  [-180.6        -880.4555246  -180.6        2370.98110574]
Reward: -1  Episode Reward:  20
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   5340.21577165    26.55294101]
------
Step:21, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   5340.21577165    26.55294101]
New Q values:  [ -281.736      -9545.4473624   2846.78064038    26.55294101]
Reward: -1  Episode Reward:  19
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        2370.98110574]
------
Step:22, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        2370.98110574]
New Q values:  [-180.6        -880.4555246  -180.6        1801.82663441]
Reward: -1  Episode Reward:  18
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2846.78064038    26.55294101]
------
Step:23, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  47504.45133763 123044.4172474 ]
New Q values:  [  -180.6          4272.38349051  55326.65169054 123044.4172474 ]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 121084.90385162]
------
Step:24, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        1801.82663441]
New Q values:  [-180.6        -880.4555246  -180.6        1574.16484588]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2846.78064038    26.55294101]
------
Step:25, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  55326.65169054 123044.4172474 ]
New Q values:  [  -180.6          4272.38349051  58455.5318317  123044.4172474 ]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 121084.90385162]
------
Step:26, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        1574.16484588]
New Q values:  [-180.6        -880.4555246  -180.6        1483.10013047]
Reward: -1  Episode Reward:  14
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2846.78064038    26.55294101]
------
Step:27, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  58455.5318317  123044.4172474 ]
New Q values:  [  -180.6          4272.38349051  59707.08388817 123044.4172474 ]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 121084.90385162]
------
Step:28, Action:West
State  136
Old Q Values:  [ -724.71310357  7166.70050517 -6245.61866138  -475.89700025]
New Q values:  [ -724.71310357  7166.70050517 -6245.61866138   309.29226833]
Reward: -1  Episode Reward:  12
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   1667.50356144  -2193.79122428]
------
Step:29, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   1667.50356144  -2193.79122428]
New Q values:  [-10156.11771313  -8069.05606225   2816.41157613  -2193.79122428]
Reward: -1  Episode Reward:  11
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  7166.70050517 -6245.61866138   309.29226833]
------
Step:30, Action:South
State  136
Old Q Values:  [ -724.71310357  7166.70050517 -6245.61866138   309.29226833]
New Q values:  [ -724.71310357  4410.68087279 -6245.61866138   309.29226833]
Reward: -1  Episode Reward:  10
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4148.44098644  5148.66890241 -1855.11188891 -3385.12952694]
------
Step:31, Action:South
State  208
Old Q Values:  [ 4148.44098644  5148.66890241 -1855.11188891 -3385.12952694]
New Q values:  [ 4148.44098644  4054.60797562 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6652.46804884 -9022.41491635 -7525.7277781   1278.20914417]
------
Step:32, Action:West
State  288
Old Q Values:  [ 6652.46804884 -9022.41491635 -7525.7277781   1278.20914417]
New Q values:  [ 6652.46804884 -9022.41491635 -7525.7277781   7913.46832111]
Reward: -1  Episode Reward:  8
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4377.01488954 24675.94887816]
------
Step:33, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4377.01488954 24675.94887816]
New Q values:  [   37.74111519  -168.92307549  4377.01488954 10555.50662511]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[2265.75691284 2256.66526474  425.90861234 1875.31501677]
------
Step:34, Action:North
State  257
Old Q Values:  [2265.75691284 2256.66526474  425.90861234 1875.31501677]
New Q values:  [5347.79251673 2256.66526474  425.90861234 1875.31501677]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 14786.96583865     0.        ]
------
Step:35, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 14786.96583865     0.        ]
New Q values:  [   0.         4614.46100011 6412.95064987    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.62964724e+03  1.66254771e+03  1.20371620e+03]
------
Step:36, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  1.24313534e+03  6.29090057e+03 -2.93607289e+01]
New Q values:  [ 3.89777037e-01  1.24313534e+03  3.76029253e+03 -2.93607289e+01]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4148.44098644  4054.60797562 -1855.11188891 -3385.12952694]
------
Step:37, Action:North
State  210
Old Q Values:  [ 4.36487647e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 5.37843771e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 121084.90385162]
------
Step:38, Action:West
State  130
Old Q Values:  [ 29415.41153072   2346.20646678   -180.00807518 121084.90385162]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 85346.68671487]
Reward: -1  Episode Reward:  22
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  59707.08388817 123044.4172474 ]
------
Step:39, Action:West
State  114
Old Q Values:  [  -180.6          4272.38349051  59707.08388817 123044.4172474 ]
New Q values:  [  -180.6          4272.38349051  59707.08388817 118727.87289289]
Reward: 100009  Episode Reward:  100031
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  56.29125511  -40.34168621 -792.9733772   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [  56.29125511  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [  47.90057114  -40.34168621 -792.9733772   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   26.33022268    66.61356364 -4079.73450974  -180.6       ]
------
Step:2, Action:South
State  181
Old Q Values:  [   26.33022268    66.61356364 -4079.73450974  -180.6       ]
New Q values:  [   26.33022268    40.4155968  -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x ..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  47.90057114  -40.34168621 -792.9733772   -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [  47.90057114  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [  30.68490749  -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   26.33022268    40.4155968  -4079.73450974  -180.6       ]
------
Step:4, Action:South
State  181
Old Q Values:  [   26.33022268    40.4155968  -4079.73450974  -180.6       ]
New Q values:  [   26.33022268    24.77171097 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x..gx
x ..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  30.68490749  -40.34168621 -792.9733772   -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [  30.68490749  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [  19.5730298   -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   26.33022268    24.77171097 -4079.73450974  -180.6       ]
------
Step:6, Action:North
State  181
Old Q Values:  [   26.33022268    24.77171097 -4079.73450974  -180.6       ]
New Q values:  [   32.05617216    24.77171097 -4079.73450974  -180.6       ]
Reward: 9  Episode Reward:  14
xxxxx
xa.gx
x ..x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   53.74694364 -171.53262854 -180.6       ]
------
Step:7, Action:South
State  109
Old Q Values:  [-241.10880094   53.74694364 -171.53262854 -180.6       ]
New Q values:  [-241.10880094   30.51562911 -171.53262854 -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   32.05617216    24.77171097 -4079.73450974  -180.6       ]
------
Step:8, Action:North
State  181
Old Q Values:  [   32.05617216    24.77171097 -4079.73450974  -180.6       ]
New Q values:  [   21.3771576     24.77171097 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
xa.gx
x ..x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   30.51562911 -171.53262854 -180.6       ]
------
Step:9, Action:South
State  103
Old Q Values:  [-180.6         642.18738192    0.            0.        ]
New Q values:  [-180.6         263.70646606    0.            0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   21.3771576     24.77171097 -4079.73450974  -180.6       ]
------
Step:10, Action:South
State  181
Old Q Values:  [   21.3771576     24.77171097 -4079.73450974  -180.6       ]
New Q values:  [   21.3771576     15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x .gx
x ..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  19.5730298   -40.34168621 -792.9733772   -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [  19.5730298   -40.34168621 -792.9733772   -35.88578819]
New Q values:  [  13.6423592   -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  9
xxxxx
x g.x
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   21.3771576     15.18059333 -4079.73450974  -180.6       ]
------
Step:12, Action:North
State  180
Old Q Values:  [-9105.01212451   266.69721195  1360.9092598      0.        ]
New Q values:  [-9403.6093969    266.69721195  1360.9092598      0.        ]
Reward: -10001  Episode Reward:  -9992
xxxxx
xg..x
x ..x
x . x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4148.44098644  4054.60797562 -1855.11188891 -3385.12952694]
------
Step:1, Action:North
State  208
Old Q Values:  [ 4148.44098644  4054.60797562 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  4054.60797562 -1855.11188891 -3385.12952694]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   21.3771576     15.18059333 -4079.73450974  -180.6       ]
------
Step:1, Action:North
State  183
Old Q Values:  [ 407.26550113  331.87249459 1390.48134102    0.        ]
New Q values:  [ 247.41814027  331.87249459 1390.48134102    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         263.70646606    0.            0.        ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869   28.34575394 2557.96611117 -272.09726687]
New Q values:  [-177.44732869   17.15144886 2557.96611117 -272.09726687]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   21.3771576     15.18059333 -4079.73450974  -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [   21.3771576     15.18059333 -4079.73450974  -180.6       ]
New Q values:  [   17.10555177    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xa.gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   30.51562911 -171.53262854 -180.6       ]
------
Step:4, Action:South
State  109
Old Q Values:  [-241.10880094   30.51562911 -171.53262854 -180.6       ]
New Q values:  [-241.10880094   16.73791717 -171.53262854 -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   17.10555177    15.18059333 -4079.73450974  -180.6       ]
------
Step:5, Action:North
State  181
Old Q Values:  [   17.10555177    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [   85.35416053    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
xa..x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         263.70646606    0.            0.        ]
------
Step:6, Action:South
State  109
Old Q Values:  [-241.10880094   16.73791717 -171.53262854 -180.6       ]
New Q values:  [-241.10880094   31.70141503 -171.53262854 -180.6       ]
Reward: -1  Episode Reward:  4
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   85.35416053    15.18059333 -4079.73450974  -180.6       ]
------
Step:7, Action:North
State  180
Old Q Values:  [-9403.6093969    266.69721195  1360.9092598      0.        ]
New Q values:  [-9523.04830585   266.69721195  1360.9092598      0.        ]
Reward: -10001  Episode Reward:  -9997
xxxxx
xg..x
x . x
x...x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1010.52907509   490.97237998]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1010.52907509   490.97237998]
New Q values:  [-2527.46239811 -8521.23367799  2783.65212637   490.97237998]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6652.46804884 -9022.41491635 -7525.7277781   7913.46832111]
------
Step:2, Action:North
State  288
Old Q Values:  [ 6652.46804884 -9022.41491635 -7525.7277781   7913.46832111]
New Q values:  [18801.70033591 -9022.41491635 -7525.7277781   7913.46832111]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5.37843771e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [ 5.37843771e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 4.71231568e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 85346.68671487]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        1483.10013047]
New Q values:  [-180.6        -880.4555246  -180.6         848.90399295]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   834.21313586     5.83657409]
------
Step:5, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   834.21313586     5.83657409]
New Q values:  [ -253.44886264 -1902.20915811   587.75645223     5.83657409]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         848.90399295]
------
Step:6, Action:West
State  136
Old Q Values:  [ -724.71310357  4410.68087279 -6245.61866138   309.29226833]
New Q values:  [ -724.71310357  4410.68087279 -6245.61866138   146.09425813]
Reward: -1  Episode Reward:  34
xxxxx
x.agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -6661.88868685    76.59116932]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   587.75645223     5.83657409]
New Q values:  [ -253.44886264 -1902.20915811   587.75645223    11.11895536]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:8, Action:South
State  111
Old Q Values:  [-177.44732869   17.15144886 2557.96611117 -272.09726687]
New Q values:  [-177.44732869  429.40498185 2557.96611117 -272.09726687]
Reward: 9  Episode Reward:  52
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 247.41814027  331.87249459 1390.48134102    0.        ]
------
Step:9, Action:East
State  177
Old Q Values:  [17045.79524512  4025.17604709 62545.46914468     0.        ]
New Q values:  [17045.79524512  4025.17604709 86329.72900624     0.        ]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  4054.60797562 -1855.11188891 -3385.12952694]
------
Step:1, Action:South
State  208
Old Q Values:  [-3012.01934359  4054.60797562 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  7267.75329102 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18801.70033591 -9022.41491635 -7525.7277781   7913.46832111]
------
Step:2, Action:North
State  288
Old Q Values:  [18801.70033591 -9022.41491635 -7525.7277781   7913.46832111]
New Q values:  [ 9700.40612167 -9022.41491635 -7525.7277781   7913.46832111]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.gax
x.. x
xxxxx
Step:3, Action:East
State  210
Old Q Values:  [ 4.71231568e+04 -1.92583303e+01 -1.80600000e+02  3.52184257e+00]
New Q values:  [ 4.71231568e+04 -1.92583303e+01  1.38841071e+04  3.52184257e+00]
Reward: -301  Episode Reward:  -293
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4.71231568e+04 -1.92583303e+01  1.38841071e+04  3.52184257e+00]
------
Step:4, Action:North
State  210
Old Q Values:  [ 4.71231568e+04 -1.92583303e+01  1.38841071e+04  3.52184257e+00]
New Q values:  [ 1.91033339e+04 -1.92583303e+01  1.38841071e+04  3.52184257e+00]
Reward: -1  Episode Reward:  -294
xxxxx
x..ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         848.90399295]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         848.90399295]
New Q values:  [-180.6        -880.4555246  -180.6        1198.99578929]
Reward: 9  Episode Reward:  -285
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2846.78064038    26.55294101]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2846.78064038    26.55294101]
New Q values:  [ -281.736      -9545.4473624   1497.81099294    26.55294101]
Reward: -1  Episode Reward:  -286
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6        1198.99578929]
------
Step:7, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6        1198.99578929]
New Q values:  [-180.6       -880.4555246 -180.6        928.3416136]
Reward: -1  Episode Reward:  -287
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1497.81099294    26.55294101]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1497.81099294    26.55294101]
New Q values:  [ -281.736      -9545.4473624    877.02688126    26.55294101]
Reward: -1  Episode Reward:  -288
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        928.3416136]
------
Step:9, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6        928.3416136]
New Q values:  [-180.6        -880.4555246  -180.6         633.84470982]
Reward: -1  Episode Reward:  -289
xxxxx
x.a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    877.02688126    26.55294101]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    877.02688126    26.55294101]
New Q values:  [ -281.736      -9545.4473624    540.36416545    26.55294101]
Reward: -1  Episode Reward:  -290
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         633.84470982]
------
Step:11, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         633.84470982]
New Q values:  [-180.6        -880.4555246  -180.6         415.04713356]
Reward: -1  Episode Reward:  -291
xxxxx
x.a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    540.36416545    26.55294101]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    540.36416545    26.55294101]
New Q values:  [ -281.736      -9545.4473624    340.05980625    26.55294101]
Reward: -1  Episode Reward:  -292
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         415.04713356]
------
Step:13, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         415.04713356]
New Q values:  [-180.6       -880.4555246 -180.6        267.4367953]
Reward: -1  Episode Reward:  -293
xxxxx
x.a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    340.05980625    26.55294101]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    340.05980625    26.55294101]
New Q values:  [ -281.736      -9545.4473624    215.65496109    26.55294101]
Reward: -1  Episode Reward:  -294
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        267.4367953]
------
Step:15, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6        267.4367953]
New Q values:  [-180.6        -880.4555246  -180.6         171.07120645]
Reward: -1  Episode Reward:  -295
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    215.65496109    26.55294101]
------
Step:16, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    215.65496109    26.55294101]
New Q values:  [ -281.736      -9545.4473624    136.98334637    26.55294101]
Reward: -1  Episode Reward:  -296
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         171.07120645]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         171.07120645]
New Q values:  [-180.6        -880.4555246  -180.6         108.92348649]
Reward: -1  Episode Reward:  -297
xxxxx
x.a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    136.98334637    26.55294101]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    136.98334637    26.55294101]
New Q values:  [ -281.736      -9545.4473624     86.87038449    26.55294101]
Reward: -1  Episode Reward:  -298
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         108.92348649]
------
Step:19, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         108.92348649]
New Q values:  [-180.6        -880.4555246  -180.6          69.03050994]
Reward: -1  Episode Reward:  -299
xxxxx
x.a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     86.87038449    26.55294101]
------
Step:20, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   2816.41157613  -2193.79122428]
New Q values:  [-10156.11771313  -8069.05606225   2449.16889229  -2193.79122428]
Reward: -1  Episode Reward:  -300
xxxxx
xg ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  4410.68087279 -6245.61866138   146.09425813]
------
Step:21, Action:South
State  136
Old Q Values:  [ -724.71310357  4410.68087279 -6245.61866138   146.09425813]
New Q values:  [ -724.71310357  3943.99833642 -6245.61866138   146.09425813]
Reward: -1  Episode Reward:  -301
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  7267.75329102 -1855.11188891 -3385.12952694]
------
Step:22, Action:South
State  208
Old Q Values:  [-3012.01934359  7267.75329102 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  5816.62315291 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -302
xxxxx
xg  x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9700.40612167 -9022.41491635 -7525.7277781   7913.46832111]
------
Step:23, Action:North
State  288
Old Q Values:  [ 9700.40612167 -9022.41491635 -7525.7277781   7913.46832111]
New Q values:  [ 5624.54939454 -9022.41491635 -7525.7277781   7913.46832111]
Reward: -1  Episode Reward:  -303
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  5816.62315291 -1855.11188891 -3385.12952694]
------
Step:24, Action:South
State  208
Old Q Values:  [-3012.01934359  5816.62315291 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  4700.0897575  -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  -304
xxxxx
x. gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5624.54939454 -9022.41491635 -7525.7277781   7913.46832111]
------
Step:25, Action:West
State  288
Old Q Values:  [ 5624.54939454 -9022.41491635 -7525.7277781   7913.46832111]
New Q values:  [ 5624.54939454 -9022.41491635 -7525.7277781   6337.43931598]
Reward: 9  Episode Reward:  -295
xxxxx
x.  x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4377.01488954 10555.50662511]
------
Step:26, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4377.01488954 10555.50662511]
New Q values:  [  37.74111519 -168.92307549 4377.01488954 5831.94040506]
Reward: 9  Episode Reward:  -286
xxxxx
x. gx
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[5347.79251673 2256.66526474  425.90861234 1875.31501677]
------
Step:27, Action:North
State  257
Old Q Values:  [5347.79251673 2256.66526474  425.90861234 1875.31501677]
New Q values:  [28043.43570856  2256.66526474   425.90861234  1875.31501677]
Reward: 9  Episode Reward:  -277
xxxxx
x.  x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[17045.79524512  4025.17604709 86329.72900624     0.        ]
------
Step:28, Action:East
State  177
Old Q Values:  [17045.79524512  4025.17604709 86329.72900624     0.        ]
New Q values:  [17045.79524512  4025.17604709 35843.43295086     0.        ]
Reward: 9  Episode Reward:  -268
xxxxx
x. gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  4353.80449455]
------
Step:29, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  1.24313534e+03  3.76029253e+03 -2.93607289e+01]
New Q values:  [3.89777037e-01 1.24313534e+03 3.76029253e+03 1.07406856e+04]
Reward: -1  Episode Reward:  -269
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[17045.79524512  4025.17604709 35843.43295086     0.        ]
------
Step:30, Action:East
State  176
Old Q Values:  [    0.          1327.79507613 62160.50614241     0.        ]
New Q values:  [    0.          1327.79507613 28085.80813507     0.        ]
Reward: -1  Episode Reward:  -270
xxxxx
xg  x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.24313534e+03 3.76029253e+03 1.07406856e+04]
------
Step:31, Action:West
State  192
Old Q Values:  [3.89777037e-01 1.24313534e+03 3.76029253e+03 1.07406856e+04]
New Q values:  [3.89777037e-01 1.24313534e+03 3.76029253e+03 1.50487041e+04]
Reward: -1  Episode Reward:  -271
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[17045.79524512  4025.17604709 35843.43295086     0.        ]
------
Step:32, Action:East
State  177
Old Q Values:  [17045.79524512  4025.17604709 35843.43295086     0.        ]
New Q values:  [17045.79524512  4025.17604709 15642.91452871     0.        ]
Reward: -1  Episode Reward:  -272
xxxxx
x. gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  4353.80449455]
------
Step:33, Action:West
State  193
Old Q Values:  [-5922.26708831  2203.87243301 -3909.58186816  4353.80449455]
New Q values:  [-5922.26708831  2203.87243301 -3909.58186816  6854.66037136]
Reward: -1  Episode Reward:  -273
xxxxx
x.  x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[17045.79524512  4025.17604709 15642.91452871     0.        ]
------
Step:34, Action:North
State  177
Old Q Values:  [17045.79524512  4025.17604709 15642.91452871     0.        ]
New Q values:  [66823.71809805  4025.17604709 15642.91452871     0.        ]
Reward: 100009  Episode Reward:  99736
xxxxx
xa gx
x   x
x   x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -6661.88868685    76.59116932]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   587.75645223    11.11895536]
New Q values:  [ -253.44886264 -1902.20915811   587.75645223   777.23741549]
Reward: 9  Episode Reward:  9
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  429.40498185 2557.96611117 -272.09726687]
------
Step:2, Action:East
State  109
Old Q Values:  [-241.10880094   31.70141503 -171.53262854 -180.6       ]
New Q values:  [-241.10880094   31.70141503  -46.23570062 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -6661.88868685    76.59116932]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   587.75645223   777.23741549]
New Q values:  [ -253.44886264 -1902.20915811   587.75645223  1077.68479955]
Reward: -1  Episode Reward:  7
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  429.40498185 2557.96611117 -272.09726687]
------
Step:4, Action:East
State  111
Old Q Values:  [-177.44732869  429.40498185 2557.96611117 -272.09726687]
New Q values:  [-177.44732869  429.40498185 1345.89188433 -272.09726687]
Reward: -1  Episode Reward:  6
xxxxx
x a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   587.75645223  1077.68479955]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   587.75645223  1077.68479955]
New Q values:  [ -253.44886264 -1902.20915811   587.75645223   834.24148512]
Reward: -1  Episode Reward:  5
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  429.40498185 1345.89188433 -272.09726687]
------
Step:6, Action:East
State  111
Old Q Values:  [-177.44732869  429.40498185 1345.89188433 -272.09726687]
New Q values:  [-177.44732869  429.40498185  563.81786908 -272.09726687]
Reward: -1  Episode Reward:  4
xxxxx
x a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     86.87038449    26.55294101]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     86.87038449    26.55294101]
New Q values:  [ -281.736      -9545.4473624     54.85730678    26.55294101]
Reward: -1  Episode Reward:  3
xxxxx
x  ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          69.03050994]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          69.03050994]
New Q values:  [-180.6        -880.4555246  -180.6          43.46939601]
Reward: -1  Episode Reward:  2
xxxxx
x a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     54.85730678    26.55294101]
------
Step:9, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   587.75645223   834.24148512]
New Q values:  [ -253.44886264 -1902.20915811   247.5433997    834.24148512]
Reward: -1  Episode Reward:  1
xxxxx
x  ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          43.46939601]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          43.46939601]
New Q values:  [-180.6        -880.4555246  -180.6          33.24495044]
Reward: -1  Episode Reward:  0
xxxxx
x a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     54.85730678    26.55294101]
------
Step:11, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   2449.16889229  -2193.79122428]
New Q values:  [-10156.11771313  -8069.05606225   2162.26705784  -2193.79122428]
Reward: -1  Episode Reward:  -1
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  3943.99833642 -6245.61866138   146.09425813]
------
Step:12, Action:South
State  136
Old Q Values:  [ -724.71310357  3943.99833642 -6245.61866138   146.09425813]
New Q values:  [ -724.71310357  2212.95404807 -6245.61866138   146.09425813]
Reward: 9  Episode Reward:  8
xxxxx
x  gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2097.66793883  2099.84904499 -6170.35693855 -1798.95296703]
------
Step:13, Action:South
State  208
Old Q Values:  [-3012.01934359  4700.0897575  -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  3786.66769779 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  17
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5624.54939454 -9022.41491635 -7525.7277781   6337.43931598]
------
Step:14, Action:West
State  288
Old Q Values:  [ 5624.54939454 -9022.41491635 -7525.7277781   6337.43931598]
New Q values:  [ 5624.54939454 -9022.41491635 -7525.7277781   4289.95784791]
Reward: 9  Episode Reward:  26
xxxxx
x   x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4377.01488954 5831.94040506]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2783.65212637   490.97237998]
New Q values:  [-2527.46239811 -8521.23367799  2783.65212637  8614.81966456]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[28043.43570856  2256.66526474   425.90861234  1875.31501677]
------
Step:16, Action:North
State  261
Old Q Values:  [  13.6423592   -40.34168621 -792.9733772   -35.88578819]
New Q values:  [ 428.00134599  -40.34168621 -792.9733772   -35.88578819]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 247.41814027  331.87249459 1390.48134102    0.        ]
------
Step:17, Action:East
State  179
Old Q Values:  [   0.         4614.46100011 6412.95064987    0.        ]
New Q values:  [    0.          4614.46100011 63069.34457436     0.        ]
Reward: 100009  Episode Reward:  100053
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4377.01488954 5831.94040506]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2783.65212637  8614.81966456]
New Q values:  [-2527.46239811 -8521.23367799  2783.65212637  3579.72826962]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 428.00134599  -40.34168621 -792.9733772   -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [ 428.00134599  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [ 202.20678655  -40.34168621 -792.9733772   -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   85.35416053    15.18059333 -4079.73450974  -180.6       ]
------
Step:3, Action:North
State  180
Old Q Values:  [-9523.04830585   266.69721195  1360.9092598      0.        ]
New Q values:  [-3799.14231332   266.69721195  1360.9092598      0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xa. x
xg..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921    15.59003007  -180.6       ]
------
Step:4, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921    15.59003007  -180.6       ]
New Q values:  [ -180.6        -1554.81716921  1265.57069661  -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x a x
x ..x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.         2047.43645092 4179.78228195  204.22976196]
------
Step:5, Action:East
State  126
Old Q Values:  [   0.         2047.43645092 4179.78228195  204.22976196]
New Q values:  [   0.         2047.43645092 1681.28639791  204.22976196]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x ..x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          33.24495044]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          33.24495044]
New Q values:  [-180.6        -880.4555246  -180.6         626.92891545]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x ..x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.         2047.43645092 1681.28639791  204.22976196]
------
Step:7, Action:South
State  126
Old Q Values:  [   0.         2047.43645092 1681.28639791  204.22976196]
New Q values:  [   0.          943.00468559 1681.28639791  204.22976196]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
------
Step:8, Action:South
State  196
Old Q Values:  [-2469.90645144   946.29702925   174.55451539     0.        ]
New Q values:  [-2469.90645144  2553.98393291   174.55451539     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        7253.55040405   35.60960053]
------
Step:9, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        7253.55040405   35.60960053]
New Q values:  [  16.82637525 -180.6        4594.18497998   35.60960053]
Reward: 9  Episode Reward:  51
xxxxx
xg  x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5624.54939454 -9022.41491635 -7525.7277781   4289.95784791]
------
Step:10, Action:North
State  288
Old Q Values:  [ 5624.54939454 -9022.41491635 -7525.7277781   4289.95784791]
New Q values:  [63391.22006715 -9022.41491635 -7525.7277781   4289.95784791]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
xg ax
x   x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     54.85730678    26.55294101]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     54.85730678    26.55294101]
New Q values:  [ -281.736      -9545.4473624    215.42159735    26.55294101]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         626.92891545]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         626.92891545]
New Q values:  [-180.6        -880.4555246  -180.6         314.79804538]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    215.42159735    26.55294101]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   247.5433997    834.24148512]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   834.24148512]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         314.79804538]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         314.79804538]
New Q values:  [-180.6        -880.4555246  -180.6         189.94569736]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    215.42159735    26.55294101]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    215.42159735    26.55294101]
New Q values:  [ -281.736      -9545.4473624    142.55234815    26.55294101]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         189.94569736]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         189.94569736]
New Q values:  [-180.6        -880.4555246  -180.6         118.14398339]
Reward: -1  Episode Reward:  4
xxxxx
x.a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    142.55234815    26.55294101]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    142.55234815    26.55294101]
New Q values:  [ -281.736      -9545.4473624     91.86413427    26.55294101]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         118.14398339]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         118.14398339]
New Q values:  [-180.6        -880.4555246  -180.6         296.93003889]
Reward: -1  Episode Reward:  2
xxxxx
x.a x
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   834.24148512]
------
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   834.24148512]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   508.24195477]
Reward: 9  Episode Reward:  11
xxxxx
xa  x
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  429.40498185  563.81786908 -272.09726687]
------
Step:10, Action:East
State  109
Old Q Values:  [-241.10880094   31.70141503  -46.23570062 -180.6       ]
New Q values:  [-241.10880094   31.70141503    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x agx
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -6661.88868685    76.59116932]
------
Step:11, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   2162.26705784  -2193.79122428]
New Q values:  [-10156.11771313  -8069.05606225   2162.26705784   -868.6060652 ]
Reward: -1  Episode Reward:  9
xxxxx
xag x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   31.70141503    3.88307055 -180.6       ]
------
Step:12, Action:South
State  108
Old Q Values:  [-8463.16477134   796.65150969   438.8916002      0.        ]
New Q values:  [-8463.16477134   732.33338182   438.8916002      0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3799.14231332   266.69721195  1360.9092598      0.        ]
------
Step:13, Action:East
State  189
Old Q Values:  [   9.84673294  322.31251728  379.97224941 -244.98066897]
New Q values:  [   9.84673294  322.31251728  923.58407964 -244.98066897]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2553.98393291   174.55451539     0.        ]
------
Step:14, Action:South
State  196
Old Q Values:  [-2469.90645144  2553.98393291   174.55451539     0.        ]
New Q values:  [-2469.90645144  2405.24906716   174.55451539     0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        4594.18497998   35.60960053]
------
Step:15, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        4594.18497998   35.60960053]
New Q values:  [ 1.68263752e+01 -1.80600000e+02  2.08604400e+04  3.56096005e+01]
Reward: 9  Episode Reward:  45
xxxxx
x g x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[63391.22006715 -9022.41491635 -7525.7277781   4289.95784791]
------
Step:16, Action:North
State  288
Old Q Values:  [63391.22006715 -9022.41491635 -7525.7277781   4289.95784791]
New Q values:  [86497.8883362  -9022.41491635 -7525.7277781   4289.95784791]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x gax
x   x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 247.41814027  331.87249459 1390.48134102    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 247.41814027  331.87249459 1390.48134102    0.        ]
New Q values:  [ 247.41814027  331.87249459 5039.77271259    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 14927.26725394  6267.88141429  2546.60363946]
------
Step:2, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -7.62964724e+03  1.66254771e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  1.66254771e+03  1.20371620e+03]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x  .x
x.g.x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         296.93003889]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         296.93003889]
New Q values:  [-180.6        -880.4555246  -180.6         276.64460199]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   508.24195477]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     91.86413427    26.55294101]
New Q values:  [ -281.736      -9545.4473624     91.86413427    19.40550212]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869  429.40498185  563.81786908 -272.09726687]
New Q values:  [-177.44732869  454.23721663  563.81786908 -272.09726687]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  322.31251728  923.58407964 -244.98066897]
------
Step:4, Action:South
State  183
Old Q Values:  [ 247.41814027  331.87249459 5039.77271259    0.        ]
New Q values:  [ 247.41814027  198.8110338  5039.77271259    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 202.20678655  -40.34168621 -792.9733772   -35.88578819]
------
Step:5, Action:North
State  260
Old Q Values:  [-5030.55992858 -8695.4397473   2434.65786329 -2601.74710518]
New Q values:  [-2003.71620792 -8695.4397473   2434.65786329 -2601.74710518]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa. x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -5.70379540e+03  3.03592117e+01  0.00000000e+00]
------
Step:6, Action:East
State  191
Old Q Values:  [  3.06655861 508.69888654  46.04536991   0.        ]
New Q values:  [  3.06655861 508.69888654  64.46351788   0.        ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638   6.60224881   0.        ]
------
Step:7, Action:East
State  203
Old Q Values:  [  3.60604218 917.47486444  16.53929541   0.        ]
New Q values:  [  3.60604218 917.47486444 258.19086565   0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[840.58382495 111.91184742   0.         784.96322284]
------
Step:8, Action:North
State  218
Old Q Values:  [840.58382495 111.91184742   0.         784.96322284]
New Q values:  [418.62691058 111.91184742   0.         784.96322284]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         276.64460199]
------
Step:9, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         276.64460199]
New Q values:  [-180.6        -880.4555246  -180.6         137.61708108]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     91.86413427    19.40550212]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     91.86413427    19.40550212]
New Q values:  [ -281.736      -9545.4473624     77.43077803    19.40550212]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         137.61708108]
------
Step:11, Action:West
State  136
Old Q Values:  [ -724.71310357  2212.95404807 -6245.61866138   146.09425813]
New Q values:  [ -724.71310357  2212.95404807 -6245.61866138   706.5178206 ]
Reward: -1  Episode Reward:  39
xxxxx
xga x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   2162.26705784   -868.6060652 ]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     77.43077803    19.40550212]
New Q values:  [ -281.736      -9545.4473624     71.65743554    19.40550212]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         137.61708108]
------
Step:13, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         137.61708108]
New Q values:  [-180.6        -880.4555246  -180.6          75.94406309]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     71.65743554    19.40550212]
------
Step:14, Action:East
State  126
Old Q Values:  [   0.          943.00468559 1681.28639791  204.22976196]
New Q values:  [  0.         943.00468559 694.69777809 204.22976196]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          75.94406309]
------
Step:15, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          75.94406309]
New Q values:  [-180.6        -880.4555246  -180.6         182.25021167]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   508.24195477]
------
Step:16, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   508.24195477]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   371.84214263]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x  gx
x ..x
xxxxx
Step:17, Action:West
State  111
Old Q Values:  [-177.44732869  454.23721663  563.81786908 -272.09726687]
New Q values:  [-177.44732869  454.23721663  563.81786908 -120.29354603]
Reward: -301  Episode Reward:  -267
xxxxx
xa  x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  454.23721663  563.81786908 -120.29354603]
------
Step:18, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921  1265.57069661  -180.6       ]
New Q values:  [ -180.6        -1554.81716921   527.12550931  -180.6       ]
Reward: -1  Episode Reward:  -268
xxxxx
x a x
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     71.65743554    19.40550212]
------
Step:19, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   2162.26705784   -868.6060652 ]
New Q values:  [-10156.11771313  -8069.05606225   1528.19303756   -868.6060652 ]
Reward: -1  Episode Reward:  -269
xxxxx
xg ax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2212.95404807 -6245.61866138   706.5178206 ]
------
Step:20, Action:South
State  136
Old Q Values:  [ -724.71310357  2212.95404807 -6245.61866138   706.5178206 ]
New Q values:  [ -724.71310357  1514.53633272 -6245.61866138   706.5178206 ]
Reward: -1  Episode Reward:  -270
xxxxx
x g x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2097.66793883  2099.84904499 -6170.35693855 -1798.95296703]
------
Step:21, Action:South
State  216
Old Q Values:  [ 2097.66793883  2099.84904499 -6170.35693855 -1798.95296703]
New Q values:  [ 2097.66793883 26794.70611886 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  -261
xxxxx
xg  x
x   x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[86497.8883362  -9022.41491635 -7525.7277781   4289.95784791]
------
Step:22, Action:North
State  288
Old Q Values:  [86497.8883362  -9022.41491635 -7525.7277781   4289.95784791]
New Q values:  [42636.96717014 -9022.41491635 -7525.7277781   4289.95784791]
Reward: -1  Episode Reward:  -262
xxxxx
x   x
xg ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2097.66793883 26794.70611886 -6170.35693855 -1798.95296703]
------
Step:23, Action:South
State  216
Old Q Values:  [ 2097.66793883 26794.70611886 -6170.35693855 -1798.95296703]
New Q values:  [ 2097.66793883 23508.37259858 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  -263
xxxxx
x   x
x g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[42636.96717014 -9022.41491635 -7525.7277781   4289.95784791]
------
Step:24, Action:North
State  288
Old Q Values:  [42636.96717014 -9022.41491635 -7525.7277781   4289.95784791]
New Q values:  [18106.69864763 -9022.41491635 -7525.7277781   4289.95784791]
Reward: -10001  Episode Reward:  -10264
xxxxx
x   x
x  gx
x . x
xxxxx
Episode # 400
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  3786.66769779 -1855.11188891 -3385.12952694]
------
Step:1, Action:South
State  208
Old Q Values:  [-3012.01934359  3786.66769779 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  6952.07667341 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18106.69864763 -9022.41491635 -7525.7277781   4289.95784791]
------
Step:2, Action:North
State  288
Old Q Values:  [18106.69864763 -9022.41491635 -7525.7277781   4289.95784791]
New Q values:  [ 9327.70246107 -9022.41491635 -7525.7277781   4289.95784791]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  6952.07667341 -1855.11188891 -3385.12952694]
------
Step:3, Action:South
State  208
Old Q Values:  [-3012.01934359  6952.07667341 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  5578.54140768 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9327.70246107 -9022.41491635 -7525.7277781   4289.95784791]
------
Step:4, Action:West
State  288
Old Q Values:  [ 9327.70246107 -9022.41491635 -7525.7277781   4289.95784791]
New Q values:  [ 9327.70246107 -9022.41491635 -7525.7277781   3470.96526068]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4377.01488954 5831.94040506]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2783.65212637  3579.72826962]
New Q values:  [-2527.46239811 -8521.23367799  2783.65212637  1497.95334381]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 202.20678655  -40.34168621 -792.9733772   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [ 202.20678655  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [ 111.88896278  -40.34168621 -792.9733772   -35.88578819]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   85.35416053    15.18059333 -4079.73450974  -180.6       ]
------
Step:7, Action:North
State  183
Old Q Values:  [ 247.41814027  198.8110338  5039.77271259    0.        ]
New Q values:  [ 177.47919593  198.8110338  5039.77271259    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
xa..x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         263.70646606    0.            0.        ]
------
Step:8, Action:South
State  102
Old Q Values:  [-1.80600000e+02 -8.39473682e+03  5.16000000e+00 -1.80600000e+02]
New Q values:  [ -180.6       -2257.0253383     5.16       -180.6      ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xa. x
xg  x
xxxxx
Step:9, Action:North
State  180
Old Q Values:  [-3799.14231332   266.69721195  1360.9092598      0.        ]
New Q values:  [-1518.70892533   266.69721195  1360.9092598      0.        ]
Reward: -1  Episode Reward:  31
xxxxx
xa..x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[ -180.6       -2257.0253383     5.16       -180.6      ]
------
Step:10, Action:East
State  103
Old Q Values:  [-180.6         263.70646606    0.            0.        ]
New Q values:  [-180.6         263.70646606    5.4           0.        ]
Reward: 9  Episode Reward:  40
xxxxx
x a.x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ -180.6 -6000.6     0.      0. ]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     71.65743554    19.40550212]
New Q values:  [ -281.736      -9545.4473624     88.73803772    19.40550212]
Reward: 9  Episode Reward:  49
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         182.25021167]
------
Step:12, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         182.25021167]
New Q values:  [-180.6        -880.4555246  -180.6         183.85272746]
Reward: -1  Episode Reward:  48
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   371.84214263]
------
Step:13, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   371.84214263]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   317.28221778]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  454.23721663  563.81786908 -120.29354603]
------
Step:14, Action:East
State  99
Old Q Values:  [    0.         26514.55127631 57148.72647269     0.        ]
New Q values:  [    0.         26514.55127631 58477.25245694     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  59707.08388817 118727.87289289]
------
Step:15, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   1528.19303756   -868.6060652 ]
New Q values:  [-10156.11771313  -8069.05606225   1528.19303756   -338.53200157]
Reward: -1  Episode Reward:  45
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   31.70141503    3.88307055 -180.6       ]
------
Step:16, Action:South
State  99
Old Q Values:  [    0.         26514.55127631 58477.25245694     0.        ]
New Q values:  [    0.         30652.33593994 58477.25245694     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 15642.91452871     0.        ]
------
Step:17, Action:North
State  181
Old Q Values:  [   85.35416053    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [  202.68702493    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  454.23721663  563.81786908 -120.29354603]
------
Step:18, Action:East
State  111
Old Q Values:  [-177.44732869  454.23721663  563.81786908 -120.29354603]
New Q values:  [-177.44732869  454.23721663  320.11181297 -120.29354603]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   317.28221778]
------
Step:19, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   317.28221778]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   262.5840521 ]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  454.23721663  320.11181297 -120.29354603]
------
Step:20, Action:South
State  109
Old Q Values:  [-241.10880094   31.70141503    3.88307055 -180.6       ]
New Q values:  [-241.10880094   72.88667349    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  202.68702493    15.18059333 -4079.73450974  -180.6       ]
------
Step:21, Action:North
State  181
Old Q Values:  [  202.68702493    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [  216.74597496    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  454.23721663  320.11181297 -120.29354603]
------
Step:22, Action:South
State  99
Old Q Values:  [    0.         30652.33593994 58477.25245694     0.        ]
New Q values:  [    0.         32307.44980539 58477.25245694     0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 15642.91452871     0.        ]
------
Step:23, Action:North
State  180
Old Q Values:  [-1518.70892533   266.69721195  1360.9092598      0.        ]
New Q values:  [-449.94591734  266.69721195 1360.9092598     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   527.12550931  -180.6       ]
------
Step:24, Action:East
State  108
Old Q Values:  [-8463.16477134   732.33338182   438.8916002      0.        ]
New Q values:  [-8463.16477134   732.33338182   633.41455135     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   1528.19303756   -338.53200157]
------
Step:25, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     88.73803772    19.40550212]
New Q values:  [ -281.736      -9545.4473624     90.05103332    19.40550212]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         183.85272746]
------
Step:26, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         183.85272746]
New Q values:  [-180.6        -880.4555246  -180.6          99.95640098]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     90.05103332    19.40550212]
------
Step:27, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     90.05103332    19.40550212]
New Q values:  [ -281.736      -9545.4473624     65.40733362    19.40550212]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          99.95640098]
------
Step:28, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          99.95640098]
New Q values:  [-180.6        -880.4555246  -180.6          59.00476048]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     65.40733362    19.40550212]
------
Step:29, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     65.40733362    19.40550212]
New Q values:  [ -281.736      -9545.4473624     43.26436159    19.40550212]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          59.00476048]
------
Step:30, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          59.00476048]
New Q values:  [-180.6        -880.4555246  -180.6          35.98121267]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     43.26436159    19.40550212]
------
Step:31, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     43.26436159    19.40550212]
New Q values:  [ -281.736      -9545.4473624     27.50010844    19.40550212]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          35.98121267]
------
Step:32, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          35.98121267]
New Q values:  [-180.6       -880.4555246 -180.6         22.0425176]
Reward: -1  Episode Reward:  28
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     27.50010844    19.40550212]
------
Step:33, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     27.50010844    19.40550212]
New Q values:  [ -281.736      -9545.4473624     17.01279865    19.40550212]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6         22.0425176]
------
Step:34, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 85346.68671487]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 69756.43655382]
Reward: -1  Episode Reward:  26
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  59707.08388817 118727.87289289]
------
Step:35, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   1528.19303756   -338.53200157]
New Q values:  [-10156.11771313  -8069.05606225   1528.19303756   -114.14679858]
Reward: -1  Episode Reward:  25
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   72.88667349    3.88307055 -180.6       ]
------
Step:36, Action:South
State  99
Old Q Values:  [    0.         32307.44980539 58477.25245694     0.        ]
New Q values:  [    0.         32969.49535157 58477.25245694     0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 15642.91452871     0.        ]
------
Step:37, Action:North
State  181
Old Q Values:  [  216.74597496    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [  107.96439203    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   72.88667349    3.88307055 -180.6       ]
------
Step:38, Action:South
State  99
Old Q Values:  [    0.         32969.49535157 58477.25245694     0.        ]
New Q values:  [    0.         33234.31357004 58477.25245694     0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 15642.91452871     0.        ]
------
Step:39, Action:North
State  180
Old Q Values:  [-449.94591734  266.69721195 1360.9092598     0.        ]
New Q values:  [ -22.44071414  266.69721195 1360.9092598     0.        ]
Reward: -1  Episode Reward:  21
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   527.12550931  -180.6       ]
------
Step:40, Action:East
State  108
Old Q Values:  [-8463.16477134   732.33338182   633.41455135     0.        ]
New Q values:  [-8463.16477134   732.33338182   711.22373181     0.        ]
Reward: -1  Episode Reward:  20
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   1528.19303756   -114.14679858]
------
Step:41, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     17.01279865    19.40550212]
New Q values:  [ -281.736      -9545.4473624     12.81787474    19.40550212]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6         22.0425176]
------
Step:42, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6         22.0425176]
New Q values:  [-180.6        -880.4555246  -180.6          14.03865768]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474    19.40550212]
------
Step:43, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474    19.40550212]
New Q values:  [ -281.736      -9545.4473624     12.81787474   165.29985364]
Reward: -1  Episode Reward:  17
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   527.12550931  -180.6       ]
------
Step:44, Action:East
State  99
Old Q Values:  [    0.         33234.31357004 58477.25245694     0.        ]
New Q values:  [    0.         33234.31357004 59008.66285065     0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  59707.08388817 118727.87289289]
------
Step:45, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474   165.29985364]
New Q values:  [ -281.736      -9545.4473624     12.81787474   223.65759425]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1554.81716921   527.12550931  -180.6       ]
------
Step:46, Action:East
State  110
Old Q Values:  [ -180.6        -1554.81716921   527.12550931  -180.6       ]
New Q values:  [ -180.6        -1554.81716921   277.347482    -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474   223.65759425]
------
Step:47, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474   223.65759425]
New Q values:  [ -281.736      -9545.4473624     12.81787474   225.13420269]
Reward: -1  Episode Reward:  13
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  454.23721663  320.11181297 -120.29354603]
------
Step:48, Action:South
State  110
Old Q Values:  [ -180.6        -1554.81716921   277.347482    -180.6       ]
New Q values:  [-180.6         478.94252253  277.347482   -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3671.56463405     0.        ]
------
Step:49, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 63069.34457436     0.        ]
New Q values:  [    0.          4614.46100011 85731.90214416     0.        ]
Reward: 100009  Episode Reward:  100021
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2783.65212637  1497.95334381]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2783.65212637  1497.95334381]
New Q values:  [-2527.46239811 -8521.23367799  3917.17158887  1497.95334381]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9327.70246107 -9022.41491635 -7525.7277781   3470.96526068]
------
Step:2, Action:North
State  288
Old Q Values:  [ 9327.70246107 -9022.41491635 -7525.7277781   3470.96526068]
New Q values:  [ 5410.04340673 -9022.41491635 -7525.7277781   3470.96526068]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  5578.54140768 -1855.11188891 -3385.12952694]
------
Step:3, Action:South
State  208
Old Q Values:  [-3012.01934359  5578.54140768 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  3853.82958509 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5410.04340673 -9022.41491635 -7525.7277781   3470.96526068]
------
Step:4, Action:North
State  288
Old Q Values:  [ 5410.04340673 -9022.41491635 -7525.7277781   3470.96526068]
New Q values:  [ 3319.56623822 -9022.41491635 -7525.7277781   3470.96526068]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  3853.82958509 -1855.11188891 -3385.12952694]
------
Step:5, Action:South
State  208
Old Q Values:  [-3012.01934359  3853.82958509 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  2582.22141224 -1855.11188891 -3385.12952694]
Reward: -1  Episode Reward:  15
xxxxx
xg .x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3319.56623822 -9022.41491635 -7525.7277781   3470.96526068]
------
Step:6, Action:West
State  288
Old Q Values:  [ 3319.56623822 -9022.41491635 -7525.7277781   3470.96526068]
New Q values:  [ 3319.56623822 -9022.41491635 -7525.7277781   2562.93758093]
Reward: -1  Episode Reward:  14
xxxxx
x. .x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3917.17158887  1497.95334381]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3917.17158887  1497.95334381]
New Q values:  [-2527.46239811 -8521.23367799  2562.13850701  1497.95334381]
Reward: -1  Episode Reward:  13
xxxxx
xg .x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3319.56623822 -9022.41491635 -7525.7277781   2562.93758093]
------
Step:8, Action:North
State  288
Old Q Values:  [ 3319.56623822 -9022.41491635 -7525.7277781   2562.93758093]
New Q values:  [ 2101.89291896 -9022.41491635 -7525.7277781   2562.93758093]
Reward: -1  Episode Reward:  12
xxxxx
x. .x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  2582.22141224 -1855.11188891 -3385.12952694]
------
Step:9, Action:South
State  210
Old Q Values:  [ 1.91033339e+04 -1.92583303e+01  1.38841071e+04  3.52184257e+00]
New Q values:  [1.91033339e+04 7.60577942e+02 1.38841071e+04 3.52184257e+00]
Reward: -1  Episode Reward:  11
xxxxx
x. .x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2101.89291896 -9022.41491635 -7525.7277781   2562.93758093]
------
Step:10, Action:West
State  288
Old Q Values:  [ 2101.89291896 -9022.41491635 -7525.7277781   2562.93758093]
New Q values:  [ 2101.89291896 -9022.41491635 -7525.7277781   1793.21658448]
Reward: -1  Episode Reward:  10
xxxxx
x. .x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2562.13850701  1497.95334381]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2562.13850701  1497.95334381]
New Q values:  [-2527.46239811 -8521.23367799  1654.82327849  1497.95334381]
Reward: -1  Episode Reward:  9
xxxxx
x. .x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2101.89291896 -9022.41491635 -7525.7277781   1793.21658448]
------
Step:12, Action:North
State  288
Old Q Values:  [ 2101.89291896 -9022.41491635 -7525.7277781   1793.21658448]
New Q values:  [ 6571.1573473  -9022.41491635 -7525.7277781   1793.21658448]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.91033339e+04 7.60577942e+02 1.38841071e+04 3.52184257e+00]
------
Step:13, Action:North
State  210
Old Q Values:  [1.91033339e+04 7.60577942e+02 1.38841071e+04 3.52184257e+00]
New Q values:  [7.65094517e+03 7.60577942e+02 1.38841071e+04 3.52184257e+00]
Reward: 9  Episode Reward:  17
xxxxx
x. ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          14.03865768]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          14.03865768]
New Q values:  [-180.6        -880.4555246  -180.6          72.55572388]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474   225.13420269]
------
Step:15, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474   225.13420269]
New Q values:  [ -281.736      -9545.4473624     12.81787474   109.24698514]
Reward: 9  Episode Reward:  25
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8399.5673214     45.97768022  -180.6       ]
------
Step:16, Action:East
State  106
Old Q Values:  [ -180.6        -8399.5673214     45.97768022  -180.6       ]
New Q values:  [ -180.6        -8399.5673214     50.56516763  -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474   109.24698514]
------
Step:17, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474   109.24698514]
New Q values:  [ -281.736      -9545.4473624     12.81787474    46.48311978]
Reward: -1  Episode Reward:  23
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:18, Action:South
State  105
Old Q Values:  [ -180.6          369.32582787 -5851.25726525     0.        ]
New Q values:  [ -180.6          185.51964876 -5851.25726525     0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x g x
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  107.96439203    15.18059333 -4079.73450974  -180.6       ]
------
Step:19, Action:North
State  181
Old Q Values:  [  107.96439203    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [   98.24165144    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  31
xxxxx
xa gx
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          185.51964876 -5851.25726525     0.        ]
------
Step:20, Action:South
State  105
Old Q Values:  [ -180.6          185.51964876 -5851.25726525     0.        ]
New Q values:  [ -180.6          103.08035493 -5851.25726525     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x g x
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   98.24165144    15.18059333 -4079.73450974  -180.6       ]
------
Step:21, Action:North
State  185
Old Q Values:  [-6.00000000e-01  0.00000000e+00  8.49498952e+02 -1.78980000e+02]
New Q values:  [   2.54432572    0.          849.49895246 -178.98      ]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:22, Action:South
State  106
Old Q Values:  [ -180.6        -8399.5673214     50.56516763  -180.6       ]
New Q values:  [ -180.6        -8952.15415062    50.56516763  -180.6       ]
Reward: -10001  Episode Reward:  -9972
xxxxx
x   x
xg. x
x.  x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   98.24165144    15.18059333 -4079.73450974  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [   98.24165144    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [  123.80860039    15.18059333 -4079.73450974  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         263.70646606    5.4           0.        ]
------
Step:2, Action:South
State  109
Old Q Values:  [-241.10880094   72.88667349    3.88307055 -180.6       ]
New Q values:  [-241.10880094   65.69724951    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  123.80860039    15.18059333 -4079.73450974  -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [  123.80860039    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [  128.03537998    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xa..x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         263.70646606    5.4           0.        ]
------
Step:4, Action:South
State  103
Old Q Values:  [-180.6         263.70646606    5.4           0.        ]
New Q values:  [-180.6       1616.8144002    5.4          0.       ]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 177.47919593  198.8110338  5039.77271259    0.        ]
------
Step:5, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  3671.56463405     0.        ]
New Q values:  [    0.         -5969.29177534  1972.79016803     0.        ]
Reward: 9  Episode Reward:  15
xxxxx
x ..x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.97854041e+03  1.66254771e+03  1.20371620e+03]
------
Step:6, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108  715.61136736  181.20343395]
New Q values:  [  62.8218634  1206.93052108 7338.15632652  181.20343395]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2097.66793883 23508.37259858 -6170.35693855 -1798.95296703]
------
Step:7, Action:South
State  208
Old Q Values:  [-3012.01934359  2582.22141224 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  3009.63576909 -1855.11188891 -3385.12952694]
Reward: 9  Episode Reward:  23
xxxxx
xg..x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6571.1573473  -9022.41491635 -7525.7277781   1793.21658448]
------
Step:8, Action:North
State  288
Old Q Values:  [ 6571.1573473  -9022.41491635 -7525.7277781   1793.21658448]
New Q values:  [ 3530.75366965 -9022.41491635 -7525.7277781   1793.21658448]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  3009.63576909 -1855.11188891 -3385.12952694]
------
Step:9, Action:South
State  210
Old Q Values:  [7.65094517e+03 7.60577942e+02 1.38841071e+04 3.52184257e+00]
New Q values:  [7.65094517e+03 1.36285728e+03 1.38841071e+04 3.52184257e+00]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3530.75366965 -9022.41491635 -7525.7277781   1793.21658448]
------
Step:10, Action:North
State  288
Old Q Values:  [ 3530.75366965 -9022.41491635 -7525.7277781   1793.21658448]
New Q values:  [ 5576.93358313 -9022.41491635 -7525.7277781   1793.21658448]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65094517e+03 1.36285728e+03 1.38841071e+04 3.52184257e+00]
------
Step:11, Action:East
State  208
Old Q Values:  [-3012.01934359  3009.63576909 -1855.11188891 -3385.12952694]
New Q values:  [-3012.01934359  3009.63576909   -19.75402484 -3385.12952694]
Reward: -301  Episode Reward:  -281
xxxxx
x ..x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  3009.63576909   -19.75402484 -3385.12952694]
------
Step:12, Action:South
State  210
Old Q Values:  [7.65094517e+03 1.36285728e+03 1.38841071e+04 3.52184257e+00]
New Q values:  [7.65094517e+03 2.21762299e+03 1.38841071e+04 3.52184257e+00]
Reward: -1  Episode Reward:  -282
xxxxx
x ..x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5576.93358313 -9022.41491635 -7525.7277781   1793.21658448]
------
Step:13, Action:North
State  288
Old Q Values:  [ 5576.93358313 -9022.41491635 -7525.7277781   1793.21658448]
New Q values:  [ 6395.40554852 -9022.41491635 -7525.7277781   1793.21658448]
Reward: -1  Episode Reward:  -283
xxxxx
x ..x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65094517e+03 2.21762299e+03 1.38841071e+04 3.52184257e+00]
------
Step:14, Action:East
State  210
Old Q Values:  [7.65094517e+03 2.21762299e+03 1.38841071e+04 3.52184257e+00]
New Q values:  [7.65094517e+03 2.21762299e+03 9.53827494e+03 3.52184257e+00]
Reward: -301  Episode Reward:  -584
xxxxx
x ..x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65094517e+03 2.21762299e+03 9.53827494e+03 3.52184257e+00]
------
Step:15, Action:East
State  208
Old Q Values:  [-3012.01934359  3009.63576909   -19.75402484 -3385.12952694]
New Q values:  [-3012.01934359  3009.63576909   714.38912079 -3385.12952694]
Reward: -301  Episode Reward:  -885
xxxxx
x ..x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  3009.63576909   714.38912079 -3385.12952694]
------
Step:16, Action:South
State  216
Old Q Values:  [ 2097.66793883 23508.37259858 -6170.35693855 -1798.95296703]
New Q values:  [ 2097.66793883 11321.37070399 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  -886
xxxxx
x g.x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6395.40554852 -9022.41491635 -7525.7277781   1793.21658448]
------
Step:17, Action:North
State  288
Old Q Values:  [ 6395.40554852 -9022.41491635 -7525.7277781   1793.21658448]
New Q values:  [ 3460.45295013 -9022.41491635 -7525.7277781   1793.21658448]
Reward: -1  Episode Reward:  -887
xxxxx
x ..x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  3009.63576909   714.38912079 -3385.12952694]
------
Step:18, Action:South
State  208
Old Q Values:  [-3012.01934359  3009.63576909   714.38912079 -3385.12952694]
New Q values:  [-3012.01934359  2241.39019268   714.38912079 -3385.12952694]
Reward: -1  Episode Reward:  -888
xxxxx
x ..x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3460.45295013 -9022.41491635 -7525.7277781   1793.21658448]
------
Step:19, Action:North
State  288
Old Q Values:  [ 3460.45295013 -9022.41491635 -7525.7277781   1793.21658448]
New Q values:  [ 2055.99823786 -9022.41491635 -7525.7277781   1793.21658448]
Reward: -1  Episode Reward:  -889
xxxxx
x ..x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  2241.39019268   714.38912079 -3385.12952694]
------
Step:20, Action:South
State  216
Old Q Values:  [ 2097.66793883 11321.37070399 -6170.35693855 -1798.95296703]
New Q values:  [ 2097.66793883  5144.74775295 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  -890
xxxxx
x g.x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2055.99823786 -9022.41491635 -7525.7277781   1793.21658448]
------
Step:21, Action:North
State  288
Old Q Values:  [ 2055.99823786 -9022.41491635 -7525.7277781   1793.21658448]
New Q values:  [ 1494.21635294 -9022.41491635 -7525.7277781   1793.21658448]
Reward: -1  Episode Reward:  -891
xxxxx
xg..x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  2241.39019268   714.38912079 -3385.12952694]
------
Step:22, Action:South
State  208
Old Q Values:  [-3012.01934359  2241.39019268   714.38912079 -3385.12952694]
New Q values:  [-3012.01934359  1433.92105241   714.38912079 -3385.12952694]
Reward: -1  Episode Reward:  -892
xxxxx
xg..x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1494.21635294 -9022.41491635 -7525.7277781   1793.21658448]
------
Step:23, Action:West
State  288
Old Q Values:  [ 1494.21635294 -9022.41491635 -7525.7277781   1793.21658448]
New Q values:  [ 1494.21635294 -9022.41491635 -7525.7277781   1219.13361734]
Reward: 9  Episode Reward:  -883
xxxxx
x ..x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1654.82327849  1497.95334381]
------
Step:24, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1654.82327849  1497.95334381]
New Q values:  [-2527.46239811 -8521.23367799  1109.59421728  1497.95334381]
Reward: -1  Episode Reward:  -884
xxxxx
x ..x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1494.21635294 -9022.41491635 -7525.7277781   1219.13361734]
------
Step:25, Action:North
State  288
Old Q Values:  [ 1494.21635294 -9022.41491635 -7525.7277781   1219.13361734]
New Q values:  [ 3458.56902186 -9022.41491635 -7525.7277781   1219.13361734]
Reward: -1  Episode Reward:  -885
xxxxx
x ..x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65094517e+03 2.21762299e+03 9.53827494e+03 3.52184257e+00]
------
Step:26, Action:East
State  208
Old Q Values:  [-3012.01934359  1433.92105241   714.38912079 -3385.12952694]
New Q values:  [-3012.01934359  1433.92105241   535.33196404 -3385.12952694]
Reward: -301  Episode Reward:  -1186
xxxxx
x ..x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  1433.92105241   535.33196404 -3385.12952694]
------
Step:27, Action:South
State  208
Old Q Values:  [-3012.01934359  1433.92105241   535.33196404 -3385.12952694]
New Q values:  [-3012.01934359  1610.53912752   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  -1187
xxxxx
x g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3458.56902186 -9022.41491635 -7525.7277781   1219.13361734]
------
Step:28, Action:North
State  288
Old Q Values:  [ 3458.56902186 -9022.41491635 -7525.7277781   1219.13361734]
New Q values:  [ 1865.989347   -9022.41491635 -7525.7277781   1219.13361734]
Reward: -1  Episode Reward:  -1188
xxxxx
x ..x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  1610.53912752   535.33196404 -3385.12952694]
------
Step:29, Action:South
State  210
Old Q Values:  [7.65094517e+03 2.21762299e+03 9.53827494e+03 3.52184257e+00]
New Q values:  [7.65094517e+03 1.44624600e+03 9.53827494e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -1189
xxxxx
x ..x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1865.989347   -9022.41491635 -7525.7277781   1219.13361734]
------
Step:30, Action:North
State  288
Old Q Values:  [ 1865.989347   -9022.41491635 -7525.7277781   1219.13361734]
New Q values:  [ 1228.95747706 -9022.41491635 -7525.7277781   1219.13361734]
Reward: -1  Episode Reward:  -1190
xxxxx
x ..x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  1610.53912752   535.33196404 -3385.12952694]
------
Step:31, Action:South
State  210
Old Q Values:  [7.65094517e+03 1.44624600e+03 9.53827494e+03 3.52184257e+00]
New Q values:  [7.65094517e+03 9.46585643e+02 9.53827494e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -1191
xxxxx
x ..x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1228.95747706 -9022.41491635 -7525.7277781   1219.13361734]
------
Step:32, Action:North
State  288
Old Q Values:  [ 1228.95747706 -9022.41491635 -7525.7277781   1219.13361734]
New Q values:  [ 3352.46547151 -9022.41491635 -7525.7277781   1219.13361734]
Reward: -1  Episode Reward:  -1192
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65094517e+03 9.46585643e+02 9.53827494e+03 3.52184257e+00]
------
Step:33, Action:East
State  210
Old Q Values:  [7.65094517e+03 9.46585643e+02 9.53827494e+03 3.52184257e+00]
New Q values:  [7.65094517e+03 9.46585643e+02 6.49619245e+03 3.52184257e+00]
Reward: -301  Episode Reward:  -1493
xxxxx
x ..x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65094517e+03 9.46585643e+02 6.49619245e+03 3.52184257e+00]
------
Step:34, Action:North
State  210
Old Q Values:  [7.65094517e+03 9.46585643e+02 6.49619245e+03 3.52184257e+00]
New Q values:  [2.39927090e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  -1484
xxxxx
x .ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 69756.43655382]
------
Step:35, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          72.55572388]
New Q values:  [-180.6        -880.4555246  -180.6          48.36722548]
Reward: 9  Episode Reward:  -1475
xxxxx
x a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474    46.48311978]
------
Step:36, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474    46.48311978]
New Q values:  [ -281.736      -9545.4473624     12.81787474    21.37757363]
Reward: -1  Episode Reward:  -1476
xxxxx
xa  x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:37, Action:South
State  105
Old Q Values:  [ -180.6          103.08035493 -5851.25726525     0.        ]
New Q values:  [ -180.6          295.48182771 -5851.25726525     0.        ]
Reward: -1  Episode Reward:  -1477
xxxxx
x g x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   2.54432572    0.          849.49895246 -178.98      ]
------
Step:38, Action:East
State  185
Old Q Values:  [   2.54432572    0.          849.49895246 -178.98      ]
New Q values:  [   2.54432572    0.          837.83042287 -178.98      ]
Reward: -1  Episode Reward:  -1478
xxxxx
x  gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.66210281e+03 -8.94356769e+03  2.40000000e-02]
------
Step:39, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.66210281e+03 -8.94356769e+03  2.40000000e-02]
New Q values:  [ 2.33354578e+00  2.41382324e+03 -8.94356769e+03  2.40000000e-02]
Reward: -1  Episode Reward:  -1479
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4377.01488954 5831.94040506]
------
Step:40, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1109.59421728  1497.95334381]
New Q values:  [-2527.46239811 -8521.23367799  1109.59421728 69017.61205009]
Reward: 100009  Episode Reward:  98530
xxxxx
x   x
x g x
xa  x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4377.01488954 5831.94040506]
------
Step:1, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4377.01488954 5831.94040506]
New Q values:  [  37.74111519 -168.92307549 4377.01488954 2371.74285086]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 111.88896278  -40.34168621 -792.9733772   -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [ 111.88896278  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [  88.5661991   -40.34168621 -792.9733772   -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  128.03537998    15.18059333 -4079.73450974  -180.6       ]
------
Step:3, Action:North
State  183
Old Q Values:  [ 177.47919593  198.8110338  5039.77271259    0.        ]
New Q values:  [ 561.43599843  198.8110338  5039.77271259    0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xa..x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6       1616.8144002    5.4          0.       ]
------
Step:4, Action:South
State  103
Old Q Values:  [-180.6       1616.8144002    5.4          0.       ]
New Q values:  [-180.6         684.53637407    5.4           0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  128.03537998    15.18059333 -4079.73450974  -180.6       ]
------
Step:5, Action:North
State  181
Old Q Values:  [  128.03537998    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [   70.32332684    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
xa.gx
x . x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   65.69724951    3.88307055 -180.6       ]
------
Step:6, Action:South
State  103
Old Q Values:  [-180.6         684.53637407    5.4           0.        ]
New Q values:  [-180.6         294.31154768    5.4           0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   70.32332684    15.18059333 -4079.73450974  -180.6       ]
------
Step:7, Action:North
State  181
Old Q Values:  [   70.32332684    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [  115.82279504    15.18059333 -4079.73450974  -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
xa..x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         294.31154768    5.4           0.        ]
------
Step:8, Action:South
State  109
Old Q Values:  [-241.10880094   65.69724951    3.88307055 -180.6       ]
New Q values:  [-241.10880094   60.42573832    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x g.x
xa. x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  115.82279504    15.18059333 -4079.73450974  -180.6       ]
------
Step:9, Action:North
State  180
Old Q Values:  [ -22.44071414  266.69721195 1360.9092598     0.        ]
New Q values:  [-5887.27445228   266.69721195  1360.9092598      0.        ]
Reward: -10001  Episode Reward:  -9979
xxxxx
xg..x
x . x
x  .x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  88.5661991   -40.34168621 -792.9733772   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [  88.5661991   -40.34168621 -792.9733772   -35.88578819]
New Q values:  [1552.75829342  -40.34168621 -792.9733772   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 561.43599843  198.8110338  5039.77271259    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [  115.82279504    15.18059333 -4079.73450974  -180.6       ]
New Q values:  [ 115.82279504   15.18059333  429.90430751 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  6854.66037136]
------
Step:3, Action:West
State  192
Old Q Values:  [3.89777037e-01 1.24313534e+03 3.76029253e+03 1.50487041e+04]
New Q values:  [3.89777037e-01 1.24313534e+03 3.76029253e+03 6.14785294e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 115.82279504   15.18059333  429.90430751 -180.6       ]
------
Step:4, Action:North
State  181
Old Q Values:  [ 115.82279504   15.18059333  429.90430751 -180.6       ]
New Q values:  [ 140.02258232   15.18059333  429.90430751 -180.6       ]
Reward: 9  Episode Reward:  26
xxxxx
xa..x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         294.31154768    5.4           0.        ]
------
Step:5, Action:South
State  109
Old Q Values:  [-241.10880094   60.42573832    3.88307055 -180.6       ]
New Q values:  [-241.10880094  300.64551922    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x .gx
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  322.31251728  923.58407964 -244.98066897]
------
Step:6, Action:East
State  189
Old Q Values:  [   9.84673294  322.31251728  923.58407964 -244.98066897]
New Q values:  [   9.84673294  322.31251728 1090.408352   -244.98066897]
Reward: -1  Episode Reward:  24
xxxxx
x g.x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2405.24906716   174.55451539     0.        ]
------
Step:7, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.24313534e+03 3.76029253e+03 6.14785294e+03]
New Q values:  [3.89777037e-01 6.76078614e+03 3.76029253e+03 6.14785294e+03]
Reward: 9  Episode Reward:  33
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1.68263752e+01 -1.80600000e+02  2.08604400e+04  3.56096005e+01]
------
Step:8, Action:East
State  276
Old Q Values:  [ 1.68263752e+01 -1.80600000e+02  2.08604400e+04  3.56096005e+01]
New Q values:  [  16.82637525 -180.6        9355.31564631   35.60960053]
Reward: 9  Episode Reward:  42
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3352.46547151 -9022.41491635 -7525.7277781   1219.13361734]
------
Step:9, Action:North
State  288
Old Q Values:  [ 3352.46547151 -9022.41491635 -7525.7277781   1219.13361734]
New Q values:  [ 1823.54792686 -9022.41491635 -7525.7277781   1219.13361734]
Reward: -1  Episode Reward:  41
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  1610.53912752   535.33196404 -3385.12952694]
------
Step:10, Action:South
State  208
Old Q Values:  [-3012.01934359  1610.53912752   535.33196404 -3385.12952694]
New Q values:  [-3012.01934359  1190.68002907   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  40
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1823.54792686 -9022.41491635 -7525.7277781   1219.13361734]
------
Step:11, Action:North
State  288
Old Q Values:  [ 1823.54792686 -9022.41491635 -7525.7277781   1219.13361734]
New Q values:  [ 1086.02317947 -9022.41491635 -7525.7277781   1219.13361734]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3012.01934359  1190.68002907   535.33196404 -3385.12952694]
------
Step:12, Action:South
State  208
Old Q Values:  [-3012.01934359  1190.68002907   535.33196404 -3385.12952694]
New Q values:  [-3012.01934359   841.41209683   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  38
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1086.02317947 -9022.41491635 -7525.7277781   1219.13361734]
------
Step:13, Action:West
State  288
Old Q Values:  [ 1086.02317947 -9022.41491635 -7525.7277781   1219.13361734]
New Q values:  [ 1086.02317947 -9022.41491635 -7525.7277781   1800.1579138 ]
Reward: -1  Episode Reward:  37
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4377.01488954 2371.74285086]
------
Step:14, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4377.01488954 2371.74285086]
New Q values:  [  37.74111519 -168.92307549 2290.25332996 2371.74285086]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1086.02317947 -9022.41491635 -7525.7277781   1800.1579138 ]
------
Step:15, Action:West
State  288
Old Q Values:  [ 1086.02317947 -9022.41491635 -7525.7277781   1800.1579138 ]
New Q values:  [ 1086.02317947 -9022.41491635 -7525.7277781   1430.98602078]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2290.25332996 2371.74285086]
------
Step:16, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2290.25332996 2371.74285086]
New Q values:  [  37.74111519 -168.92307549 2290.25332996 1413.92462837]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1552.75829342  -40.34168621 -792.9733772   -35.88578819]
------
Step:17, Action:North
State  261
Old Q Values:  [1552.75829342  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [2132.43513115  -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 561.43599843  198.8110338  5039.77271259    0.        ]
------
Step:18, Action:East
State  183
Old Q Values:  [ 561.43599843  198.8110338  5039.77271259    0.        ]
New Q values:  [ 561.43599843  198.8110338  2514.07339945    0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.97854041e+03  1.66254771e+03  1.20371620e+03]
------
Step:19, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  1.66254771e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  7.86223180e+03  1.20371620e+03]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.39927090e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
------
Step:20, Action:North
State  210
Old Q Values:  [2.39927090e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
New Q values:  [3.05294146e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  40
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 69756.43655382]
------
Step:21, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 69756.43655382]
New Q values:  [ 29415.41153072   2346.20646678   -180.00807518 123526.33648939]
Reward: 100009  Episode Reward:  100049
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1109.59421728 69017.61205009]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1109.59421728 69017.61205009]
New Q values:  [-2527.46239811 -8521.23367799  1109.59421728 28252.17535938]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2132.43513115  -40.34168621 -792.9733772   -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [2132.43513115  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [ 987.34534471  -40.34168621 -792.9733772   -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 140.02258232   15.18059333  429.90430751 -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [ 140.02258232   15.18059333  429.90430751 -180.6       ]
New Q values:  [ 151.60268869   15.18059333  429.90430751 -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
xag.x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  300.64551922    3.88307055 -180.6       ]
------
Step:4, Action:South
State  103
Old Q Values:  [-180.6         294.31154768    5.4           0.        ]
New Q values:  [-180.6         246.09591133    5.4           0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 151.60268869   15.18059333  429.90430751 -180.6       ]
------
Step:5, Action:North
State  181
Old Q Values:  [ 151.60268869   15.18059333  429.90430751 -180.6       ]
New Q values:  [ 133.86984888   15.18059333  429.90430751 -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
xa..x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         246.09591133    5.4           0.        ]
------
Step:6, Action:South
State  103
Old Q Values:  [-180.6         246.09591133    5.4           0.        ]
New Q values:  [-180.6         852.06038437    5.4           0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 561.43599843  198.8110338  2514.07339945    0.        ]
------
Step:7, Action:East
State  181
Old Q Values:  [ 133.86984888   15.18059333  429.90430751 -180.6       ]
New Q values:  [ 133.86984888   15.18059333 2227.75983441 -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  6854.66037136]
------
Step:8, Action:West
State  193
Old Q Values:  [-5922.26708831  2203.87243301 -3909.58186816  6854.66037136]
New Q values:  [-5922.26708831  2203.87243301 -3909.58186816  3409.59209887]
Reward: -1  Episode Reward:  22
xxxxx
x .gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 133.86984888   15.18059333 2227.75983441 -180.6       ]
------
Step:9, Action:East
State  181
Old Q Values:  [ 133.86984888   15.18059333 2227.75983441 -180.6       ]
New Q values:  [ 133.86984888   15.18059333 1913.38156342 -180.6       ]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  3409.59209887]
------
Step:10, Action:West
State  199
Old Q Values:  [  22.48535485 1478.00809166  549.89931413    0.        ]
New Q values:  [  22.48535485 1478.00809166  549.89931413  753.62201984]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 561.43599843  198.8110338  2514.07339945    0.        ]
------
Step:11, Action:East
State  183
Old Q Values:  [ 561.43599843  198.8110338  2514.07339945    0.        ]
New Q values:  [ 561.43599843  198.8110338  1123.659465      0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
------
Step:12, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  7.86223180e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  1.23091171e+04  1.20371620e+03]
Reward: 9  Episode Reward:  28
xxxxx
x ..x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.05294146e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
------
Step:13, Action:North
State  208
Old Q Values:  [-3012.01934359   841.41209683   535.33196404 -3385.12952694]
New Q values:  [35858.49320938   841.41209683   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  37
xxxxx
x .ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 123526.33648939]
------
Step:14, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          48.36722548]
New Q values:  [-180.6        -880.4555246  -180.6         307.64829587]
Reward: 9  Episode Reward:  46
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         943.00468559 694.69777809 204.22976196]
------
Step:15, Action:East
State  126
Old Q Values:  [  0.         943.00468559 694.69777809 204.22976196]
New Q values:  [  0.         943.00468559 369.5736     204.22976196]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         307.64829587]
------
Step:16, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         307.64829587]
New Q values:  [-180.6        -880.4555246  -180.6         405.36072403]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         943.00468559 369.5736     204.22976196]
------
Step:17, Action:East
State  126
Old Q Values:  [  0.         943.00468559 369.5736     204.22976196]
New Q values:  [  0.         943.00468559 268.83765721 204.22976196]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         405.36072403]
------
Step:18, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         405.36072403]
New Q values:  [-180.6        -880.4555246  -180.6         444.44569529]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         943.00468559 268.83765721 204.22976196]
------
Step:19, Action:South
State  126
Old Q Values:  [  0.         943.00468559 268.83765721 204.22976196]
New Q values:  [  0.         417.24724415 268.83765721 204.22976196]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638   6.60224881   0.        ]
------
Step:20, Action:East
State  204
Old Q Values:  [ 0.00000000e+00  3.88317139e+03 -3.21532439e-01  3.99755260e+02]
New Q values:  [   0.         3883.17139005 1542.69571291  399.75525955]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2097.66793883  5144.74775295 -6170.35693855 -1798.95296703]
------
Step:21, Action:South
State  208
Old Q Values:  [35858.49320938   841.41209683   535.33196404 -3385.12952694]
New Q values:  [35858.49320938 60771.26064496   535.33196404 -3385.12952694]
Reward: 100009  Episode Reward:  100049
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         444.44569529]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         444.44569529]
New Q values:  [-180.6       -880.4555246 -180.6        189.5915502]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474    21.37757363]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474    21.37757363]
New Q values:  [ -281.736      -9545.4473624     12.81787474   157.63378621]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6         478.94252253  277.347482   -180.6       ]
------
Step:3, Action:South
State  110
Old Q Values:  [-180.6         478.94252253  277.347482   -180.6       ]
New Q values:  [ -180.6        -5296.79794901   277.347482    -180.6       ]
Reward: -9991  Episode Reward:  -9973
xxxxx
x   x
xg .x
x...x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1086.02317947 -9022.41491635 -7525.7277781   1430.98602078]
------
Step:1, Action:West
State  288
Old Q Values:  [ 1086.02317947 -9022.41491635 -7525.7277781   1430.98602078]
New Q values:  [ 1086.02317947 -9022.41491635 -7525.7277781   3053.44701613]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x.g x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1086.02317947 -9022.41491635 -7525.7277781   3053.44701613]
------
Step:1, Action:West
State  288
Old Q Values:  [ 1086.02317947 -9022.41491635 -7525.7277781   3053.44701613]
New Q values:  [ 1086.02317947 -9022.41491635 -7525.7277781   9702.43141426]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1109.59421728 28252.17535938]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1109.59421728 28252.17535938]
New Q values:  [-2527.46239811 -8521.23367799  1109.59421728 11602.47374717]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x ..x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 987.34534471  -40.34168621 -792.9733772   -35.88578819]
------
Step:3, Action:North
State  260
Old Q Values:  [-2003.71620792 -8695.4397473   2434.65786329 -2601.74710518]
New Q values:  [ -210.24943276 -8695.4397473   2434.65786329 -2601.74710518]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  1972.79016803     0.        ]
------
Step:4, Action:East
State  180
Old Q Values:  [-5887.27445228   266.69721195  1360.9092598      0.        ]
New Q values:  [-5887.27445228   266.69721195  2577.99954528     0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.76078614e+03 3.76029253e+03 6.14785294e+03]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.76078614e+03 3.76029253e+03 6.14785294e+03]
New Q values:  [3.89777037e-01 6.18445658e+03 3.76029253e+03 6.14785294e+03]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1109.59421728 11602.47374717]
------
Step:6, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2290.25332996 1413.92462837]
New Q values:  [  37.74111519 -168.92307549 2290.25332996  861.17345476]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 987.34534471  -40.34168621 -792.9733772   -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [ 987.34534471  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [ 968.35260691  -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 133.86984888   15.18059333 1913.38156342 -180.6       ]
------
Step:8, Action:East
State  181
Old Q Values:  [ 133.86984888   15.18059333 1913.38156342 -180.6       ]
New Q values:  [ 133.86984888   15.18059333 1486.32734552 -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2405.24906716   174.55451539     0.        ]
------
Step:9, Action:South
State  196
Old Q Values:  [-2469.90645144  2405.24906716   174.55451539     0.        ]
New Q values:  [-2469.90645144  3768.09432076   174.55451539     0.        ]
Reward: -1  Episode Reward:  21
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        9355.31564631   35.60960053]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1109.59421728 11602.47374717]
New Q values:  [-2527.46239811 -8521.23367799  3353.96711119 11602.47374717]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1086.02317947 -9022.41491635 -7525.7277781   9702.43141426]
------
Step:11, Action:West
State  288
Old Q Values:  [ 1086.02317947 -9022.41491635 -7525.7277781   9702.43141426]
New Q values:  [ 1086.02317947 -9022.41491635 -7525.7277781   7361.11468986]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3353.96711119 11602.47374717]
------
Step:12, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        9355.31564631   35.60960053]
New Q values:  [  16.82637525 -180.6        9355.31564631  304.14962228]
Reward: -1  Episode Reward:  18
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 968.35260691  -40.34168621 -792.9733772   -35.88578819]
------
Step:13, Action:North
State  260
Old Q Values:  [ -210.24943276 -8695.4397473   2434.65786329 -2601.74710518]
New Q values:  [  688.70009048 -8695.4397473   2434.65786329 -2601.74710518]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5887.27445228   266.69721195  2577.99954528     0.        ]
------
Step:14, Action:East
State  180
Old Q Values:  [-5887.27445228   266.69721195  2577.99954528     0.        ]
New Q values:  [-5887.27445228   266.69721195  2885.9367919      0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.18445658e+03 3.76029253e+03 6.14785294e+03]
------
Step:15, Action:South
State  196
Old Q Values:  [-2469.90645144  3768.09432076   174.55451539     0.        ]
New Q values:  [-2469.90645144  4313.2324222    174.55451539     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        9355.31564631  304.14962228]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3353.96711119 11602.47374717]
New Q values:  [-2527.46239811 -8521.23367799  3549.32125143 11602.47374717]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1086.02317947 -9022.41491635 -7525.7277781   7361.11468986]
------
Step:17, Action:West
State  288
Old Q Values:  [ 1086.02317947 -9022.41491635 -7525.7277781   7361.11468986]
New Q values:  [ 1086.02317947 -9022.41491635 -7525.7277781   6424.58800009]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3549.32125143 11602.47374717]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3549.32125143 11602.47374717]
New Q values:  [-2527.46239811 -8521.23367799  3346.5049006  11602.47374717]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1086.02317947 -9022.41491635 -7525.7277781   6424.58800009]
------
Step:19, Action:North
State  288
Old Q Values:  [ 1086.02317947 -9022.41491635 -7525.7277781   6424.58800009]
New Q values:  [ 9598.63364574 -9022.41491635 -7525.7277781   6424.58800009]
Reward: 9  Episode Reward:  21
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.05294146e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
------
Step:20, Action:North
State  210
Old Q Values:  [3.05294146e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
New Q values:  [4.92750668e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  30
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 123526.33648939]
------
Step:21, Action:West
State  130
Old Q Values:  [ 29415.41153072   2346.20646678   -180.00807518 123526.33648939]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 86548.11783042]
Reward: 9  Episode Reward:  39
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.23773944e+05]
------
Step:22, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.23773944e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.27217577e+05]
Reward: 100009  Episode Reward:  100048
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   262.5840521 ]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474   157.63378621]
New Q values:  [ -281.736      -9545.4473624     12.81787474    71.8378402 ]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:2, Action:South
State  110
Old Q Values:  [ -180.6        -5296.79794901   277.347482    -180.6       ]
New Q values:  [ -180.6        -7247.53814204   277.347482    -180.6       ]
Reward: -9991  Episode Reward:  -9982
xxxxx
x  .x
xg. x
x...x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6        189.5915502]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6        189.5915502]
New Q values:  [-180.6        -880.4555246  -180.6         102.78797214]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474    71.8378402 ]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474    71.8378402 ]
New Q values:  [ -281.736      -9545.4473624     12.81787474   170.40630107]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  454.23721663  320.11181297 -120.29354603]
------
Step:3, Action:South
State  110
Old Q Values:  [ -180.6        -7247.53814204   277.347482    -180.6       ]
New Q values:  [ -180.6        -8027.83421924   277.347482    -180.6       ]
Reward: -9991  Episode Reward:  -9973
xxxxx
x   x
xg..x
x ..x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  454.23721663  320.11181297 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6         852.06038437    5.4           0.        ]
New Q values:  [-180.6        792.1223574    5.4          0.       ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xag.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 133.86984888   15.18059333 1486.32734552 -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [ 133.86984888   15.18059333 1486.32734552 -180.6       ]
New Q values:  [ 290.58464677   15.18059333 1486.32734552 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x .gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        792.1223574    5.4          0.       ]
------
Step:3, Action:South
State  103
Old Q Values:  [-180.6        792.1223574    5.4          0.       ]
New Q values:  [-180.6         653.34678246    5.4           0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 561.43599843  198.8110338  1123.659465      0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [ 561.43599843  198.8110338  1123.659465      0.        ]
New Q values:  [561.43599843 198.8110338  573.49389123   0.        ]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.18445658e+03 3.76029253e+03 6.14785294e+03]
New Q values:  [3.89777037e-01 6.18445658e+03 1.97408952e+04 6.14785294e+03]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[35858.49320938 60771.26064496   535.33196404 -3385.12952694]
------
Step:6, Action:South
State  210
Old Q Values:  [4.92750668e+04 9.46585643e+02 6.49619245e+03 3.52184257e+00]
New Q values:  [4.92750668e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9598.63364574 -9022.41491635 -7525.7277781   6424.58800009]
------
Step:7, Action:North
State  288
Old Q Values:  [ 9598.63364574 -9022.41491635 -7525.7277781   6424.58800009]
New Q values:  [18621.37349192 -9022.41491635 -7525.7277781   6424.58800009]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.92750668e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
------
Step:8, Action:North
State  208
Old Q Values:  [35858.49320938 60771.26064496   535.33196404 -3385.12952694]
New Q values:  [40313.23263288 60771.26064496   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  42
xxxxx
x .ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 86548.11783042]
------
Step:9, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         102.78797214]
New Q values:  [-180.6        -880.4555246  -180.6         125.29040449]
Reward: 9  Episode Reward:  51
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   262.5840521 ]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474   170.40630107]
New Q values:  [ -281.736      -9545.4473624     12.81787474    70.94684615]
Reward: -1  Episode Reward:  50
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   11.28108573    7.29875984 -252.78192178]
------
Step:11, Action:South
State  107
Old Q Values:  [-252.35169558   11.28108573    7.29875984 -252.78192178]
New Q values:  [-252.35169558    6.85351223    7.29875984 -252.78192178]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[-0.11058345  0.          9.80359313  0.        ]
------
Step:12, Action:East
State  187
Old Q Values:  [-0.11058345  0.          9.80359313  0.        ]
New Q values:  [-0.11058345  0.         11.94899818  0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    28.75853641     0.        ]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  1.23091171e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  1.97055669e+04  1.20371620e+03]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.92750668e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
------
Step:14, Action:North
State  216
Old Q Values:  [ 2097.66793883  5144.74775295 -6170.35693855 -1798.95296703]
New Q values:  [  876.05429688  5144.74775295 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         125.29040449]
------
Step:15, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         125.29040449]
New Q values:  [-180.6        -880.4555246  -180.6          70.80021564]
Reward: -1  Episode Reward:  45
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474    70.94684615]
------
Step:16, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   1528.19303756   -114.14679858]
New Q values:  [-10156.11771313  -8069.05606225   1528.19303756     42.38582888]
Reward: -1  Episode Reward:  44
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          295.48182771 -5851.25726525     0.        ]
------
Step:17, Action:South
State  107
Old Q Values:  [-252.35169558    6.85351223    7.29875984 -252.78192178]
New Q values:  [-252.35169558  253.49053175    7.29875984 -252.78192178]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   2.54432572    0.          837.83042287 -178.98      ]
------
Step:18, Action:North
State  187
Old Q Values:  [-0.11058345  0.         11.94899818  0.        ]
New Q values:  [75.40292614  0.         11.94899818  0.        ]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  253.49053175    7.29875984 -252.78192178]
------
Step:19, Action:South
State  98
Old Q Values:  [    0.           279.48400894 31682.35331312     0.        ]
New Q values:  [    0.         26470.16520588 31682.35331312     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:20, Action:East
State  184
Old Q Values:  [   0.            0.         1214.25535342    0.        ]
New Q values:  [   0.            0.         2686.54903933    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1206.93052108 7338.15632652  181.20343395]
------
Step:21, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108 7338.15632652  181.20343395]
New Q values:  [  62.8218634  1206.93052108 4478.08685649  181.20343395]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  876.05429688  5144.74775295 -6170.35693855 -1798.95296703]
------
Step:22, Action:South
State  216
Old Q Values:  [  876.05429688  5144.74775295 -6170.35693855 -1798.95296703]
New Q values:  [  876.05429688  7643.71114876 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18621.37349192 -9022.41491635 -7525.7277781   6424.58800009]
------
Step:23, Action:North
State  288
Old Q Values:  [18621.37349192 -9022.41491635 -7525.7277781   6424.58800009]
New Q values:  [ 9741.0627414  -9022.41491635 -7525.7277781   6424.58800009]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  876.05429688  7643.71114876 -6170.35693855 -1798.95296703]
------
Step:24, Action:South
State  218
Old Q Values:  [418.62691058 111.91184742   0.         784.96322284]
New Q values:  [ 418.62691058 2966.48356139    0.          784.96322284]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9741.0627414  -9022.41491635 -7525.7277781   6424.58800009]
------
Step:25, Action:North
State  288
Old Q Values:  [ 9741.0627414  -9022.41491635 -7525.7277781   6424.58800009]
New Q values:  [ 4785.77016498 -9022.41491635 -7525.7277781   6424.58800009]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 418.62691058 2966.48356139    0.          784.96322284]
------
Step:26, Action:West
State  218
Old Q Values:  [ 418.62691058 2966.48356139    0.          784.96322284]
New Q values:  [ 418.62691058 2966.48356139    0.          322.01285006]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    28.75853641     0.        ]
------
Step:27, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108 4478.08685649  181.20343395]
New Q values:  [  62.8218634  1206.93052108 4083.74808722  181.20343395]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  876.05429688  7643.71114876 -6170.35693855 -1798.95296703]
------
Step:28, Action:South
State  216
Old Q Values:  [  876.05429688  7643.71114876 -6170.35693855 -1798.95296703]
New Q values:  [  876.05429688  4984.26085953 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4785.77016498 -9022.41491635 -7525.7277781   6424.58800009]
------
Step:29, Action:West
State  288
Old Q Values:  [ 4785.77016498 -9022.41491635 -7525.7277781   6424.58800009]
New Q values:  [ 4785.77016498 -9022.41491635 -7525.7277781   3256.31119902]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2290.25332996  861.17345476]
------
Step:30, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2290.25332996  861.17345476]
New Q values:  [  37.74111519 -168.92307549 2351.23238148  861.17345476]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4785.77016498 -9022.41491635 -7525.7277781   3256.31119902]
------
Step:31, Action:West
State  288
Old Q Values:  [ 4785.77016498 -9022.41491635 -7525.7277781   3256.31119902]
New Q values:  [ 4785.77016498 -9022.41491635 -7525.7277781   2007.29419405]
Reward: -1  Episode Reward:  29
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2351.23238148  861.17345476]
------
Step:32, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2351.23238148  861.17345476]
New Q values:  [  37.74111519 -168.92307549 2375.62400208  861.17345476]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4785.77016498 -9022.41491635 -7525.7277781   2007.29419405]
------
Step:33, Action:West
State  288
Old Q Values:  [ 4785.77016498 -9022.41491635 -7525.7277781   2007.29419405]
New Q values:  [ 4785.77016498 -9022.41491635 -7525.7277781   1515.00487825]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2375.62400208  861.17345476]
------
Step:34, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3346.5049006  11602.47374717]
New Q values:  [-2527.46239811 -8521.23367799  2773.73300973 11602.47374717]
Reward: -1  Episode Reward:  26
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4785.77016498 -9022.41491635 -7525.7277781   1515.00487825]
------
Step:35, Action:North
State  288
Old Q Values:  [ 4785.77016498 -9022.41491635 -7525.7277781   1515.00487825]
New Q values:  [ 3408.98632385 -9022.41491635 -7525.7277781   1515.00487825]
Reward: -1  Episode Reward:  25
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  876.05429688  4984.26085953 -6170.35693855 -1798.95296703]
------
Step:36, Action:South
State  216
Old Q Values:  [  876.05429688  4984.26085953 -6170.35693855 -1798.95296703]
New Q values:  [  876.05429688  3015.80024097 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  24
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3408.98632385 -9022.41491635 -7525.7277781   1515.00487825]
------
Step:37, Action:North
State  288
Old Q Values:  [ 3408.98632385 -9022.41491635 -7525.7277781   1515.00487825]
New Q values:  [ 2267.73460183 -9022.41491635 -7525.7277781   1515.00487825]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  876.05429688  3015.80024097 -6170.35693855 -1798.95296703]
------
Step:38, Action:South
State  216
Old Q Values:  [  876.05429688  3015.80024097 -6170.35693855 -1798.95296703]
New Q values:  [  876.05429688  1886.04047694 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  22
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2267.73460183 -9022.41491635 -7525.7277781   1515.00487825]
------
Step:39, Action:North
State  288
Old Q Values:  [ 2267.73460183 -9022.41491635 -7525.7277781   1515.00487825]
New Q values:  [ 1472.30598381 -9022.41491635 -7525.7277781   1515.00487825]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  876.05429688  1886.04047694 -6170.35693855 -1798.95296703]
------
Step:40, Action:South
State  216
Old Q Values:  [  876.05429688  1886.04047694 -6170.35693855 -1798.95296703]
New Q values:  [  876.05429688  1208.31765425 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  20
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1472.30598381 -9022.41491635 -7525.7277781   1515.00487825]
------
Step:41, Action:West
State  288
Old Q Values:  [ 1472.30598381 -9022.41491635 -7525.7277781   1515.00487825]
New Q values:  [ 1472.30598381 -9022.41491635 -7525.7277781   1318.08915192]
Reward: -1  Episode Reward:  19
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2375.62400208  861.17345476]
------
Step:42, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2375.62400208  861.17345476]
New Q values:  [  37.74111519 -168.92307549 1391.34139598  861.17345476]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1472.30598381 -9022.41491635 -7525.7277781   1318.08915192]
------
Step:43, Action:West
State  288
Old Q Values:  [ 1472.30598381 -9022.41491635 -7525.7277781   1318.08915192]
New Q values:  [ 1472.30598381 -9022.41491635 -7525.7277781    944.03807956]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1391.34139598  861.17345476]
------
Step:44, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2773.73300973 11602.47374717]
New Q values:  [-2527.46239811 -8521.23367799  2773.73300973 73059.42021144]
Reward: 100009  Episode Reward:  100026
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474    70.94684615]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   262.5840521 ]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   186.48078037]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  253.49053175    7.29875984 -252.78192178]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  454.23721663  320.11181297 -120.29354603]
New Q values:  [-177.44732869  359.14305402  320.11181297 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[561.43599843 198.8110338  573.49389123   0.        ]
------
Step:3, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  1972.79016803     0.        ]
New Q values:  [    0.         -5969.29177534   913.14617244     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
------
Step:4, Action:South
State  198
Old Q Values:  [-2.78872080e-01  3.95433684e+02  5.59303454e+01  0.00000000e+00]
New Q values:  [-2.78872080e-01 -3.03583183e+03  5.59303454e+01  0.00000000e+00]
Reward: -10001  Episode Reward:  -9974
xxxxx
x  .x
x  .x
x.g.x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  3409.59209887]
------
Step:1, Action:West
State  192
Old Q Values:  [3.89777037e-01 6.18445658e+03 1.97408952e+04 6.14785294e+03]
New Q values:  [3.89777037e-01 6.18445658e+03 1.97408952e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 290.58464677   15.18059333 1486.32734552 -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [ 290.58464677   15.18059333 1486.32734552 -180.6       ]
New Q values:  [ 317.63789345   15.18059333 1486.32734552 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
xa..x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         653.34678246    5.4           0.        ]
------
Step:3, Action:South
State  103
Old Q Values:  [-180.6         653.34678246    5.4           0.        ]
New Q values:  [-180.6         432.78688035    5.4           0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[561.43599843 198.8110338  573.49389123   0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [ 317.63789345   15.18059333 1486.32734552 -180.6       ]
New Q values:  [ 317.63789345   15.18059333 1318.07791142 -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.41382324e+03 -8.94356769e+03  2.40000000e-02]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.18445658e+03 1.97408952e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.43970087e+04 1.97408952e+04 2.91043938e+03]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2773.73300973 73059.42021144]
------
Step:6, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        9355.31564631  304.14962228]
New Q values:  [  16.82637525 -180.6        9355.31564631  417.56563099]
Reward: 9  Episode Reward:  34
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 968.35260691  -40.34168621 -792.9733772   -35.88578819]
------
Step:7, Action:North
State  260
Old Q Values:  [  688.70009048 -8695.4397473   2434.65786329 -2601.74710518]
New Q values:  [ 1140.66107376 -8695.4397473   2434.65786329 -2601.74710518]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5887.27445228   266.69721195  2885.9367919      0.        ]
------
Step:8, Action:East
State  181
Old Q Values:  [ 317.63789345   15.18059333 1318.07791142 -180.6       ]
New Q values:  [ 317.63789345   15.18059333 1820.60089123 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4313.2324222    174.55451539     0.        ]
------
Step:9, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.43970087e+04 1.97408952e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.25647982e+04 1.97408952e+04 2.91043938e+03]
Reward: -1  Episode Reward:  31
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        9355.31564631  417.56563099]
------
Step:10, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        9355.31564631  417.56563099]
New Q values:  [  16.82637525 -180.6        4189.21805367  417.56563099]
Reward: 9  Episode Reward:  40
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1472.30598381 -9022.41491635 -7525.7277781    944.03807956]
------
Step:11, Action:North
State  288
Old Q Values:  [ 1472.30598381 -9022.41491635 -7525.7277781    944.03807956]
New Q values:  [18819.70058701 -9022.41491635 -7525.7277781    944.03807956]
Reward: -1  Episode Reward:  39
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[40313.23263288 60771.26064496   535.33196404 -3385.12952694]
------
Step:12, Action:South
State  208
Old Q Values:  [40313.23263288 60771.26064496   535.33196404 -3385.12952694]
New Q values:  [40313.23263288 29953.81443409   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18819.70058701 -9022.41491635 -7525.7277781    944.03807956]
------
Step:13, Action:North
State  288
Old Q Values:  [18819.70058701 -9022.41491635 -7525.7277781    944.03807956]
New Q values:  [19621.25002467 -9022.41491635 -7525.7277781    944.03807956]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[40313.23263288 29953.81443409   535.33196404 -3385.12952694]
------
Step:14, Action:North
State  210
Old Q Values:  [4.92750668e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
New Q values:  [4.56798621e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  46
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 86548.11783042]
------
Step:15, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 86548.11783042]
New Q values:  [ 29415.41153072   2346.20646678   -180.00807518 130243.00900004]
Reward: 100009  Episode Reward:  100055
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.56798621e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [4.56798621e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
New Q values:  [1.82985849e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          70.80021564]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          70.80021564]
New Q values:  [-180.6       -880.4555246 -180.6         55.0041401]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474    70.94684615]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474    70.94684615]
New Q values:  [ -281.736      -9545.4473624     12.81787474   141.52165467]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  359.14305402  320.11181297 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869  359.14305402  320.11181297 -120.29354603]
New Q values:  [-177.44732869  476.17972721  320.11181297 -120.29354603]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  322.31251728 1090.408352   -244.98066897]
------
Step:5, Action:South
State  189
Old Q Values:  [   9.84673294  322.31251728 1090.408352   -244.98066897]
New Q values:  [   9.84673294  418.83078898 1090.408352   -244.98066897]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 968.35260691  -40.34168621 -792.9733772   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [ 968.35260691  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [ 713.86354837  -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  418.83078898 1090.408352   -244.98066897]
------
Step:7, Action:East
State  189
Old Q Values:  [   9.84673294  418.83078898 1090.408352   -244.98066897]
New Q values:  [   9.84673294  418.83078898 1165.71031401 -244.98066897]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.41382324e+03 -8.94356769e+03  2.40000000e-02]
------
Step:8, Action:South
State  205
Old Q Values:  [  0.           0.           0.         198.38683706]
New Q values:  [  0.         121.94855967   0.         198.38683706]
Reward: 9  Episode Reward:  52
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         388.49519889   0.        ]
------
Step:9, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1391.34139598  861.17345476]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  8.61173455e+02]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 713.86354837  -40.34168621 -792.9733772   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [ 713.86354837  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [ 837.12568671  -40.34168621 -792.9733772   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 317.63789345   15.18059333 1820.60089123 -180.6       ]
------
Step:2, Action:East
State  180
Old Q Values:  [-5887.27445228   266.69721195  2885.9367919      0.        ]
New Q values:  [-5887.27445228   266.69721195  7082.04327787     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.25647982e+04 1.97408952e+04 2.91043938e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.25647982e+04 1.97408952e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.25647982e+04 1.99957279e+04 2.91043938e+03]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[40313.23263288 29953.81443409   535.33196404 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [40313.23263288 29953.81443409   535.33196404 -3385.12952694]
New Q values:  [55197.59575316 29953.81443409   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  26
xxxxx
x..ax
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 130243.00900004]
------
Step:5, Action:West
State  136
Old Q Values:  [ -724.71310357  1514.53633272 -6245.61866138   706.5178206 ]
New Q values:  [ -724.71310357  1514.53633272 -6245.61866138 23522.63800824]
Reward: -9991  Episode Reward:  -9965
xxxxx
x.g x
x   x
x ..x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2773.73300973 73059.42021144]
------
Step:1, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        4189.21805367  417.56563099]
New Q values:  [  16.82637525 -180.6        4189.21805367  902.82361138]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1140.66107376 -8695.4397473   2434.65786329 -2601.74710518]
------
Step:2, Action:East
State  260
Old Q Values:  [ 1140.66107376 -8695.4397473   2434.65786329 -2601.74710518]
New Q values:  [ 1140.66107376 -8695.4397473  22891.08920875 -2601.74710518]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2773.73300973 73059.42021144]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2773.73300973 73059.42021144]
New Q values:  [-2527.46239811 -8521.23367799  7001.26821129 73059.42021144]
Reward: 9  Episode Reward:  17
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19621.25002467 -9022.41491635 -7525.7277781    944.03807956]
------
Step:4, Action:North
State  288
Old Q Values:  [19621.25002467 -9022.41491635 -7525.7277781    944.03807956]
New Q values:  [24413.17873582 -9022.41491635 -7525.7277781    944.03807956]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[55197.59575316 29953.81443409   535.33196404 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [55197.59575316 29953.81443409   535.33196404 -3385.12952694]
New Q values:  [31222.21946499 29953.81443409   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  35
xxxxx
x.gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[14546.31824237  1504.73148864 -8652.84       30459.27054576]
------
Step:6, Action:North
State  128
Old Q Values:  [14546.31824237  1504.73148864 -8652.84       30459.27054576]
New Q values:  [ 8775.70846068  1504.73148864 -8652.84       30459.27054576]
Reward: -10301  Episode Reward:  -10266
xxxxx
x..gx
x.  x
x   x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 317.63789345   15.18059333 1820.60089123 -180.6       ]
------
Step:1, Action:East
State  189
Old Q Values:  [   9.84673294  418.83078898 1165.71031401 -244.98066897]
New Q values:  [   9.84673294  418.83078898 1494.56175526 -244.98066897]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  3409.59209887]
------
Step:2, Action:West
State  193
Old Q Values:  [-5922.26708831  2203.87243301 -3909.58186816  3409.59209887]
New Q values:  [-5922.26708831  2203.87243301 -3909.58186816  1909.41710691]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 317.63789345   15.18059333 1820.60089123 -180.6       ]
------
Step:3, Action:East
State  181
Old Q Values:  [ 317.63789345   15.18059333 1820.60089123 -180.6       ]
New Q values:  [ 317.63789345   15.18059333  726.35871789 -180.6       ]
Reward: -10001  Episode Reward:  -9993
xxxxx
x...x
x g x
x...x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -8027.83421924   277.347482    -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134   732.33338182   711.22373181     0.        ]
New Q values:  [-8463.16477134   732.33338182   748.34740399     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   1528.19303756     42.38582888]
------
Step:2, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   1528.19303756     42.38582888]
New Q values:  [-10156.11771313  -8069.05606225   7673.4686175      42.38582888]
Reward: 9  Episode Reward:  18
xxxxx
x gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  1514.53633272 -6245.61866138 23522.63800824]
------
Step:3, Action:South
State  136
Old Q Values:  [ -724.71310357  1514.53633272 -6245.61866138 23522.63800824]
New Q values:  [ -724.71310357   973.70982936 -6245.61866138 23522.63800824]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  876.05429688  1208.31765425 -6170.35693855 -1798.95296703]
------
Step:4, Action:South
State  216
Old Q Values:  [  876.05429688  1208.31765425 -6170.35693855 -1798.95296703]
New Q values:  [  876.05429688  7812.68068244 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24413.17873582 -9022.41491635 -7525.7277781    944.03807956]
------
Step:5, Action:West
State  288
Old Q Values:  [24413.17873582 -9022.41491635 -7525.7277781    944.03807956]
New Q values:  [24413.17873582 -9022.41491635 -7525.7277781  20317.50870156]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x . x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  8.61173455e+02]
------
Step:6, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  8.61173455e+02]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  6.01007088e+02]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 837.12568671  -40.34168621 -792.9733772   -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [ 837.12568671  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [ 506.29844205  -40.34168621 -792.9733772   -35.88578819]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[561.43599843 198.8110338  573.49389123   0.        ]
------
Step:8, Action:East
State  177
Old Q Values:  [66823.71809805  4025.17604709 15642.91452871     0.        ]
New Q values:  [66823.71809805  4025.17604709 66261.28417289     0.        ]
Reward: 90009  Episode Reward:  90062
xxxxx
x   x
x g x
x   x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  300.64551922    3.88307055 -180.6       ]
------
Step:1, Action:South
State  108
Old Q Values:  [-8463.16477134   732.33338182   748.34740399     0.        ]
New Q values:  [-8463.16477134  2422.94633609   748.34740399     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5887.27445228   266.69721195  7082.04327787     0.        ]
------
Step:2, Action:East
State  180
Old Q Values:  [-5887.27445228   266.69721195  7082.04327787     0.        ]
New Q values:  [-5887.27445228   266.69721195  4132.18703781     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4313.2324222    174.55451539     0.        ]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  4313.2324222    174.55451539     0.        ]
New Q values:  [-2469.90645144  2987.45838498   174.55451539     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        4189.21805367  902.82361138]
------
Step:4, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        4189.21805367  902.82361138]
New Q values:  [  16.82637525 -180.6        9005.04084221  902.82361138]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24413.17873582 -9022.41491635 -7525.7277781  20317.50870156]
------
Step:5, Action:North
State  288
Old Q Values:  [24413.17873582 -9022.41491635 -7525.7277781  20317.50870156]
New Q values:  [19137.33733383 -9022.41491635 -7525.7277781  20317.50870156]
Reward: 9  Episode Reward:  45
xxxxx
x  .x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31222.21946499 29953.81443409   535.33196404 -3385.12952694]
------
Step:6, Action:North
State  216
Old Q Values:  [  876.05429688  7812.68068244 -6170.35693855 -1798.95296703]
New Q values:  [  372.32296078  7812.68068244 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  54
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6         55.0041401]
------
Step:7, Action:West
State  130
Old Q Values:  [ 29415.41153072   2346.20646678   -180.00807518 130243.00900004]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 87714.96546788]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  59707.08388817 118727.87289289]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474   141.52165467]
New Q values:  [ -281.736      -9545.4473624     12.81787474    71.17821216]
Reward: -1  Episode Reward:  52
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    50.56516763  -180.6       ]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558  253.49053175    7.29875984 -252.78192178]
New Q values:  [-252.35169558  253.49053175   23.67296758 -252.78192178]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     12.81787474    71.17821216]
------
Step:10, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   186.48078037]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   150.03947167]
Reward: -1  Episode Reward:  50
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  253.49053175   23.67296758 -252.78192178]
------
Step:11, Action:South
State  107
Old Q Values:  [-252.35169558  253.49053175   23.67296758 -252.78192178]
New Q values:  [-252.35169558  352.14533956   23.67296758 -252.78192178]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   2.54432572    0.          837.83042287 -178.98      ]
------
Step:12, Action:North
State  187
Old Q Values:  [75.40292614  0.         11.94899818  0.        ]
New Q values:  [135.20477233   0.          11.94899818   0.        ]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  352.14533956   23.67296758 -252.78192178]
------
Step:13, Action:South
State  98
Old Q Values:  [    0.         26470.16520588 31682.35331312     0.        ]
New Q values:  [    0.         36946.43768466 31682.35331312     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:14, Action:East
State  187
Old Q Values:  [135.20477233   0.          11.94899818   0.        ]
New Q values:  [135.20477233   0.          12.80716019   0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    28.75853641     0.        ]
------
Step:15, Action:East
State  203
Old Q Values:  [  3.60604218 917.47486444 258.19086565   0.        ]
New Q values:  [  3.60604218 917.47486444 992.62141468   0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 418.62691058 2966.48356139    0.          322.01285006]
------
Step:16, Action:North
State  216
Old Q Values:  [  372.32296078  7812.68068244 -6170.35693855 -1798.95296703]
New Q values:  [  164.83042634  7812.68068244 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       -880.4555246 -180.6         55.0041401]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6       -880.4555246 -180.6         55.0041401]
New Q values:  [-180.6        -880.4555246  -180.6          79.25868809]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   150.03947167]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     12.81787474    71.17821216]
New Q values:  [ -281.736      -9545.4473624     28.30475632    71.17821216]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          79.25868809]
------
Step:19, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          79.25868809]
New Q values:  [-180.6        -880.4555246  -180.6          52.45693888]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632    71.17821216]
------
Step:20, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   7673.4686175      42.38582888]
New Q values:  [-10156.11771313  -8069.05606225   7673.4686175     104.99887987]
Reward: -1  Episode Reward:  40
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          295.48182771 -5851.25726525     0.        ]
------
Step:21, Action:South
State  107
Old Q Values:  [-252.35169558  352.14533956   23.67296758 -252.78192178]
New Q values:  [-252.35169558  391.60726269   23.67296758 -252.78192178]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   2.54432572    0.          837.83042287 -178.98      ]
------
Step:22, Action:North
State  184
Old Q Values:  [   0.            0.         2686.54903933    0.        ]
New Q values:  [  14.56955029    0.         2686.54903933    0.        ]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    50.56516763  -180.6       ]
------
Step:23, Action:East
State  107
Old Q Values:  [-252.35169558  391.60726269   23.67296758 -252.78192178]
New Q values:  [-252.35169558  391.60726269   30.22265068 -252.78192178]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632    71.17821216]
------
Step:24, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632    71.17821216]
New Q values:  [ -281.736      -9545.4473624     28.30475632    43.04083515]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    50.56516763  -180.6       ]
------
Step:25, Action:East
State  98
Old Q Values:  [    0.         36946.43768466 31682.35331312     0.        ]
New Q values:  [    0.         36946.43768466 48290.70319312     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  59707.08388817 118727.87289289]
------
Step:26, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632    43.04083515]
New Q values:  [ -281.736      -9545.4473624     28.30475632    31.78588435]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    50.56516763  -180.6       ]
------
Step:27, Action:East
State  104
Old Q Values:  [-8.65284000e+03  6.84575004e+00  6.00141309e+02 -8.65284000e+03]
New Q values:  [-8.65284000e+03  6.84575004e+00  2.54149711e+03 -8.65284000e+03]
Reward: -1  Episode Reward:  33
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   7673.4686175     104.99887987]
------
Step:28, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   7673.4686175     104.99887987]
New Q values:  [-10156.11771313  -8069.05606225  10125.57884947    104.99887987]
Reward: -1  Episode Reward:  32
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357   973.70982936 -6245.61866138 23522.63800824]
------
Step:29, Action:South
State  136
Old Q Values:  [ -724.71310357   973.70982936 -6245.61866138 23522.63800824]
New Q values:  [ -724.71310357  2732.68813648 -6245.61866138 23522.63800824]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  164.83042634  7812.68068244 -6170.35693855 -1798.95296703]
------
Step:30, Action:South
State  216
Old Q Values:  [  164.83042634  7812.68068244 -6170.35693855 -1798.95296703]
New Q values:  [  164.83042634  9219.72488345 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19137.33733383 -9022.41491635 -7525.7277781  20317.50870156]
------
Step:31, Action:West
State  288
Old Q Values:  [19137.33733383 -9022.41491635 -7525.7277781  20317.50870156]
New Q values:  [19137.33733383 -9022.41491635 -7525.7277781  28060.89695036]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  6.01007088e+02]
------
Step:32, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7001.26821129 73059.42021144]
New Q values:  [-2527.46239811 -8521.23367799  7001.26821129 97642.19879714]
Reward: 100009  Episode Reward:  100038
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19137.33733383 -9022.41491635 -7525.7277781  28060.89695036]
------
Step:1, Action:West
State  288
Old Q Values:  [19137.33733383 -9022.41491635 -7525.7277781  28060.89695036]
New Q values:  [19137.33733383 -9022.41491635 -7525.7277781  40522.41841929]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7001.26821129 97642.19879714]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7001.26821129 97642.19879714]
New Q values:  [-2527.46239811 -8521.23367799  7001.26821129 45923.60628148]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1140.66107376 -8695.4397473  22891.08920875 -2601.74710518]
------
Step:3, Action:East
State  261
Old Q Values:  [ 506.29844205  -40.34168621 -792.9733772   -35.88578819]
New Q values:  [  506.29844205   -40.34168621 13459.29253357   -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7001.26821129 45923.60628148]
------
Step:4, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  6.01007088e+02]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  4.27759060e+03]
Reward: -1  Episode Reward:  6
xxxxx
x..gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  506.29844205   -40.34168621 13459.29253357   -35.88578819]
------
Step:5, Action:East
State  261
Old Q Values:  [  506.29844205   -40.34168621 13459.29253357   -35.88578819]
New Q values:  [  506.29844205   -40.34168621 25317.61048316   -35.88578819]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  4.27759060e+03]
------
Step:6, Action:East
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  6.64483116e+04  4.27759060e+03]
New Q values:  [   37.74111519  -168.92307549 32735.4501521   4277.59059524]
Reward: -10001  Episode Reward:  -9996
xxxxx
x...x
x...x
x  gx
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 317.63789345   15.18059333  726.35871789 -180.6       ]
------
Step:1, Action:North
State  183
Old Q Values:  [561.43599843 198.8110338  573.49389123   0.        ]
New Q values:  [359.81046348 198.8110338  573.49389123   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         432.78688035    5.4           0.        ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  476.17972721  320.11181297 -120.29354603]
New Q values:  [-177.44732869  407.77950625  320.11181297 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 317.63789345   15.18059333  726.35871789 -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [ 317.63789345   15.18059333  726.35871789 -180.6       ]
New Q values:  [ 216.64881314   15.18059333  726.35871789 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xag.x
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  300.64551922    3.88307055 -180.6       ]
------
Step:4, Action:South
State  108
Old Q Values:  [-8463.16477134  2422.94633609   748.34740399     0.        ]
New Q values:  [-8463.16477134  2208.23464578   748.34740399     0.        ]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5887.27445228   266.69721195  4132.18703781     0.        ]
------
Step:5, Action:East
State  180
Old Q Values:  [-5887.27445228   266.69721195  4132.18703781     0.        ]
New Q values:  [-5887.27445228   266.69721195  2548.51233062     0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2987.45838498   174.55451539     0.        ]
------
Step:6, Action:South
State  196
Old Q Values:  [-2469.90645144  2987.45838498   174.55451539     0.        ]
New Q values:  [-2469.90645144  3901.89560665   174.55451539     0.        ]
Reward: 9  Episode Reward:  14
xxxxx
x ..x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        9005.04084221  902.82361138]
------
Step:7, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        9005.04084221  902.82361138]
New Q values:  [   16.82637525  -180.6        15764.14186267   902.82361138]
Reward: 9  Episode Reward:  23
xxxxx
x g.x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19137.33733383 -9022.41491635 -7525.7277781  40522.41841929]
------
Step:8, Action:West
State  288
Old Q Values:  [19137.33733383 -9022.41491635 -7525.7277781  40522.41841929]
New Q values:  [19137.33733383 -9022.41491635 -7525.7277781  29985.44925216]
Reward: -1  Episode Reward:  22
xxxxx
xg..x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7001.26821129 45923.60628148]
------
Step:9, Action:West
State  276
Old Q Values:  [   16.82637525  -180.6        15764.14186267   902.82361138]
New Q values:  [   16.82637525  -180.6        15764.14186267  7961.8125895 ]
Reward: 9  Episode Reward:  31
xxxxx
x g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  506.29844205   -40.34168621 25317.61048316   -35.88578819]
------
Step:10, Action:East
State  260
Old Q Values:  [ 1140.66107376 -8695.4397473  22891.08920875 -2601.74710518]
New Q values:  [ 1140.66107376 -8695.4397473  13885.0782423  -2601.74710518]
Reward: -1  Episode Reward:  30
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525  -180.6        15764.14186267  7961.8125895 ]
------
Step:11, Action:East
State  276
Old Q Values:  [   16.82637525  -180.6        15764.14186267  7961.8125895 ]
New Q values:  [   16.82637525  -180.6        15300.69152072  7961.8125895 ]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19137.33733383 -9022.41491635 -7525.7277781  29985.44925216]
------
Step:12, Action:West
State  288
Old Q Values:  [19137.33733383 -9022.41491635 -7525.7277781  29985.44925216]
New Q values:  [19137.33733383 -9022.41491635 -7525.7277781  16583.78715708]
Reward: -1  Episode Reward:  28
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525  -180.6        15300.69152072  7961.8125895 ]
------
Step:13, Action:East
State  276
Old Q Values:  [   16.82637525  -180.6        15300.69152072  7961.8125895 ]
New Q values:  [   16.82637525  -180.6        11860.87780843  7961.8125895 ]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19137.33733383 -9022.41491635 -7525.7277781  16583.78715708]
------
Step:14, Action:North
State  288
Old Q Values:  [19137.33733383 -9022.41491635 -7525.7277781  16583.78715708]
New Q values:  [13149.91040021 -9022.41491635 -7525.7277781  16583.78715708]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.82985849e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
------
Step:15, Action:North
State  210
Old Q Values:  [1.82985849e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
New Q values:  [3.36393236e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  45
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 87714.96546788]
------
Step:16, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 87714.96546788]
New Q values:  [ 29415.41153072   2346.20646678   -180.00807518 130709.74805502]
Reward: 100009  Episode Reward:  100054
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  506.29844205   -40.34168621 25317.61048316   -35.88578819]
------
Step:1, Action:East
State  261
Old Q Values:  [  506.29844205   -40.34168621 25317.61048316   -35.88578819]
New Q values:  [  506.29844205   -40.34168621 23909.52607771   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7001.26821129 45923.60628148]
------
Step:2, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 32735.4501521   4277.59059524]
New Q values:  [   37.74111519  -168.92307549 32735.4501521   8883.29406141]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  506.29844205   -40.34168621 23909.52607771   -35.88578819]
------
Step:3, Action:East
State  261
Old Q Values:  [  506.29844205   -40.34168621 23909.52607771   -35.88578819]
New Q values:  [  506.29844205   -40.34168621 19383.84547671   -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x..gx
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 32735.4501521   8883.29406141]
------
Step:4, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 32735.4501521   8883.29406141]
New Q values:  [   37.74111519  -168.92307549 18074.71620796  8883.29406141]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13149.91040021 -9022.41491635 -7525.7277781  16583.78715708]
------
Step:5, Action:West
State  288
Old Q Values:  [13149.91040021 -9022.41491635 -7525.7277781  16583.78715708]
New Q values:  [13149.91040021 -9022.41491635 -7525.7277781  12055.32972522]
Reward: -1  Episode Reward:  15
xxxxx
x..gx
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18074.71620796  8883.29406141]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7001.26821129 45923.60628148]
New Q values:  [-2527.46239811 -8521.23367799  6744.88040458 45923.60628148]
Reward: -1  Episode Reward:  14
xxxxx
x.g.x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13149.91040021 -9022.41491635 -7525.7277781  12055.32972522]
------
Step:7, Action:North
State  288
Old Q Values:  [13149.91040021 -9022.41491635 -7525.7277781  12055.32972522]
New Q values:  [14626.02999958 -9022.41491635 -7525.7277781  12055.32972522]
Reward: -1  Episode Reward:  13
xxxxx
x..gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31222.21946499 29953.81443409   535.33196404 -3385.12952694]
------
Step:8, Action:South
State  208
Old Q Values:  [31222.21946499 29953.81443409   535.33196404 -3385.12952694]
New Q values:  [31222.21946499 16368.73477351   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14626.02999958 -9022.41491635 -7525.7277781  12055.32972522]
------
Step:9, Action:West
State  288
Old Q Values:  [14626.02999958 -9022.41491635 -7525.7277781  12055.32972522]
New Q values:  [14626.02999958 -9022.41491635 -7525.7277781  10243.94675248]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x.. x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18074.71620796  8883.29406141]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6744.88040458 45923.60628148]
New Q values:  [-2527.46239811 -8521.23367799  6744.88040458 24183.99615561]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  506.29844205   -40.34168621 19383.84547671   -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [  506.29844205   -40.34168621 19383.84547671   -35.88578819]
New Q values:  [  425.82699219   -40.34168621 19383.84547671   -35.88578819]
Reward: 9  Episode Reward:  19
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 216.64881314   15.18059333  726.35871789 -180.6       ]
------
Step:12, Action:North
State  183
Old Q Values:  [359.81046348 198.8110338  573.49389123   0.        ]
New Q values:  [279.1602495  198.8110338  573.49389123   0.        ]
Reward: 9  Episode Reward:  28
xxxxx
xa..x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         432.78688035    5.4           0.        ]
------
Step:13, Action:South
State  103
Old Q Values:  [-180.6         432.78688035    5.4           0.        ]
New Q values:  [-180.6         344.56291951    5.4           0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[279.1602495  198.8110338  573.49389123   0.        ]
------
Step:14, Action:East
State  181
Old Q Values:  [ 216.64881314   15.18059333  726.35871789 -180.6       ]
New Q values:  [ 216.64881314   15.18059333  957.10521706 -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  1909.41710691]
------
Step:15, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.25647982e+04 1.99957279e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.22805181e+04 1.99957279e+04 2.91043938e+03]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6744.88040458 24183.99615561]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6744.88040458 24183.99615561]
New Q values:  [-2527.46239811 -8521.23367799  6744.88040458 15488.15210526]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  425.82699219   -40.34168621 19383.84547671   -35.88578819]
------
Step:17, Action:North
State  261
Old Q Values:  [  425.82699219   -40.34168621 19383.84547671   -35.88578819]
New Q values:  [  341.77896424   -40.34168621 19383.84547671   -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[279.1602495  198.8110338  573.49389123   0.        ]
------
Step:18, Action:East
State  181
Old Q Values:  [ 216.64881314   15.18059333  957.10521706 -180.6       ]
New Q values:  [ 216.64881314   15.18059333 1043.40381673 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  1909.41710691]
------
Step:19, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.22805181e+04 1.99957279e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.55805288e+03 1.99957279e+04 2.91043938e+03]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6744.88040458 15488.15210526]
------
Step:20, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6744.88040458 15488.15210526]
New Q values:  [-2527.46239811 -8521.23367799  6744.88040458 12009.81448512]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  341.77896424   -40.34168621 19383.84547671   -35.88578819]
------
Step:21, Action:North
State  261
Old Q Values:  [  341.77896424   -40.34168621 19383.84547671   -35.88578819]
New Q values:  [  308.15975307   -40.34168621 19383.84547671   -35.88578819]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[279.1602495  198.8110338  573.49389123   0.        ]
------
Step:22, Action:East
State  181
Old Q Values:  [ 216.64881314   15.18059333 1043.40381673 -180.6       ]
New Q values:  [ 216.64881314   15.18059333 1077.92325659 -180.6       ]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2203.87243301 -3909.58186816  1909.41710691]
------
Step:23, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.55805288e+03 1.99957279e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 7.42556550e+03 1.99957279e+04 2.91043938e+03]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6744.88040458 12009.81448512]
------
Step:24, Action:West
State  276
Old Q Values:  [   16.82637525  -180.6        11860.87780843  7961.8125895 ]
New Q values:  [   16.82637525  -180.6        11860.87780843  8999.27867882]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  308.15975307   -40.34168621 19383.84547671   -35.88578819]
------
Step:25, Action:East
State  261
Old Q Values:  [  308.15975307   -40.34168621 19383.84547671   -35.88578819]
New Q values:  [  308.15975307   -40.34168621 13175.35305308   -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18074.71620796  8883.29406141]
------
Step:26, Action:East
State  276
Old Q Values:  [   16.82637525  -180.6        11860.87780843  8999.27867882]
New Q values:  [  16.82637525 -180.6        9131.56012325 8999.27867882]
Reward: -1  Episode Reward:  24
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14626.02999958 -9022.41491635 -7525.7277781  10243.94675248]
------
Step:27, Action:North
State  288
Old Q Values:  [14626.02999958 -9022.41491635 -7525.7277781  10243.94675248]
New Q values:  [15216.47783933 -9022.41491635 -7525.7277781  10243.94675248]
Reward: -1  Episode Reward:  23
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31222.21946499 16368.73477351   535.33196404 -3385.12952694]
------
Step:28, Action:South
State  208
Old Q Values:  [31222.21946499 16368.73477351   535.33196404 -3385.12952694]
New Q values:  [31222.21946499 11111.8372612    535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  22
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15216.47783933 -9022.41491635 -7525.7277781  10243.94675248]
------
Step:29, Action:North
State  288
Old Q Values:  [15216.47783933 -9022.41491635 -7525.7277781  10243.94675248]
New Q values:  [15452.65697523 -9022.41491635 -7525.7277781  10243.94675248]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31222.21946499 11111.8372612    535.33196404 -3385.12952694]
------
Step:30, Action:North
State  208
Old Q Values:  [31222.21946499 11111.8372612    535.33196404 -3385.12952694]
New Q values:  [51707.2122025  11111.8372612    535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  30
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 130709.74805502]
------
Step:31, Action:West
State  130
Old Q Values:  [ 29415.41153072   2346.20646678   -180.00807518 130709.74805502]
New Q values:  [ 29415.41153072   2346.20646678   -180.00807518 147907.66108988]
Reward: 100009  Episode Reward:  100039
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5887.27445228   266.69721195  2548.51233062     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [-5887.27445228   266.69721195  2548.51233062     0.        ]
New Q values:  [-5887.27445228   266.69721195  2195.37361424     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3901.89560665   174.55451539     0.        ]
------
Step:2, Action:South
State  196
Old Q Values:  [-2469.90645144  3901.89560665   174.55451539     0.        ]
New Q values:  [-2469.90645144  4305.62627964   174.55451539     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
xg .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        9131.56012325 8999.27867882]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6744.88040458 12009.81448512]
New Q values:  [-2527.46239811 -8521.23367799  7339.1492544  12009.81448512]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15452.65697523 -9022.41491635 -7525.7277781  10243.94675248]
------
Step:4, Action:North
State  288
Old Q Values:  [15452.65697523 -9022.41491635 -7525.7277781  10243.94675248]
New Q values:  [21698.62645084 -9022.41491635 -7525.7277781  10243.94675248]
Reward: 9  Episode Reward:  36
xxxxx
x g.x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[51707.2122025  11111.8372612    535.33196404 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [51707.2122025  11111.8372612    535.33196404 -3385.12952694]
New Q values:  [23826.06604473 11111.8372612    535.33196404 -3385.12952694]
Reward: -9991  Episode Reward:  -9955
xxxxx
x .gx
x   x
x.  x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632    31.78588435]
------
Step:1, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225  10125.57884947    104.99887987]
New Q values:  [-10156.11771313  -8069.05606225  10125.57884947  -5290.13005432]
Reward: -9991  Episode Reward:  -9991
xxxxx
xg .x
x ..x
x...x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1140.66107376 -8695.4397473  13885.0782423  -2601.74710518]
------
Step:1, Action:East
State  260
Old Q Values:  [ 1140.66107376 -8695.4397473  13885.0782423  -2601.74710518]
New Q values:  [ 1140.66107376 -8695.4397473   8298.89933389 -2601.74710518]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        9131.56012325 8999.27867882]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7339.1492544  12009.81448512]
New Q values:  [-2527.46239811 -8521.23367799  9450.64763701 12009.81448512]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x...x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21698.62645084 -9022.41491635 -7525.7277781  10243.94675248]
------
Step:3, Action:North
State  288
Old Q Values:  [21698.62645084 -9022.41491635 -7525.7277781  10243.94675248]
New Q values:  [15832.67039376 -9022.41491635 -7525.7277781  10243.94675248]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23826.06604473 11111.8372612    535.33196404 -3385.12952694]
------
Step:4, Action:North
State  210
Old Q Values:  [3.36393236e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
New Q values:  [1.34768665e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          52.45693888]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          52.45693888]
New Q values:  [-180.6        -880.4555246  -180.6          35.91854086]
Reward: 9  Episode Reward:  45
xxxxx
x a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632    31.78588435]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632    31.78588435]
New Q values:  [ -281.736      -9545.4473624     28.30475632   129.59653255]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.60726269   30.22265068 -252.78192178]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869  407.77950625  320.11181297 -120.29354603]
New Q values:  [-177.44732869  340.55996987  320.11181297 -120.29354603]
Reward: 9  Episode Reward:  53
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[279.1602495  198.8110338  573.49389123   0.        ]
------
Step:8, Action:East
State  177
Old Q Values:  [66823.71809805  4025.17604709 66261.28417289     0.        ]
New Q values:  [66823.71809805  4025.17604709 86508.63203056     0.        ]
Reward: 90009  Episode Reward:  90062
xxxxx
x   x
x g x
x   x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 216.64881314   15.18059333 1077.92325659 -180.6       ]
------
Step:1, Action:North
State  183
Old Q Values:  [279.1602495  198.8110338  573.49389123   0.        ]
New Q values:  [220.43297565 198.8110338  573.49389123   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         344.56291951    5.4           0.        ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  340.55996987  320.11181297 -120.29354603]
New Q values:  [-177.44732869  459.00096493  320.11181297 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 216.64881314   15.18059333 1077.92325659 -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [ 216.64881314   15.18059333 1077.92325659 -180.6       ]
New Q values:  [ 189.42840111   15.18059333 1077.92325659 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xa..x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         344.56291951    5.4           0.        ]
------
Step:4, Action:South
State  103
Old Q Values:  [-180.6         344.56291951    5.4           0.        ]
New Q values:  [-180.6         309.27333517    5.4           0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[220.43297565 198.8110338  573.49389123   0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [220.43297565 198.8110338  573.49389123   0.        ]
New Q values:  [220.43297565 198.8110338  245.5766601    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -3.03583183e+03  5.59303454e+01  0.00000000e+00]
------
Step:6, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  1.97055669e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  1.19306867e+04  1.20371620e+03]
Reward: 9  Episode Reward:  14
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.34768665e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
------
Step:7, Action:North
State  218
Old Q Values:  [ 418.62691058 2966.48356139    0.          322.01285006]
New Q values:  [ 183.62632649 2966.48356139    0.          322.01285006]
Reward: 9  Episode Reward:  23
xxxxx
x .ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          35.91854086]
------
Step:8, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          35.91854086]
New Q values:  [-180.6        -880.4555246  -180.6          58.64637611]
Reward: 9  Episode Reward:  32
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632   129.59653255]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632   129.59653255]
New Q values:  [ -281.736      -9545.4473624     28.30475632   168.72079182]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.60726269   30.22265068 -252.78192178]
------
Step:10, Action:South
State  110
Old Q Values:  [ -180.6        -8027.83421924   277.347482    -180.6       ]
New Q values:  [ -180.6        -3202.62592418   277.347482    -180.6       ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -5.70379540e+03  3.03592117e+01  0.00000000e+00]
------
Step:11, Action:East
State  190
Old Q Values:  [ 1.04129094e+00 -5.70379540e+03  3.03592117e+01  0.00000000e+00]
New Q values:  [ 1.04129094e+00 -5.70379540e+03  2.01712456e+01  0.00000000e+00]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458    28.75853641     0.        ]
------
Step:12, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458    28.75853641     0.        ]
New Q values:  [    0.         -5884.35407458   900.84848298     0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 183.62632649 2966.48356139    0.          322.01285006]
------
Step:13, Action:South
State  216
Old Q Values:  [  164.83042634  9219.72488345 -6170.35693855 -1798.95296703]
New Q values:  [  164.83042634  8443.09107151 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  37
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15832.67039376 -9022.41491635 -7525.7277781  10243.94675248]
------
Step:14, Action:North
State  288
Old Q Values:  [15832.67039376 -9022.41491635 -7525.7277781  10243.94675248]
New Q values:  [ 8865.39547895 -9022.41491635 -7525.7277781  10243.94675248]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  164.83042634  8443.09107151 -6170.35693855 -1798.95296703]
------
Step:15, Action:South
State  216
Old Q Values:  [  164.83042634  8443.09107151 -6170.35693855 -1798.95296703]
New Q values:  [  164.83042634  6449.82045435 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8865.39547895 -9022.41491635 -7525.7277781  10243.94675248]
------
Step:16, Action:West
State  288
Old Q Values:  [ 8865.39547895 -9022.41491635 -7525.7277781  10243.94675248]
New Q values:  [ 8865.39547895 -9022.41491635 -7525.7277781   7705.92304653]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9450.64763701 12009.81448512]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9450.64763701 12009.81448512]
New Q values:  [-2527.46239811 -8521.23367799  9450.64763701 71709.69468125]
Reward: 100009  Episode Reward:  100053
xxxxx
xg  x
x   x
xa  x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  459.00096493  320.11181297 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6         309.27333517    5.4           0.        ]
New Q values:  [-180.6         452.48631105    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 189.42840111   15.18059333 1077.92325659 -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [ 189.42840111   15.18059333 1077.92325659 -180.6       ]
New Q values:  [ 212.87164992   15.18059333 1077.92325659 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  459.00096493  320.11181297 -120.29354603]
------
Step:3, Action:South
State  103
Old Q Values:  [-180.6         452.48631105    5.4           0.        ]
New Q values:  [-180.6         254.06752245    5.4           0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[220.43297565 198.8110338  245.5766601    0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [ 212.87164992   15.18059333 1077.92325659 -180.6       ]
New Q values:  [ 212.87164992   15.18059333 1160.71627585 -180.6       ]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.41382324e+03 -8.94356769e+03  2.40000000e-02]
------
Step:5, Action:South
State  193
Old Q Values:  [-5922.26708831  2203.87243301 -3909.58186816  1909.41710691]
New Q values:  [-5922.26708831  6309.36383559 -3909.58186816  1909.41710691]
Reward: 9  Episode Reward:  25
xxxxx
x .gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18074.71620796  8883.29406141]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9450.64763701 71709.69468125]
New Q values:  [-2527.46239811 -8521.23367799  6445.27769849 71709.69468125]
Reward: 9  Episode Reward:  34
xxxxx
x g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8865.39547895 -9022.41491635 -7525.7277781   7705.92304653]
------
Step:7, Action:North
State  288
Old Q Values:  [ 8865.39547895 -9022.41491635 -7525.7277781   7705.92304653]
New Q values:  [10693.378005   -9022.41491635 -7525.7277781   7705.92304653]
Reward: -1  Episode Reward:  33
xxxxx
x .gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23826.06604473 11111.8372612    535.33196404 -3385.12952694]
------
Step:8, Action:South
State  208
Old Q Values:  [23826.06604473 11111.8372612    535.33196404 -3385.12952694]
New Q values:  [23826.06604473  7652.14830598   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10693.378005   -9022.41491635 -7525.7277781   7705.92304653]
------
Step:9, Action:West
State  288
Old Q Values:  [10693.378005   -9022.41491635 -7525.7277781   7705.92304653]
New Q values:  [10693.378005   -9022.41491635 -7525.7277781  24594.67762299]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6445.27769849 71709.69468125]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6445.27769849 71709.69468125]
New Q values:  [-2527.46239811 -8521.23367799  6445.27769849 32641.88378842]
Reward: 9  Episode Reward:  40
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  308.15975307   -40.34168621 13175.35305308   -35.88578819]
------
Step:11, Action:North
State  260
Old Q Values:  [ 1140.66107376 -8695.4397473   8298.89933389 -2601.74710518]
New Q values:  [  729.60828124 -8695.4397473   8298.89933389 -2601.74710518]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   913.14617244     0.        ]
------
Step:12, Action:East
State  180
Old Q Values:  [-5887.27445228   266.69721195  2195.37361424     0.        ]
New Q values:  [-5887.27445228   266.69721195  6876.2678071      0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 7.42556550e+03 1.99957279e+04 2.91043938e+03]
------
Step:13, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.42556550e+03 1.99957279e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 7.42556550e+03 1.51455110e+04 2.91043938e+03]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23826.06604473  7652.14830598   535.33196404 -3385.12952694]
------
Step:14, Action:North
State  208
Old Q Values:  [23826.06604473  7652.14830598   535.33196404 -3385.12952694]
New Q values:  [53908.12474486  7652.14830598   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  46
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 147907.66108988]
------
Step:15, Action:West
State  130
Old Q Values:  [ 29415.41153072   2346.20646678   -180.00807518 147907.66108988]
New Q values:  [ 29415.41153072   2346.20646678   -180.00807518 154786.82630382]
Reward: 100009  Episode Reward:  100055
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.34768665e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [1.34768665e+04 3.26362435e+03 6.49619245e+03 3.52184257e+00]
New Q values:  [5.41374052e+03 3.26362435e+03 6.49619245e+03 3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          58.64637611]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          58.64637611]
New Q values:  [-180.6        -880.4555246  -180.6          79.47478799]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632   168.72079182]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632   168.72079182]
New Q values:  [ -281.736      -9545.4473624     28.30475632   156.09256133]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -3202.62592418   277.347482    -180.6       ]
------
Step:4, Action:East
State  110
Old Q Values:  [ -180.6        -3202.62592418   277.347482    -180.6       ]
New Q values:  [ -180.6        -3202.62592418   157.1667612   -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632   156.09256133]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632   156.09256133]
New Q values:  [ -281.736      -9545.4473624     28.30475632   108.98705289]
Reward: -1  Episode Reward:  25
xxxxx
xa  x
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -3202.62592418   157.1667612   -180.6       ]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558  391.60726269   30.22265068 -252.78192178]
New Q values:  [-252.35169558  391.60726269   44.18517614 -252.78192178]
Reward: -1  Episode Reward:  24
xxxxx
x a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632   108.98705289]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   150.03947167]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   176.89796747]
Reward: -1  Episode Reward:  23
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.60726269   44.18517614 -252.78192178]
------
Step:8, Action:South
State  111
Old Q Values:  [-177.44732869  459.00096493  320.11181297 -120.29354603]
New Q values:  [-177.44732869  262.673384    320.11181297 -120.29354603]
Reward: 9  Episode Reward:  32
xxxxx
x   x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[220.43297565 198.8110338  245.5766601    0.        ]
------
Step:9, Action:East
State  185
Old Q Values:  [   2.54432572    0.          837.83042287 -178.98      ]
New Q values:  [ 2.54432572e+00  0.00000000e+00 -4.43434340e+03 -1.78980000e+02]
Reward: -9991  Episode Reward:  -9959
xxxxx
x   x
x g x
x. .x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632   108.98705289]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632   108.98705289]
New Q values:  [ -281.736      -9545.4473624     28.30475632   145.02836505]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  262.673384    320.11181297 -120.29354603]
------
Step:2, Action:East
State  110
Old Q Values:  [ -180.6        -3202.62592418   157.1667612   -180.6       ]
New Q values:  [ -180.6        -3202.62592418   105.77521399  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632   145.02836505]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632   145.02836505]
New Q values:  [ -281.736      -9545.4473624     28.30475632    89.14391022]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -3202.62592418   105.77521399  -180.6       ]
------
Step:4, Action:East
State  111
Old Q Values:  [-177.44732869  262.673384    320.11181297 -120.29354603]
New Q values:  [-177.44732869  262.673384    154.18789825 -120.29354603]
Reward: -1  Episode Reward:  6
xxxxx
x a.x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632    89.14391022]
------
Step:5, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225  10125.57884947  -5290.13005432]
New Q values:  [-10156.11771313  -8069.05606225  10125.57884947  -2026.45836596]
Reward: -1  Episode Reward:  5
xxxxx
xag.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  300.64551922    3.88307055 -180.6       ]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869  262.673384    154.18789825 -120.29354603]
New Q values:  [-177.44732869  458.68423635  154.18789825 -120.29354603]
Reward: 9  Episode Reward:  14
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 212.87164992   15.18059333 1160.71627585 -180.6       ]
------
Step:7, Action:North
State  183
Old Q Values:  [220.43297565 198.8110338  245.5766601    0.        ]
New Q values:  [225.17846117 198.8110338  245.5766601    0.        ]
Reward: -1  Episode Reward:  13
xxxxx
xa .x
x ..x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  458.68423635  154.18789825 -120.29354603]
------
Step:8, Action:South
State  110
Old Q Values:  [ -180.6        -3202.62592418   105.77521399  -180.6       ]
New Q values:  [ -180.6        -1007.70651794   105.77521399  -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x  .x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   913.14617244     0.        ]
------
Step:9, Action:East
State  180
Old Q Values:  [-5887.27445228   266.69721195  6876.2678071      0.        ]
New Q values:  [-5887.27445228   266.69721195  4047.59500673     0.        ]
Reward: 9  Episode Reward:  21
xxxxx
x  .x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4305.62627964   174.55451539     0.        ]
------
Step:10, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -3.03583183e+03  5.59303454e+01  0.00000000e+00]
New Q values:  [-2.78872080e-01  1.53053530e+03  5.59303454e+01  0.00000000e+00]
Reward: 9  Episode Reward:  30
xxxxx
x  .x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        9131.56012325 8999.27867882]
------
Step:11, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        9131.56012325 8999.27867882]
New Q values:  [   16.82637525  -180.6        11036.4273362   8999.27867882]
Reward: 9  Episode Reward:  39
xxxxx
x  .x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10693.378005   -9022.41491635 -7525.7277781  24594.67762299]
------
Step:12, Action:North
State  288
Old Q Values:  [10693.378005   -9022.41491635 -7525.7277781  24594.67762299]
New Q values:  [ 6231.60893848 -9022.41491635 -7525.7277781  24594.67762299]
Reward: 9  Episode Reward:  48
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.41374052e+03 3.26362435e+03 6.49619245e+03 3.52184257e+00]
------
Step:13, Action:East
State  210
Old Q Values:  [5.41374052e+03 3.26362435e+03 6.49619245e+03 3.52184257e+00]
New Q values:  [5.41374052e+03 3.26362435e+03 4.36673472e+03 3.52184257e+00]
Reward: -301  Episode Reward:  -253
xxxxx
x  .x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.41374052e+03 3.26362435e+03 4.36673472e+03 3.52184257e+00]
------
Step:14, Action:North
State  210
Old Q Values:  [5.41374052e+03 3.26362435e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [1.08606944e+05 3.26362435e+03 4.36673472e+03 3.52184257e+00]
Reward: 100009  Episode Reward:  99756
xxxxx
x  ax
x   x
xg  x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  308.15975307   -40.34168621 13175.35305308   -35.88578819]
------
Step:1, Action:East
State  261
Old Q Values:  [  308.15975307   -40.34168621 13175.35305308   -35.88578819]
New Q values:  [  308.15975307   -40.34168621 10697.95608362   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18074.71620796  8883.29406141]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6445.27769849 32641.88378842]
New Q values:  [-2527.46239811 -8521.23367799  9961.91436629 32641.88378842]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6231.60893848 -9022.41491635 -7525.7277781  24594.67762299]
------
Step:3, Action:West
State  288
Old Q Values:  [ 6231.60893848 -9022.41491635 -7525.7277781  24594.67762299]
New Q values:  [ 6231.60893848 -9022.41491635 -7525.7277781  19629.83618572]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9961.91436629 32641.88378842]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9961.91436629 32641.88378842]
New Q values:  [-2527.46239811 -8521.23367799  9961.91436629 21469.18422794]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[28043.43570856  2256.66526474   425.90861234  1875.31501677]
------
Step:5, Action:North
State  261
Old Q Values:  [  308.15975307   -40.34168621 10697.95608362   -35.88578819]
New Q values:  [  476.87878398   -40.34168621 10697.95608362   -35.88578819]
Reward: 9  Episode Reward:  25
xxxxx
x.g x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 212.87164992   15.18059333 1160.71627585 -180.6       ]
------
Step:6, Action:East
State  181
Old Q Values:  [ 212.87164992   15.18059333 1160.71627585 -180.6       ]
New Q values:  [ 212.87164992   15.18059333 -986.66020107 -180.6       ]
Reward: -9991  Episode Reward:  -9966
xxxxx
x.. x
x g.x
x   x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[53908.12474486  7652.14830598   535.33196404 -3385.12952694]
------
Step:1, Action:North
State  216
Old Q Values:  [  164.83042634  6449.82045435 -6170.35693855 -1798.95296703]
New Q values:  [   95.17460693  6449.82045435 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          79.47478799]
------
Step:2, Action:West
State  136
Old Q Values:  [ -724.71310357  2732.68813648 -6245.61866138 23522.63800824]
New Q values:  [ -724.71310357  2732.68813648 -6245.61866138  6452.12885814]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.g x
x . x
x...x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9961.91436629 21469.18422794]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9961.91436629 21469.18422794]
New Q values:  [-2527.46239811 -8521.23367799  9879.11660223 21469.18422794]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6231.60893848 -9022.41491635 -7525.7277781  19629.83618572]
------
Step:2, Action:West
State  288
Old Q Values:  [ 6231.60893848 -9022.41491635 -7525.7277781  19629.83618572]
New Q values:  [ 6231.60893848 -9022.41491635 -7525.7277781  14292.08974267]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9879.11660223 21469.18422794]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9879.11660223 21469.18422794]
New Q values:  [-2527.46239811 -8521.23367799  9879.11660223 11076.74349134]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  729.60828124 -8695.4397473   8298.89933389 -2601.74710518]
------
Step:4, Action:East
State  260
Old Q Values:  [  729.60828124 -8695.4397473   8298.89933389 -2601.74710518]
New Q values:  [  729.60828124 -8695.4397473   6641.98278096 -2601.74710518]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9879.11660223 11076.74349134]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9879.11660223 11076.74349134]
New Q values:  [-2527.46239811 -8521.23367799  9879.11660223  7639.48422162]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  476.87878398   -40.34168621 10697.95608362   -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [  476.87878398   -40.34168621 10697.95608362   -35.88578819]
New Q values:  [ 476.87878398  -40.34168621 9700.99729584  -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x..gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 18074.71620796  8883.29406141]
------
Step:7, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 18074.71620796  8883.29406141]
New Q values:  [   37.74111519  -168.92307549 11516.91340599  8883.29406141]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6231.60893848 -9022.41491635 -7525.7277781  14292.08974267]
------
Step:8, Action:West
State  288
Old Q Values:  [ 6231.60893848 -9022.41491635 -7525.7277781  14292.08974267]
New Q values:  [ 6231.60893848 -9022.41491635 -7525.7277781   8679.97087774]
Reward: -1  Episode Reward:  2
xxxxx
x...x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9879.11660223  7639.48422162]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9879.11660223  7639.48422162]
New Q values:  [-2527.46239811 -8521.23367799  6555.03790421  7639.48422162]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6231.60893848 -9022.41491635 -7525.7277781   8679.97087774]
------
Step:10, Action:North
State  288
Old Q Values:  [ 6231.60893848 -9022.41491635 -7525.7277781   8679.97087774]
New Q values:  [18670.48099885 -9022.41491635 -7525.7277781   8679.97087774]
Reward: 9  Episode Reward:  10
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[53908.12474486  7652.14830598   535.33196404 -3385.12952694]
------
Step:11, Action:North
State  208
Old Q Values:  [53908.12474486  7652.14830598   535.33196404 -3385.12952694]
New Q values:  [68004.69778909  7652.14830598   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  19
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 154786.82630382]
------
Step:12, Action:West
State  136
Old Q Values:  [ -724.71310357  2732.68813648 -6245.61866138  6452.12885814]
New Q values:  [ -724.71310357  2732.68813648 -6245.61866138  5623.9251981 ]
Reward: 9  Episode Reward:  28
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225  10125.57884947  -2026.45836596]
------
Step:13, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  59707.08388817 118727.87289289]
New Q values:  [  -180.6          4272.38349051  70318.28144641 118727.87289289]
Reward: -1  Episode Reward:  27
xxxxx
x. ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 29415.41153072   2346.20646678   -180.00807518 154786.82630382]
------
Step:14, Action:West
State  130
Old Q Values:  [ 29415.41153072   2346.20646678   -180.00807518 154786.82630382]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 97532.4923894 ]
Reward: -1  Episode Reward:  26
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  70318.28144641 118727.87289289]
------
Step:15, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632    89.14391022]
New Q values:  [ -281.736      -9545.4473624     28.30475632    72.79012828]
Reward: 9  Episode Reward:  35
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -180.6        -1007.70651794   105.77521399  -180.6       ]
------
Step:16, Action:East
State  106
Old Q Values:  [ -180.6        -8952.15415062    50.56516763  -180.6       ]
New Q values:  [ -180.6        -8952.15415062    41.46310554  -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632    72.79012828]
------
Step:17, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632    72.79012828]
New Q values:  [ -281.736      -9545.4473624     28.30475632   145.99823012]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.60726269   44.18517614 -252.78192178]
------
Step:18, Action:South
State  110
Old Q Values:  [ -180.6        -1007.70651794   105.77521399  -180.6       ]
New Q values:  [-180.6        -123.73875545  105.77521399 -180.6       ]
Reward: 9  Episode Reward:  42
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   913.14617244     0.        ]
------
Step:19, Action:East
State  176
Old Q Values:  [    0.          1327.79507613 28085.80813507     0.        ]
New Q values:  [    0.          1327.79507613 75783.37654262     0.        ]
Reward: 100009  Episode Reward:  100051
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   913.14617244     0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [225.17846117 198.8110338  245.5766601    0.        ]
New Q values:  [ 225.17846117  198.8110338  3682.83667531    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.97854041e+03  1.19306867e+04  1.20371620e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  1.19306867e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  3.73597579e+04  1.20371620e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.08606944e+05 3.26362435e+03 4.36673472e+03 3.52184257e+00]
------
Step:3, Action:North
State  208
Old Q Values:  [68004.69778909  7652.14830598   535.33196404 -3385.12952694]
New Q values:  [56467.02683245  7652.14830598   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 97532.4923894 ]
------
Step:4, Action:West
State  136
Old Q Values:  [ -724.71310357  2732.68813648 -6245.61866138  5623.9251981 ]
New Q values:  [ -724.71310357  2732.68813648 -6245.61866138  5292.64373408]
Reward: 9  Episode Reward:  36
xxxxx
xga x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225  10125.57884947  -2026.45836596]
------
Step:5, Action:East
State  112
Old Q Values:  [    0.         18809.06432124 13004.59207411 97448.7696    ]
New Q values:  [    0.         18809.06432124  6789.02994987 97448.7696    ]
Reward: -1  Episode Reward:  35
xxxxx
x.gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -724.71310357  2732.68813648 -6245.61866138  5292.64373408]
------
Step:6, Action:South
State  136
Old Q Values:  [ -724.71310357  2732.68813648 -6245.61866138  5292.64373408]
New Q values:  [ -724.71310357  3027.4213909  -6245.61866138  5292.64373408]
Reward: -1  Episode Reward:  34
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   95.17460693  6449.82045435 -6170.35693855 -1798.95296703]
------
Step:7, Action:South
State  208
Old Q Values:  [56467.02683245  7652.14830598   535.33196404 -3385.12952694]
New Q values:  [56467.02683245  8667.40362205   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18670.48099885 -9022.41491635 -7525.7277781   8679.97087774]
------
Step:8, Action:North
State  288
Old Q Values:  [18670.48099885 -9022.41491635 -7525.7277781   8679.97087774]
New Q values:  [24407.70044928 -9022.41491635 -7525.7277781   8679.97087774]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[56467.02683245  8667.40362205   535.33196404 -3385.12952694]
------
Step:9, Action:North
State  208
Old Q Values:  [56467.02683245  8667.40362205   535.33196404 -3385.12952694]
New Q values:  [31723.99189671  8667.40362205   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  41
xxxxx
x.gax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068  1504.73148864 -8652.84       30459.27054576]
------
Step:10, Action:North
State  136
Old Q Values:  [ -724.71310357  3027.4213909  -6245.61866138  5292.64373408]
New Q values:  [ 1117.30787879  3027.4213909  -6245.61866138  5292.64373408]
Reward: -301  Episode Reward:  -260
xxxxx
xg ax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  3027.4213909  -6245.61866138  5292.64373408]
------
Step:11, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 97532.4923894 ]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 74630.75882363]
Reward: -1  Episode Reward:  -261
xxxxx
x.a x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  70318.28144641 118727.87289289]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632   145.99823012]
New Q values:  [ -281.736      -9545.4473624     28.30475632    95.53185625]
Reward: 9  Episode Reward:  -252
xxxxx
xa  x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6        -123.73875545  105.77521399 -180.6       ]
------
Step:13, Action:East
State  110
Old Q Values:  [-180.6        -123.73875545  105.77521399 -180.6       ]
New Q values:  [-180.6        -123.73875545   70.36964247 -180.6       ]
Reward: -1  Episode Reward:  -253
xxxxx
x a x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632    95.53185625]
------
Step:14, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632    95.53185625]
New Q values:  [ -281.736      -9545.4473624     28.30475632    58.72363524]
Reward: -1  Episode Reward:  -254
xxxxx
xa  x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-180.6        -123.73875545   70.36964247 -180.6       ]
------
Step:15, Action:East
State  110
Old Q Values:  [-180.6        -123.73875545   70.36964247 -180.6       ]
New Q values:  [-180.6        -123.73875545   45.16494756 -180.6       ]
Reward: -1  Episode Reward:  -255
xxxxx
x a x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     28.30475632    58.72363524]
------
Step:16, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632    58.72363524]
New Q values:  [ -281.736      -9545.4473624     28.30475632    36.43893836]
Reward: -1  Episode Reward:  -256
xxxxx
xa  x
x   x
xg. x
xxxxx
Step:17, Action:North
State  110
Old Q Values:  [-180.6        -123.73875545   45.16494756 -180.6       ]
New Q values:  [-239.29051573 -123.73875545   45.16494756 -180.6       ]
Reward: -301  Episode Reward:  -557
xxxxx
xa  x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 -123.73875545   45.16494756 -180.6       ]
------
Step:18, Action:East
State  99
Old Q Values:  [    0.         33234.31357004 59008.66285065     0.        ]
New Q values:  [    0.         33234.31357004 59221.22700813     0.        ]
Reward: -1  Episode Reward:  -558
xxxxx
x a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  70318.28144641 118727.87289289]
------
Step:19, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   176.89796747]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   207.7644579 ]
Reward: -1  Episode Reward:  -559
xxxxx
xa  x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  458.68423635  154.18789825 -120.29354603]
------
Step:20, Action:South
State  99
Old Q Values:  [    0.         33234.31357004 59221.22700813     0.        ]
New Q values:  [    0.         39012.69607126 59221.22700813     0.        ]
Reward: -1  Episode Reward:  -560
xxxxx
x   x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 85731.90214416     0.        ]
------
Step:21, Action:East
State  189
Old Q Values:  [   9.84673294  418.83078898 1494.56175526 -244.98066897]
New Q values:  [    9.84673294   418.83078898 -4177.65087173  -244.98066897]
Reward: -10001  Episode Reward:  -10561
xxxxx
x   x
x g x
x . x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6555.03790421  7639.48422162]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525  -180.6        11036.4273362   8999.27867882]
New Q values:  [   16.82637525  -180.6        11036.4273362   6515.41066028]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 476.87878398  -40.34168621 9700.99729584  -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [ 476.87878398  -40.34168621 9700.99729584  -35.88578819]
New Q values:  [ 476.87878398  -40.34168621 7190.72711919  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525  -180.6        11036.4273362   6515.41066028]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6555.03790421  7639.48422162]
New Q values:  [-2527.46239811 -8521.23367799  9949.72529647  7639.48422162]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24407.70044928 -9022.41491635 -7525.7277781   8679.97087774]
------
Step:4, Action:North
State  288
Old Q Values:  [24407.70044928 -9022.41491635 -7525.7277781   8679.97087774]
New Q values:  [19285.67774872 -9022.41491635 -7525.7277781   8679.97087774]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31723.99189671  8667.40362205   535.33196404 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [31723.99189671  8667.40362205   535.33196404 -3385.12952694]
New Q values:  [14282.78987891  8667.40362205   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  35
xxxxx
x gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  3027.4213909  -6245.61866138  5292.64373408]
------
Step:6, Action:South
State  136
Old Q Values:  [ 1117.30787879  3027.4213909  -6245.61866138  5292.64373408]
New Q values:  [ 1117.30787879  5495.20552003 -6245.61866138  5292.64373408]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14282.78987891  8667.40362205   535.33196404 -3385.12952694]
------
Step:7, Action:South
State  208
Old Q Values:  [14282.78987891  8667.40362205   535.33196404 -3385.12952694]
New Q values:  [14282.78987891  9252.06477344   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19285.67774872 -9022.41491635 -7525.7277781   8679.97087774]
------
Step:8, Action:West
State  288
Old Q Values:  [19285.67774872 -9022.41491635 -7525.7277781   8679.97087774]
New Q values:  [19285.67774872 -9022.41491635 -7525.7277781   6926.46237289]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 11516.91340599  8883.29406141]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9949.72529647  7639.48422162]
New Q values:  [-2527.46239811 -8521.23367799  9764.9934432   7639.48422162]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19285.67774872 -9022.41491635 -7525.7277781   6926.46237289]
------
Step:10, Action:North
State  288
Old Q Values:  [19285.67774872 -9022.41491635 -7525.7277781   6926.46237289]
New Q values:  [11998.50806316 -9022.41491635 -7525.7277781   6926.46237289]
Reward: -1  Episode Reward:  30
xxxxx
x .gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14282.78987891  9252.06477344   535.33196404 -3385.12952694]
------
Step:11, Action:South
State  208
Old Q Values:  [14282.78987891  9252.06477344   535.33196404 -3385.12952694]
New Q values:  [14282.78987891  7299.77832832   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  29
xxxxx
x . x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11998.50806316 -9022.41491635 -7525.7277781   6926.46237289]
------
Step:12, Action:West
State  288
Old Q Values:  [11998.50806316 -9022.41491635 -7525.7277781   6926.46237289]
New Q values:  [11998.50806316 -9022.41491635 -7525.7277781   6225.05897095]
Reward: -1  Episode Reward:  28
xxxxx
x .gx
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 11516.91340599  8883.29406141]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9764.9934432   7639.48422162]
New Q values:  [-2527.46239811 -8521.23367799  7504.94979623  7639.48422162]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11998.50806316 -9022.41491635 -7525.7277781   6225.05897095]
------
Step:14, Action:North
State  288
Old Q Values:  [11998.50806316 -9022.41491635 -7525.7277781   6225.05897095]
New Q values:  [ 9083.64018894 -9022.41491635 -7525.7277781   6225.05897095]
Reward: -1  Episode Reward:  26
xxxxx
x g x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14282.78987891  7299.77832832   535.33196404 -3385.12952694]
------
Step:15, Action:North
State  208
Old Q Values:  [14282.78987891  7299.77832832   535.33196404 -3385.12952694]
New Q values:  [ 1361.07760757  7299.77832832   535.33196404 -3385.12952694]
Reward: -10001  Episode Reward:  -9975
xxxxx
x .gx
x.. x
x   x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 476.87878398  -40.34168621 7190.72711919  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [ 476.87878398  -40.34168621 7190.72711919  -35.88578819]
New Q values:  [ 260.01300857  -40.34168621 7190.72711919  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 212.87164992   15.18059333 -986.66020107 -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [ 212.87164992   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 180.74231573   15.18059333 -986.66020107 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
xag.x
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  300.64551922    3.88307055 -180.6       ]
------
Step:3, Action:South
State  109
Old Q Values:  [-241.10880094  300.64551922    3.88307055 -180.6       ]
New Q values:  [-241.10880094  173.88090241    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 180.74231573   15.18059333 -986.66020107 -180.6       ]
------
Step:4, Action:North
State  181
Old Q Values:  [ 180.74231573   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 123.86119702   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
xag.x
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  173.88090241    3.88307055 -180.6       ]
------
Step:5, Action:South
State  109
Old Q Values:  [-241.10880094  173.88090241    3.88307055 -180.6       ]
New Q values:  [-241.10880094  106.11072007    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 123.86119702   15.18059333 -986.66020107 -180.6       ]
------
Step:6, Action:North
State  181
Old Q Values:  [ 123.86119702   15.18059333 -986.66020107 -180.6       ]
New Q values:  [  80.77769483   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
xag.x
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  106.11072007    3.88307055 -180.6       ]
------
Step:7, Action:South
State  103
Old Q Values:  [-180.6         254.06752245    5.4           0.        ]
New Q values:  [-180.6         125.26031743    5.4           0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  80.77769483   15.18059333 -986.66020107 -180.6       ]
------
Step:8, Action:North
State  180
Old Q Values:  [-5887.27445228   266.69721195  4047.59500673     0.        ]
New Q values:  [-2341.96029664   266.69721195  4047.59500673     0.        ]
Reward: -1  Episode Reward:  12
xxxxx
xa..x
xg..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 -123.73875545   45.16494756 -180.6       ]
------
Step:9, Action:East
State  111
Old Q Values:  [-177.44732869  458.68423635  154.18789825 -120.29354603]
New Q values:  [-177.44732869  458.68423635  192.24933255 -120.29354603]
Reward: 9  Episode Reward:  21
xxxxx
x a.x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         417.24724415 268.83765721 204.22976196]
------
Step:10, Action:East
State  124
Old Q Values:  [  0.           5.4         76.25477662 341.12160345]
New Q values:  [   0.            5.4        1684.46356666  341.12160345]
Reward: 9  Episode Reward:  30
xxxxx
x gax
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  5495.20552003 -6245.61866138  5292.64373408]
------
Step:11, Action:South
State  136
Old Q Values:  [ 1117.30787879  5495.20552003 -6245.61866138  5292.64373408]
New Q values:  [ 1117.30787879  4393.41570651 -6245.61866138  5292.64373408]
Reward: 9  Episode Reward:  39
xxxxx
x  gx
x .ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1361.07760757  7299.77832832   535.33196404 -3385.12952694]
------
Step:12, Action:South
State  208
Old Q Values:  [ 1361.07760757  7299.77832832   535.33196404 -3385.12952694]
New Q values:  [ 1361.07760757  5650.40338801   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9083.64018894 -9022.41491635 -7525.7277781   6225.05897095]
------
Step:13, Action:West
State  288
Old Q Values:  [ 9083.64018894 -9022.41491635 -7525.7277781   6225.05897095]
New Q values:  [ 9083.64018894 -9022.41491635 -7525.7277781   5944.49761018]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 11516.91340599  8883.29406141]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7504.94979623  7639.48422162]
New Q values:  [-2527.46239811 -8521.23367799  5726.47197517  7639.48422162]
Reward: -1  Episode Reward:  46
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9083.64018894 -9022.41491635 -7525.7277781   5944.49761018]
------
Step:15, Action:North
State  288
Old Q Values:  [ 9083.64018894 -9022.41491635 -7525.7277781   5944.49761018]
New Q values:  [ 5327.97709198 -9022.41491635 -7525.7277781   5944.49761018]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1361.07760757  5650.40338801   535.33196404 -3385.12952694]
------
Step:16, Action:South
State  208
Old Q Values:  [ 1361.07760757  5650.40338801   535.33196404 -3385.12952694]
New Q values:  [ 1361.07760757  4042.91063826   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5327.97709198 -9022.41491635 -7525.7277781   5944.49761018]
------
Step:17, Action:West
State  288
Old Q Values:  [ 5327.97709198 -9022.41491635 -7525.7277781   5944.49761018]
New Q values:  [ 5327.97709198 -9022.41491635 -7525.7277781   4669.04431056]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5726.47197517  7639.48422162]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5726.47197517  7639.48422162]
New Q values:  [-2527.46239811 -8521.23367799  3888.38191766  7639.48422162]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5327.97709198 -9022.41491635 -7525.7277781   4669.04431056]
------
Step:19, Action:North
State  288
Old Q Values:  [ 5327.97709198 -9022.41491635 -7525.7277781   4669.04431056]
New Q values:  [34712.67406664 -9022.41491635 -7525.7277781   4669.04431056]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.08606944e+05 3.26362435e+03 4.36673472e+03 3.52184257e+00]
------
Step:20, Action:North
State  208
Old Q Values:  [ 1361.07760757  4042.91063826   535.33196404 -3385.12952694]
New Q values:  [  567.67347943  4042.91063826   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          79.47478799]
------
Step:21, Action:West
State  136
Old Q Values:  [ 1117.30787879  4393.41570651 -6245.61866138  5292.64373408]
New Q values:  [ 1117.30787879  4393.41570651 -6245.61866138  5154.13114847]
Reward: -1  Episode Reward:  39
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225  10125.57884947  -2026.45836596]
------
Step:22, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     28.30475632    36.43893836]
New Q values:  [ -281.736      -9545.4473624     34.56433893    36.43893836]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          79.47478799]
------
Step:23, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          79.47478799]
New Q values:  [-180.6        -880.4555246  -180.6          42.12159671]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     34.56433893    36.43893836]
------
Step:24, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     34.56433893    36.43893836]
New Q values:  [ -281.736      -9545.4473624     34.56433893   151.58084625]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  458.68423635  192.24933255 -120.29354603]
------
Step:25, Action:South
State  99
Old Q Values:  [    0.         39012.69607126 59221.22700813     0.        ]
New Q values:  [    0.         41557.06803767 59221.22700813     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 86508.63203056     0.        ]
------
Step:26, Action:North
State  180
Old Q Values:  [-2341.96029664   266.69721195  4047.59500673     0.        ]
New Q values:  [-923.83463439  266.69721195 4047.59500673    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 -123.73875545   45.16494756 -180.6       ]
------
Step:27, Action:East
State  108
Old Q Values:  [-8463.16477134  2208.23464578   748.34740399     0.        ]
New Q values:  [-8463.16477134  2208.23464578  3336.41261644     0.        ]
Reward: -1  Episode Reward:  33
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225  10125.57884947  -2026.45836596]
------
Step:28, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225  10125.57884947  -2026.45836596]
New Q values:  [-10156.11771313  -8069.05606225   5595.87088433  -2026.45836596]
Reward: -1  Episode Reward:  32
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  4393.41570651 -6245.61866138  5154.13114847]
------
Step:29, Action:South
State  136
Old Q Values:  [ 1117.30787879  4393.41570651 -6245.61866138  5154.13114847]
New Q values:  [ 1117.30787879  2969.63947408 -6245.61866138  5154.13114847]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  567.67347943  4042.91063826   535.33196404 -3385.12952694]
------
Step:30, Action:South
State  208
Old Q Values:  [  567.67347943  4042.91063826   535.33196404 -3385.12952694]
New Q values:  [  567.67347943 12030.36647529   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[34712.67406664 -9022.41491635 -7525.7277781   4669.04431056]
------
Step:31, Action:West
State  288
Old Q Values:  [34712.67406664 -9022.41491635 -7525.7277781   4669.04431056]
New Q values:  [34712.67406664 -9022.41491635 -7525.7277781   5322.09174602]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 11516.91340599  8883.29406141]
------
Step:32, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3888.38191766  7639.48422162]
New Q values:  [-2527.46239811 -8521.23367799  3888.38191766  5212.41182441]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 260.01300857  -40.34168621 7190.72711919  -35.88578819]
------
Step:33, Action:North
State  257
Old Q Values:  [28043.43570856  2256.66526474   425.90861234  1875.31501677]
New Q values:  [37169.36389259  2256.66526474   425.90861234  1875.31501677]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 86508.63203056     0.        ]
------
Step:34, Action:North
State  181
Old Q Values:  [  80.77769483   15.18059333 -986.66020107 -180.6       ]
New Q values:  [  63.54429395   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  106.11072007    3.88307055 -180.6       ]
------
Step:35, Action:South
State  108
Old Q Values:  [-8463.16477134  2208.23464578  3336.41261644     0.        ]
New Q values:  [-8463.16477134  2096.97236033  3336.41261644     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-923.83463439  266.69721195 4047.59500673    0.        ]
------
Step:36, Action:East
State  177
Old Q Values:  [66823.71809805  4025.17604709 86508.63203056     0.        ]
New Q values:  [66823.71809805  4025.17604709 99152.50610081     0.        ]
Reward: 100009  Episode Reward:  100034
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 260.01300857  -40.34168621 7190.72711919  -35.88578819]
------
Step:1, Action:East
State  261
Old Q Values:  [ 260.01300857  -40.34168621 7190.72711919  -35.88578819]
New Q values:  [ 260.01300857  -40.34168621 4445.414395    -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3888.38191766  5212.41182441]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3888.38191766  5212.41182441]
New Q values:  [-2527.46239811 -8521.23367799  3888.38191766  4076.95956405]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  729.60828124 -8695.4397473   6641.98278096 -2601.74710518]
------
Step:3, Action:East
State  260
Old Q Values:  [  729.60828124 -8695.4397473   6641.98278096 -2601.74710518]
New Q values:  [  729.60828124 -8695.4397473   3879.2809816  -2601.74710518]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3888.38191766  4076.95956405]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3888.38191766  4076.95956405]
New Q values:  [-2527.46239811 -8521.23367799  3888.38191766  2963.80814412]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 260.01300857  -40.34168621 4445.414395    -35.88578819]
------
Step:5, Action:East
State  261
Old Q Values:  [ 260.01300857  -40.34168621 4445.414395    -35.88578819]
New Q values:  [ 260.01300857  -40.34168621 5232.6397798   -35.88578819]
Reward: -1  Episode Reward:  5
xxxxx
x..gx
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 11516.91340599  8883.29406141]
------
Step:6, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 11516.91340599  8883.29406141]
New Q values:  [   37.74111519  -168.92307549 15025.96758239  8883.29406141]
Reward: 9  Episode Reward:  14
xxxxx
x...x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[34712.67406664 -9022.41491635 -7525.7277781   5322.09174602]
------
Step:7, Action:West
State  288
Old Q Values:  [34712.67406664 -9022.41491635 -7525.7277781   5322.09174602]
New Q values:  [34712.67406664 -9022.41491635 -7525.7277781   3294.75127371]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3888.38191766  2963.80814412]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3888.38191766  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799 11968.55498706  2963.80814412]
Reward: -1  Episode Reward:  12
xxxxx
x.g.x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[34712.67406664 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:9, Action:North
State  288
Old Q Values:  [34712.67406664 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [17493.57956924 -9022.41491635 -7525.7277781   3294.75127371]
Reward: -1  Episode Reward:  11
xxxxx
x..gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  567.67347943 12030.36647529   535.33196404 -3385.12952694]
------
Step:10, Action:South
State  208
Old Q Values:  [  567.67347943 12030.36647529   535.33196404 -3385.12952694]
New Q values:  [  567.67347943 10059.62046089   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  10
xxxxx
x.g.x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17493.57956924 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:11, Action:North
State  288
Old Q Values:  [17493.57956924 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [10014.71796596 -9022.41491635 -7525.7277781   3294.75127371]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  567.67347943 10059.62046089   535.33196404 -3385.12952694]
------
Step:12, Action:South
State  210
Old Q Values:  [1.08606944e+05 3.26362435e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [1.08606944e+05 4.30926513e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10014.71796596 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:13, Action:North
State  288
Old Q Values:  [10014.71796596 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [36587.37041623 -9022.41491635 -7525.7277781   3294.75127371]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.08606944e+05 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:14, Action:North
State  208
Old Q Values:  [  567.67347943 10059.62046089   535.33196404 -3385.12952694]
New Q values:  [22621.69703886 10059.62046089   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  16
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 74630.75882363]
------
Step:15, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          42.12159671]
New Q values:  [-180.6        -880.4555246  -180.6          67.72289256]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     34.56433893   151.58084625]
------
Step:16, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     34.56433893   151.58084625]
New Q values:  [ -281.736      -9545.4473624     34.56433893   183.51451731]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.60726269   44.18517614 -252.78192178]
------
Step:17, Action:South
State  99
Old Q Values:  [    0.         41557.06803767 59221.22700813     0.        ]
New Q values:  [    0.         46373.97904531 59221.22700813     0.        ]
Reward: 9  Episode Reward:  43
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 99152.50610081     0.        ]
------
Step:18, Action:North
State  181
Old Q Values:  [  63.54429395   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 162.42298849   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  458.68423635  192.24933255 -120.29354603]
------
Step:19, Action:South
State  109
Old Q Values:  [-241.10880094  106.11072007    3.88307055 -180.6       ]
New Q values:  [-241.10880094   90.57118457    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 162.42298849   15.18059333 -986.66020107 -180.6       ]
------
Step:20, Action:North
State  181
Old Q Values:  [ 162.42298849   15.18059333 -986.66020107 -180.6       ]
New Q values:  [  91.54055077   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   90.57118457    3.88307055 -180.6       ]
------
Step:21, Action:South
State  109
Old Q Values:  [-241.10880094   90.57118457    3.88307055 -180.6       ]
New Q values:  [-241.10880094   63.09063906    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  91.54055077   15.18059333 -986.66020107 -180.6       ]
------
Step:22, Action:North
State  181
Old Q Values:  [  91.54055077   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 173.62149121   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  458.68423635  192.24933255 -120.29354603]
------
Step:23, Action:South
State  109
Old Q Values:  [-241.10880094   63.09063906    3.88307055 -180.6       ]
New Q values:  [-241.10880094   76.72270299    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 173.62149121   15.18059333 -986.66020107 -180.6       ]
------
Step:24, Action:North
State  181
Old Q Values:  [ 173.62149121   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 206.45386739   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  458.68423635  192.24933255 -120.29354603]
------
Step:25, Action:South
State  99
Old Q Values:  [    0.         46373.97904531 59221.22700813     0.        ]
New Q values:  [    0.         48294.74344837 59221.22700813     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 99152.50610081     0.        ]
------
Step:26, Action:North
State  181
Old Q Values:  [ 206.45386739   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 104.99835785   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   76.72270299    3.88307055 -180.6       ]
------
Step:27, Action:South
State  99
Old Q Values:  [    0.         48294.74344837 59221.22700813     0.        ]
New Q values:  [    0.         49063.04920959 59221.22700813     0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 99152.50610081     0.        ]
------
Step:28, Action:North
State  181
Old Q Values:  [ 104.99835785   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 179.00461405   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  458.68423635  192.24933255 -120.29354603]
------
Step:29, Action:South
State  111
Old Q Values:  [-177.44732869  458.68423635  192.24933255 -120.29354603]
New Q values:  [-177.44732869 1287.72469714  192.24933255 -120.29354603]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117  198.8110338  3682.83667531    0.        ]
------
Step:30, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 85731.90214416     0.        ]
New Q values:  [     0.           4614.46100011 105506.08823113      0.        ]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22621.69703886 10059.62046089   535.33196404 -3385.12952694]
------
Step:1, Action:North
State  216
Old Q Values:  [   95.17460693  6449.82045435 -6170.35693855 -1798.95296703]
New Q values:  [   63.78671054  6449.82045435 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          67.72289256]
------
Step:2, Action:West
State  136
Old Q Values:  [ 1117.30787879  2969.63947408 -6245.61866138  5154.13114847]
New Q values:  [ 1117.30787879  2969.63947408 -6245.61866138 -2254.18627531]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.g x
x . x
x...x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[36587.37041623 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:1, Action:North
State  288
Old Q Values:  [36587.37041623 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [21426.85727815 -9022.41491635 -7525.7277781   3294.75127371]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22621.69703886 10059.62046089   535.33196404 -3385.12952694]
------
Step:2, Action:North
State  210
Old Q Values:  [1.08606944e+05 4.30926513e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [4.34684945e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          67.72289256]
------
Step:3, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          67.72289256]
New Q values:  [-180.6        -880.4555246  -180.6          87.54351221]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     34.56433893   183.51451731]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     34.56433893   183.51451731]
New Q values:  [ -281.736      -9545.4473624     34.56433893    92.35529119]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 -123.73875545   45.16494756 -180.6       ]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558  391.60726269   44.18517614 -252.78192178]
New Q values:  [-252.35169558  391.60726269   44.78065781 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     34.56433893    92.35529119]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     34.56433893    92.35529119]
New Q values:  [ -281.736      -9545.4473624     34.56433893    49.89160074]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 -123.73875545   45.16494756 -180.6       ]
------
Step:7, Action:East
State  110
Old Q Values:  [-239.29051573 -123.73875545   45.16494756 -180.6       ]
New Q values:  [-239.29051573 -123.73875545   32.43345925 -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x a x
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     34.56433893    49.89160074]
------
Step:8, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   5595.87088433  -2026.45836596]
New Q values:  [-10156.11771313  -8069.05606225   5595.87088433  -5810.25956145]
Reward: -10001  Episode Reward:  -9968
xxxxx
xg  x
x.. x
x . x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 11968.55498706  2963.80814412]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11968.55498706  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799 11220.87917827  2963.80814412]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21426.85727815 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:2, Action:North
State  288
Old Q Values:  [21426.85727815 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [15362.65202292 -9022.41491635 -7525.7277781   3294.75127371]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22621.69703886 10059.62046089   535.33196404 -3385.12952694]
------
Step:3, Action:North
State  208
Old Q Values:  [22621.69703886 10059.62046089   535.33196404 -3385.12952694]
New Q values:  [31443.30646263 10059.62046089   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 74630.75882363]
------
Step:4, Action:West
State  136
Old Q Values:  [ 1117.30787879  2969.63947408 -6245.61866138 -2254.18627531]
New Q values:  [ 1117.30787879  2969.63947408 -6245.61866138 -5217.51324483]
Reward: -9991  Episode Reward:  -9964
xxxxx
x.g x
x . x
x.  x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 7.42556550e+03 1.51455110e+04 2.91043938e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108 4083.74808722  181.20343395]
New Q values:  [  62.8218634  1206.93052108 3573.84537119  181.20343395]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   63.78671054  6449.82045435 -6170.35693855 -1798.95296703]
------
Step:2, Action:South
State  208
Old Q Values:  [31443.30646263 10059.62046089   535.33196404 -3385.12952694]
New Q values:  [31443.30646263  8638.04379123   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15362.65202292 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:3, Action:North
State  288
Old Q Values:  [15362.65202292 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [ 9577.45274796 -9022.41491635 -7525.7277781   3294.75127371]
Reward: -10001  Episode Reward:  -9983
xxxxx
x ..x
x. gx
x.. x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 14927.26725394  6267.88141429  2546.60363946]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  6309.36383559 -3909.58186816  1909.41710691]
New Q values:  [-5922.26708831  7036.93580895 -3909.58186816  1909.41710691]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 15025.96758239  8883.29406141]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11220.87917827  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799  7360.98749569  2963.80814412]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9577.45274796 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:3, Action:North
State  288
Old Q Values:  [ 9577.45274796 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [13269.37303797 -9022.41491635 -7525.7277781   3294.75127371]
Reward: 9  Episode Reward:  17
xxxxx
x.g.x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31443.30646263  8638.04379123   535.33196404 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [31443.30646263  8638.04379123   535.33196404 -3385.12952694]
New Q values:  [15720.50374878  8638.04379123   535.33196404 -3385.12952694]
Reward: -9991  Episode Reward:  -9974
xxxxx
x..gx
x.  x
x.  x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          87.54351221]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          87.54351221]
New Q values:  [-180.6        -880.4555246  -180.6          55.38488511]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     34.56433893    49.89160074]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     34.56433893    49.89160074]
New Q values:  [ -281.736      -9545.4473624     34.56433893   411.67404944]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1287.72469714  192.24933255 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 1287.72469714  192.24933255 -120.29354603]
New Q values:  [-177.44732869 1625.34088145  192.24933255 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117  198.8110338  3682.83667531    0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [ 225.17846117  198.8110338  3682.83667531    0.        ]
New Q values:  [ 225.17846117  198.8110338  1937.69526132    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01  1.53053530e+03  5.59303454e+01  0.00000000e+00]
------
Step:5, Action:East
State  206
Old Q Values:  [  0.         135.48456638   6.60224881   0.        ]
New Q values:  [  0.         135.48456638 897.98596794   0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 183.62632649 2966.48356139    0.          322.01285006]
------
Step:6, Action:South
State  218
Old Q Values:  [ 183.62632649 2966.48356139    0.          322.01285006]
New Q values:  [ 183.62632649 5172.80533595    0.          322.01285006]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13269.37303797 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:7, Action:North
State  288
Old Q Values:  [13269.37303797 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [18347.69756746 -9022.41491635 -7525.7277781   3294.75127371]
Reward: -1  Episode Reward:  53
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.34684945e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:8, Action:North
State  216
Old Q Values:  [   63.78671054  6449.82045435 -6170.35693855 -1798.95296703]
New Q values:  [   41.53014975  6449.82045435 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  52
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          55.38488511]
------
Step:9, Action:West
State  136
Old Q Values:  [ 1117.30787879  2969.63947408 -6245.61866138 -5217.51324483]
New Q values:  [ 1117.30787879  2969.63947408 -6245.61866138  -408.84403263]
Reward: -1  Episode Reward:  51
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   5595.87088433  -5810.25956145]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     34.56433893   411.67404944]
New Q values:  [ -281.736      -9545.4473624     29.8412011    411.67404944]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          55.38488511]
------
Step:11, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 74630.75882363]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 65470.06539732]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  70318.28144641 118727.87289289]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    411.67404944]
New Q values:  [ -281.736      -9545.4473624     29.8412011    281.55179858]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.60726269   44.78065781 -252.78192178]
------
Step:13, Action:South
State  98
Old Q Values:  [    0.         36946.43768466 48290.70319312     0.        ]
New Q values:  [    0.         41136.94667617 48290.70319312     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:14, Action:East
State  184
Old Q Values:  [  14.56955029    0.         2686.54903933    0.        ]
New Q values:  [  14.56955029    0.         2146.17322709    0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1206.93052108 3573.84537119  181.20343395]
------
Step:15, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108 3573.84537119  181.20343395]
New Q values:  [  62.8218634  1206.93052108 3363.88428478  181.20343395]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   41.53014975  6449.82045435 -6170.35693855 -1798.95296703]
------
Step:16, Action:South
State  216
Old Q Values:  [   41.53014975  6449.82045435 -6170.35693855 -1798.95296703]
New Q values:  [   41.53014975  8083.63745198 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18347.69756746 -9022.41491635 -7525.7277781   3294.75127371]
------
Step:17, Action:West
State  288
Old Q Values:  [18347.69756746 -9022.41491635 -7525.7277781   3294.75127371]
New Q values:  [18347.69756746 -9022.41491635 -7525.7277781   3525.59675819]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7360.98749569  2963.80814412]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7360.98749569  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799  8448.10426851  2963.80814412]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18347.69756746 -9022.41491635 -7525.7277781   3525.59675819]
------
Step:19, Action:North
State  288
Old Q Values:  [18347.69756746 -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [ 9763.57026258 -9022.41491635 -7525.7277781   3525.59675819]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   41.53014975  8083.63745198 -6170.35693855 -1798.95296703]
------
Step:20, Action:South
State  216
Old Q Values:  [   41.53014975  8083.63745198 -6170.35693855 -1798.95296703]
New Q values:  [   41.53014975  6161.92605956 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9763.57026258 -9022.41491635 -7525.7277781   3525.59675819]
------
Step:21, Action:North
State  288
Old Q Values:  [ 9763.57026258 -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [16945.3764573  -9022.41491635 -7525.7277781   3525.59675819]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.34684945e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:22, Action:North
State  218
Old Q Values:  [ 183.62632649 5172.80533595    0.          322.01285006]
New Q values:  [  89.46599613 5172.80533595    0.          322.01285006]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          55.38488511]
------
Step:23, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          55.38488511]
New Q values:  [-180.6        -880.4555246  -180.6          83.88329141]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349   207.7644579 ]
------
Step:24, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   207.7644579 ]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   199.98796196]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.60726269   44.78065781 -252.78192178]
------
Step:25, Action:South
State  105
Old Q Values:  [ -180.6          295.48182771 -5851.25726525     0.        ]
New Q values:  [ -180.6          118.3560288  -5851.25726525     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 2.54432572e+00  0.00000000e+00 -4.43434340e+03 -1.78980000e+02]
------
Step:26, Action:North
State  185
Old Q Values:  [ 2.54432572e+00  0.00000000e+00 -4.43434340e+03 -1.78980000e+02]
New Q values:  [   35.92453893     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  34
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          118.3560288  -5851.25726525     0.        ]
------
Step:27, Action:South
State  105
Old Q Values:  [ -180.6          118.3560288  -5851.25726525     0.        ]
New Q values:  [ -180.6           57.5197732  -5851.25726525     0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   35.92453893     0.         -4434.34340468  -178.98      ]
------
Step:28, Action:North
State  185
Old Q Values:  [   35.92453893     0.         -4434.34340468  -178.98      ]
New Q values:  [   31.02574753     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  32
xxxxx
xa gx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6           57.5197732  -5851.25726525     0.        ]
------
Step:29, Action:South
State  105
Old Q Values:  [ -180.6           57.5197732  -5851.25726525     0.        ]
New Q values:  [ -180.6           31.71563354 -5851.25726525     0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x g x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   31.02574753     0.         -4434.34340468  -178.98      ]
------
Step:30, Action:North
State  184
Old Q Values:  [  14.56955029    0.         2146.17322709    0.        ]
New Q values:  [-5232.32304719     0.          2146.17322709     0.        ]
Reward: -10001  Episode Reward:  -9970
xxxxx
xg  x
x   x
x.  x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6          83.88329141]
------
Step:1, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          83.88329141]
New Q values:  [-180.6        -880.4555246  -180.6         123.41885614]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    281.55179858]
------
Step:2, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   5595.87088433  -5810.25956145]
New Q values:  [-10156.11771313  -8069.05606225   5595.87088433  -2295.68701369]
Reward: 9  Episode Reward:  18
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   76.72270299    3.88307055 -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 1625.34088145  192.24933255 -120.29354603]
New Q values:  [-177.44732869  775.18558927  192.24933255 -120.29354603]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   418.83078898 -4177.65087173  -244.98066897]
------
Step:4, Action:South
State  183
Old Q Values:  [ 225.17846117  198.8110338  1937.69526132    0.        ]
New Q values:  [ 225.17846117 1654.71634746 1937.69526132    0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 260.01300857  -40.34168621 5232.6397798   -35.88578819]
------
Step:5, Action:North
State  260
Old Q Values:  [  729.60828124 -8695.4397473   3879.2809816  -2601.74710518]
New Q values:  [  565.18716423 -8695.4397473   3879.2809816  -2601.74710518]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   913.14617244     0.        ]
------
Step:6, Action:East
State  188
Old Q Values:  [-6523.78898263   258.74794358  1687.41680659     0.        ]
New Q values:  [-6523.78898263   258.74794358  1972.05460653     0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4305.62627964   174.55451539     0.        ]
------
Step:7, Action:South
State  198
Old Q Values:  [-2.78872080e-01  1.53053530e+03  5.59303454e+01  0.00000000e+00]
New Q values:  [-2.78872080e-01  3.92854232e+03  5.59303454e+01  0.00000000e+00]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525  -180.6        11036.4273362   6515.41066028]
------
Step:8, Action:East
State  276
Old Q Values:  [   16.82637525  -180.6        11036.4273362   6515.41066028]
New Q values:  [  16.82637525 -180.6        9503.58387167 6515.41066028]
Reward: 9  Episode Reward:  52
xxxxx
x   x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16945.3764573  -9022.41491635 -7525.7277781   3525.59675819]
------
Step:9, Action:North
State  288
Old Q Values:  [16945.3764573  -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [79824.09893519 -9022.41491635 -7525.7277781   3525.59675819]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
x  ax
xg  x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.34684945e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [4.34684945e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [1.74298235e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         123.41885614]
------
Step:2, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         123.41885614]
New Q values:  [-180.6        -880.4555246  -180.6         139.23308203]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    281.55179858]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    281.55179858]
New Q values:  [ -281.736      -9545.4473624     29.8412011    350.57639621]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  775.18558927  192.24933255 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869  775.18558927  192.24933255 -120.29354603]
New Q values:  [-177.44732869  441.12347241  192.24933255 -120.29354603]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294   418.83078898 -4177.65087173  -244.98066897]
------
Step:5, Action:South
State  189
Old Q Values:  [    9.84673294   418.83078898 -4177.65087173  -244.98066897]
New Q values:  [    9.84673294  1736.72424953 -4177.65087173  -244.98066897]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 260.01300857  -40.34168621 5232.6397798   -35.88578819]
------
Step:6, Action:East
State  260
Old Q Values:  [  565.18716423 -8695.4397473   3879.2809816  -2601.74710518]
New Q values:  [  565.18716423 -8695.4397473   4091.54367319 -2601.74710518]
Reward: 9  Episode Reward:  44
xxxxx
xg  x
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8448.10426851  2963.80814412]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8448.10426851  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799 27331.87138796  2963.80814412]
Reward: 9  Episode Reward:  53
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[79824.09893519 -9022.41491635 -7525.7277781   3525.59675819]
------
Step:8, Action:North
State  288
Old Q Values:  [79824.09893519 -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [36645.19069871 -9022.41491635 -7525.7277781   3525.59675819]
Reward: -1  Episode Reward:  52
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15720.50374878  8638.04379123   535.33196404 -3385.12952694]
------
Step:9, Action:North
State  208
Old Q Values:  [15720.50374878  8638.04379123   535.33196404 -3385.12952694]
New Q values:  [ 6329.37142412  8638.04379123   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  51
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         139.23308203]
------
Step:10, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         139.23308203]
New Q values:  [-180.6        -880.4555246  -180.6         160.26615168]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    350.57639621]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    350.57639621]
New Q values:  [ -281.736      -9545.4473624     29.8412011    271.96760021]
Reward: -1  Episode Reward:  49
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  441.12347241  192.24933255 -120.29354603]
------
Step:12, Action:South
State  110
Old Q Values:  [-239.29051573 -123.73875545   32.43345925 -180.6       ]
New Q values:  [-239.29051573  223.84834955   32.43345925 -180.6       ]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xa. x
xg  x
xxxxx
Step:13, Action:West
State  180
Old Q Values:  [-923.83463439  266.69721195 4047.59500673    0.        ]
New Q values:  [ -923.83463439   266.69721195  4047.59500673 -4966.32149798]
Reward: -10301  Episode Reward:  -10253
xxxxx
x   x
xg. x
x   x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 27331.87138796  2963.80814412]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 27331.87138796  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799 21931.7057648   2963.80814412]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[36645.19069871 -9022.41491635 -7525.7277781   3525.59675819]
------
Step:2, Action:North
State  288
Old Q Values:  [36645.19069871 -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [19892.42331744 -9022.41491635 -7525.7277781   3525.59675819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.74298235e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [1.74298235e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [2.66183490e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[29415.41153072  2346.20646678  -180.00807518 65470.06539732]
------
Step:4, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         160.26615168]
New Q values:  [-180.6        -880.4555246  -180.6         151.09674073]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    271.96760021]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   199.98796196]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349   202.87736359]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.60726269   44.78065781 -252.78192178]
------
Step:6, Action:South
State  107
Old Q Values:  [-252.35169558  391.60726269   44.78065781 -252.78192178]
New Q values:  [-252.35169558  202.60433677   44.78065781 -252.78192178]
Reward: 9  Episode Reward:  54
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[135.20477233   0.          12.80716019   0.        ]
------
Step:7, Action:North
State  185
Old Q Values:  [   31.02574753     0.         -4434.34340468  -178.98      ]
New Q values:  [   72.59160004     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  53
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  202.60433677   44.78065781 -252.78192178]
------
Step:8, Action:South
State  107
Old Q Values:  [-252.35169558  202.60433677   44.78065781 -252.78192178]
New Q values:  [-252.35169558  102.21921472   44.78065781 -252.78192178]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   72.59160004     0.         -4434.34340468  -178.98      ]
------
Step:9, Action:North
State  185
Old Q Values:  [   72.59160004     0.         -4434.34340468  -178.98      ]
New Q values:  [   59.10240443     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  51
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  102.21921472   44.78065781 -252.78192178]
------
Step:10, Action:South
State  107
Old Q Values:  [-252.35169558  102.21921472   44.78065781 -252.78192178]
New Q values:  [-252.35169558   58.01840722   44.78065781 -252.78192178]
Reward: -1  Episode Reward:  50
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   59.10240443     0.         -4434.34340468  -178.98      ]
------
Step:11, Action:North
State  185
Old Q Values:  [   59.10240443     0.         -4434.34340468  -178.98      ]
New Q values:  [   40.44648394     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  49
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   58.01840722   44.78065781 -252.78192178]
------
Step:12, Action:South
State  105
Old Q Values:  [ -180.6           31.71563354 -5851.25726525     0.        ]
New Q values:  [ -180.6           24.2201986  -5851.25726525     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   40.44648394     0.         -4434.34340468  -178.98      ]
------
Step:13, Action:North
State  185
Old Q Values:  [   40.44648394     0.         -4434.34340468  -178.98      ]
New Q values:  [   32.98411574     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   58.01840722   44.78065781 -252.78192178]
------
Step:14, Action:South
State  105
Old Q Values:  [ -180.6           24.2201986  -5851.25726525     0.        ]
New Q values:  [ -180.6           18.98331416 -5851.25726525     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   32.98411574     0.         -4434.34340468  -178.98      ]
------
Step:15, Action:North
State  185
Old Q Values:  [   32.98411574     0.         -4434.34340468  -178.98      ]
New Q values:  [   18.28864055     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  45
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6           18.98331416 -5851.25726525     0.        ]
------
Step:16, Action:South
State  105
Old Q Values:  [ -180.6           18.98331416 -5851.25726525     0.        ]
New Q values:  [ -180.6           12.47991783 -5851.25726525     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   18.28864055     0.         -4434.34340468  -178.98      ]
------
Step:17, Action:North
State  185
Old Q Values:  [   18.28864055     0.         -4434.34340468  -178.98      ]
New Q values:  [   24.12097838     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   58.01840722   44.78065781 -252.78192178]
------
Step:18, Action:South
State  105
Old Q Values:  [ -180.6           12.47991783 -5851.25726525     0.        ]
New Q values:  [ -180.6           11.62826065 -5851.25726525     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   24.12097838     0.         -4434.34340468  -178.98      ]
------
Step:19, Action:North
State  185
Old Q Values:  [   24.12097838     0.         -4434.34340468  -178.98      ]
New Q values:  [   26.45391352     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   58.01840722   44.78065781 -252.78192178]
------
Step:20, Action:South
State  107
Old Q Values:  [-252.35169558   58.01840722   44.78065781 -252.78192178]
New Q values:  [-252.35169558   30.54353694   44.78065781 -252.78192178]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   26.45391352     0.         -4434.34340468  -178.98      ]
------
Step:21, Action:North
State  184
Old Q Values:  [-5232.32304719     0.          2146.17322709     0.        ]
New Q values:  [-2081.09028721     0.          2146.17322709     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    41.46310554  -180.6       ]
------
Step:22, Action:East
State  107
Old Q Values:  [-252.35169558   30.54353694   44.78065781 -252.78192178]
New Q values:  [-252.35169558   30.54353694   98.90254319 -252.78192178]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    271.96760021]
------
Step:23, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   5595.87088433  -2295.68701369]
New Q values:  [-10156.11771313  -8069.05606225   5595.87088433   -915.38632728]
Reward: -1  Episode Reward:  37
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6           11.62826065 -5851.25726525     0.        ]
------
Step:24, Action:South
State  104
Old Q Values:  [-8.65284000e+03  6.84575004e+00  2.54149711e+03 -8.65284000e+03]
New Q values:  [-8652.84         645.99026814  2541.49710899 -8652.84      ]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[-2081.09028721     0.          2146.17322709     0.        ]
------
Step:25, Action:East
State  184
Old Q Values:  [-2081.09028721     0.          2146.17322709     0.        ]
New Q values:  [-2081.09028721     0.          1867.03457627     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1206.93052108 3363.88428478  181.20343395]
------
Step:26, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  3.73597579e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  2.29288079e+04  1.20371620e+03]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.66183490e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:27, Action:North
State  216
Old Q Values:  [   41.53014975  6161.92605956 -6170.35693855 -1798.95296703]
New Q values:  [   61.34108212  6161.92605956 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         151.09674073]
------
Step:28, Action:West
State  136
Old Q Values:  [ 1117.30787879  2969.63947408 -6245.61866138  -408.84403263]
New Q values:  [ 1117.30787879  2969.63947408 -6245.61866138  1514.62365225]
Reward: -1  Episode Reward:  32
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   5595.87088433   -915.38632728]
------
Step:29, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   5595.87088433   -915.38632728]
New Q values:  [-10156.11771313  -8069.05606225   3128.64019596   -915.38632728]
Reward: -1  Episode Reward:  31
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  2969.63947408 -6245.61866138  1514.62365225]
------
Step:30, Action:South
State  136
Old Q Values:  [ 1117.30787879  2969.63947408 -6245.61866138  1514.62365225]
New Q values:  [ 1117.30787879  3035.8336075  -6245.61866138  1514.62365225]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   61.34108212  6161.92605956 -6170.35693855 -1798.95296703]
------
Step:31, Action:South
State  216
Old Q Values:  [   61.34108212  6161.92605956 -6170.35693855 -1798.95296703]
New Q values:  [   61.34108212  8431.89741906 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19892.42331744 -9022.41491635 -7525.7277781   3525.59675819]
------
Step:32, Action:North
State  288
Old Q Values:  [19892.42331744 -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [10485.93855269 -9022.41491635 -7525.7277781   3525.59675819]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   61.34108212  8431.89741906 -6170.35693855 -1798.95296703]
------
Step:33, Action:South
State  216
Old Q Values:  [   61.34108212  8431.89741906 -6170.35693855 -1798.95296703]
New Q values:  [   61.34108212  6517.94053343 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10485.93855269 -9022.41491635 -7525.7277781   3525.59675819]
------
Step:34, Action:North
State  288
Old Q Values:  [10485.93855269 -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [ 6149.15758111 -9022.41491635 -7525.7277781   3525.59675819]
Reward: -1  Episode Reward:  26
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   61.34108212  6517.94053343 -6170.35693855 -1798.95296703]
------
Step:35, Action:South
State  216
Old Q Values:  [   61.34108212  6517.94053343 -6170.35693855 -1798.95296703]
New Q values:  [   61.34108212  4451.3234877  -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6149.15758111 -9022.41491635 -7525.7277781   3525.59675819]
------
Step:36, Action:North
State  288
Old Q Values:  [ 6149.15758111 -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [ 3794.46007875 -9022.41491635 -7525.7277781   3525.59675819]
Reward: -1  Episode Reward:  24
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   61.34108212  4451.3234877  -6170.35693855 -1798.95296703]
------
Step:37, Action:South
State  216
Old Q Values:  [   61.34108212  4451.3234877  -6170.35693855 -1798.95296703]
New Q values:  [   61.34108212  2918.26741871 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3794.46007875 -9022.41491635 -7525.7277781   3525.59675819]
------
Step:38, Action:West
State  288
Old Q Values:  [ 3794.46007875 -9022.41491635 -7525.7277781   3525.59675819]
New Q values:  [ 3794.46007875 -9022.41491635 -7525.7277781   5917.42897799]
Reward: -1  Episode Reward:  22
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 15025.96758239  8883.29406141]
------
Step:39, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 15025.96758239  8883.29406141]
New Q values:  [  37.74111519 -168.92307549 7785.01572635 8883.29406141]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3794.46007875 -9022.41491635 -7525.7277781   5917.42897799]
------
Step:40, Action:West
State  288
Old Q Values:  [ 3794.46007875 -9022.41491635 -7525.7277781   5917.42897799]
New Q values:  [ 3794.46007875 -9022.41491635 -7525.7277781   8945.88332064]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 21931.7057648   2963.80814412]
------
Step:41, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 21931.7057648   2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799 11455.84730211  2963.80814412]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3794.46007875 -9022.41491635 -7525.7277781   8945.88332064]
------
Step:42, Action:North
State  288
Old Q Values:  [ 3794.46007875 -9022.41491635 -7525.7277781   8945.88332064]
New Q values:  [ 3069.02563229 -9022.41491635 -7525.7277781   8945.88332064]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  89.46599613 5172.80533595    0.          322.01285006]
------
Step:43, Action:West
State  218
Old Q Values:  [  89.46599613 5172.80533595    0.          322.01285006]
New Q values:  [  89.46599613 5172.80533595    0.          398.45968492]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458   900.84848298     0.        ]
------
Step:44, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  2.29288079e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  1.71564278e+04  1.20371620e+03]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.66183490e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:45, Action:North
State  216
Old Q Values:  [   61.34108212  2918.26741871 -6170.35693855 -1798.95296703]
New Q values:  [   69.26545507  2918.26741871 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         151.09674073]
------
Step:46, Action:West
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 65470.06539732]
New Q values:  [29415.41153072  2346.20646678  -180.00807518 61805.7880268 ]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  70318.28144641 118727.87289289]
------
Step:47, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    271.96760021]
New Q values:  [ -281.736      -9545.4473624     29.8412011    120.62597174]
Reward: -1  Episode Reward:  13
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    41.46310554  -180.6       ]
------
Step:48, Action:East
State  98
Old Q Values:  [    0.         41136.94667617 48290.70319312     0.        ]
New Q values:  [    0.         41136.94667617 54934.04314511     0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  70318.28144641 118727.87289289]
------
Step:49, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    120.62597174]
New Q values:  [ -281.736      -9545.4473624     29.8412011     77.32115165]
Reward: -1  Episode Reward:  11
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   30.54353694   98.90254319 -252.78192178]
------
Step:50, Action:East
State  107
Old Q Values:  [-252.35169558   30.54353694   98.90254319 -252.78192178]
New Q values:  [-252.35169558   30.54353694   62.15736277 -252.78192178]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011     77.32115165]
------
Step:51, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349   202.87736359]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349    99.19815427]
Reward: -1  Episode Reward:  9
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   30.54353694   62.15736277 -252.78192178]
------
Step:52, Action:East
State  105
Old Q Values:  [ -180.6           11.62826065 -5851.25726525     0.        ]
New Q values:  [ -180.6           11.62826065 -2318.12555531     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -6661.88868685    76.59116932]
------
Step:53, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349    99.19815427]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349    57.72647054]
Reward: -1  Episode Reward:  7
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   30.54353694   62.15736277 -252.78192178]
------
Step:54, Action:East
State  105
Old Q Values:  [ -180.6           11.62826065 -2318.12555531     0.        ]
New Q values:  [-180.6          11.62826065 -904.87287133    0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -6661.88868685    76.59116932]
------
Step:55, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   3128.64019596   -915.38632728]
New Q values:  [-10156.11771313  -8069.05606225   3128.64019596   -363.26605272]
Reward: -1  Episode Reward:  5
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6          11.62826065 -904.87287133    0.        ]
------
Step:56, Action:South
State  105
Old Q Values:  [-180.6          11.62826065 -904.87287133    0.        ]
New Q values:  [-180.6          11.98747831 -904.87287133    0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   26.45391352     0.         -4434.34340468  -178.98      ]
------
Step:57, Action:North
State  185
Old Q Values:  [   26.45391352     0.         -4434.34340468  -178.98      ]
New Q values:  [   13.5778089      0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  3
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6          11.98747831 -904.87287133    0.        ]
------
Step:58, Action:South
State  107
Old Q Values:  [-252.35169558   30.54353694   62.15736277 -252.78192178]
New Q values:  [-252.35169558   15.69075745   62.15736277 -252.78192178]
Reward: -1  Episode Reward:  2
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   13.5778089      0.         -4434.34340468  -178.98      ]
------
Step:59, Action:North
State  185
Old Q Values:  [   13.5778089      0.         -4434.34340468  -178.98      ]
New Q values:  [    8.42736706     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  1
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6          11.98747831 -904.87287133    0.        ]
------
Step:60, Action:South
State  105
Old Q Values:  [-180.6          11.98747831 -904.87287133    0.        ]
New Q values:  [-180.6           6.72320144 -904.87287133    0.        ]
Reward: -1  Episode Reward:  0
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[    8.42736706     0.         -4434.34340468  -178.98      ]
------
Step:61, Action:North
State  185
Old Q Values:  [    8.42736706     0.         -4434.34340468  -178.98      ]
New Q values:  [    4.78790725     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  -1
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6           6.72320144 -904.87287133    0.        ]
------
Step:62, Action:South
State  107
Old Q Values:  [-252.35169558   15.69075745   62.15736277 -252.78192178]
New Q values:  [-252.35169558    7.11267516   62.15736277 -252.78192178]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[    4.78790725     0.         -4434.34340468  -178.98      ]
------
Step:63, Action:North
State  185
Old Q Values:  [    4.78790725     0.         -4434.34340468  -178.98      ]
New Q values:  [   19.96237173     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  -3
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   62.15736277 -252.78192178]
------
Step:64, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   62.15736277 -252.78192178]
New Q values:  [-252.35169558    7.11267516   47.4592906  -252.78192178]
Reward: -1  Episode Reward:  -4
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011     77.32115165]
------
Step:65, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   3128.64019596   -363.26605272]
New Q values:  [-10156.11771313  -8069.05606225   3128.64019596   -143.88946065]
Reward: -1  Episode Reward:  -5
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6           6.72320144 -904.87287133    0.        ]
------
Step:66, Action:South
State  104
Old Q Values:  [-8652.84         645.99026814  2541.49710899 -8652.84      ]
New Q values:  [-8652.84         817.90648014  2541.49710899 -8652.84      ]
Reward: -1  Episode Reward:  -6
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[-2081.09028721     0.          1867.03457627     0.        ]
------
Step:67, Action:East
State  184
Old Q Values:  [-2081.09028721     0.          1867.03457627     0.        ]
New Q values:  [-2081.09028721     0.          1755.37911594     0.        ]
Reward: -1  Episode Reward:  -7
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1206.93052108 3363.88428478  181.20343395]
------
Step:68, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  1.71564278e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  1.48474758e+04  1.20371620e+03]
Reward: -1  Episode Reward:  -8
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.66183490e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:69, Action:North
State  218
Old Q Values:  [  89.46599613 5172.80533595    0.          398.45968492]
New Q values:  [  80.51542067 5172.80533595    0.          398.45968492]
Reward: -1  Episode Reward:  -9
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        -880.4555246  -180.6         151.09674073]
------
Step:70, Action:West
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6         151.09674073]
New Q values:  [-180.6        -880.4555246  -180.6          83.03504179]
Reward: -1  Episode Reward:  -10
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011     77.32115165]
------
Step:71, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011     77.32115165]
New Q values:  [ -281.736      -9545.4473624     29.8412011     42.76739232]
Reward: -1  Episode Reward:  -11
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    41.46310554  -180.6       ]
------
Step:72, Action:East
State  104
Old Q Values:  [-8652.84         817.90648014  2541.49710899 -8652.84      ]
New Q values:  [-8652.84         817.90648014  1954.59090238 -8652.84      ]
Reward: -1  Episode Reward:  -12
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   3128.64019596   -143.88946065]
------
Step:73, Action:East
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   3128.64019596   -143.88946065]
New Q values:  [-10156.11771313  -8069.05606225   2161.60616063   -143.88946065]
Reward: -1  Episode Reward:  -13
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  3035.8336075  -6245.61866138  1514.62365225]
------
Step:74, Action:South
State  138
Old Q Values:  [-180.6        -880.4555246  -180.6          83.03504179]
New Q values:  [-180.6         522.69801577 -180.6          83.03504179]
Reward: -1  Episode Reward:  -14
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   69.26545507  2918.26741871 -6170.35693855 -1798.95296703]
------
Step:75, Action:South
State  218
Old Q Values:  [  80.51542067 5172.80533595    0.          398.45968492]
New Q values:  [  80.51542067 4752.28713057    0.          398.45968492]
Reward: -1  Episode Reward:  -15
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3069.02563229 -9022.41491635 -7525.7277781   8945.88332064]
------
Step:76, Action:North
State  288
Old Q Values:  [ 3069.02563229 -9022.41491635 -7525.7277781   8945.88332064]
New Q values:  [ 2652.69639209 -9022.41491635 -7525.7277781   8945.88332064]
Reward: -1  Episode Reward:  -16
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  80.51542067 4752.28713057    0.          398.45968492]
------
Step:77, Action:West
State  218
Old Q Values:  [  80.51542067 4752.28713057    0.          398.45968492]
New Q values:  [  80.51542067 4752.28713057    0.          429.03841886]
Reward: -1  Episode Reward:  -17
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458   900.84848298     0.        ]
------
Step:78, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  1.48474758e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  1.39238950e+04  1.20371620e+03]
Reward: -1  Episode Reward:  -18
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.66183490e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:79, Action:North
State  216
Old Q Values:  [   69.26545507  2918.26741871 -6170.35693855 -1798.95296703]
New Q values:  [  183.91558676  2918.26741871 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  -19
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         522.69801577 -180.6          83.03504179]
------
Step:80, Action:South
State  130
Old Q Values:  [29415.41153072  2346.20646678  -180.00807518 61805.7880268 ]
New Q values:  [29415.41153072  8923.38728765  -180.00807518 61805.7880268 ]
Reward: -1  Episode Reward:  -20
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.66183490e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:81, Action:North
State  216
Old Q Values:  [  183.91558676  2918.26741871 -6170.35693855 -1798.95296703]
New Q values:  [  229.77563944  2918.26741871 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  -21
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         522.69801577 -180.6          83.03504179]
------
Step:82, Action:South
State  138
Old Q Values:  [-180.6         522.69801577 -180.6          83.03504179]
New Q values:  [-180.6        1083.95943192 -180.6          83.03504179]
Reward: -1  Episode Reward:  -22
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  229.77563944  2918.26741871 -6170.35693855 -1798.95296703]
------
Step:83, Action:South
State  216
Old Q Values:  [  229.77563944  2918.26741871 -6170.35693855 -1798.95296703]
New Q values:  [  229.77563944  3850.47196367 -6170.35693855 -1798.95296703]
Reward: -1  Episode Reward:  -23
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2652.69639209 -9022.41491635 -7525.7277781   8945.88332064]
------
Step:84, Action:West
State  288
Old Q Values:  [ 2652.69639209 -9022.41491635 -7525.7277781   8945.88332064]
New Q values:  [ 2652.69639209 -9022.41491635 -7525.7277781   7014.50751889]
Reward: -1  Episode Reward:  -24
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 11455.84730211  2963.80814412]
------
Step:85, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11455.84730211  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799  6686.09117651  2963.80814412]
Reward: -1  Episode Reward:  -25
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2652.69639209 -9022.41491635 -7525.7277781   7014.50751889]
------
Step:86, Action:West
State  288
Old Q Values:  [ 2652.69639209 -9022.41491635 -7525.7277781   7014.50751889]
New Q values:  [ 2652.69639209 -9022.41491635 -7525.7277781   4811.03036051]
Reward: -1  Episode Reward:  -26
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6686.09117651  2963.80814412]
------
Step:87, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6686.09117651  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799  4117.14557876  2963.80814412]
Reward: -1  Episode Reward:  -27
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2652.69639209 -9022.41491635 -7525.7277781   4811.03036051]
------
Step:88, Action:West
State  288
Old Q Values:  [ 2652.69639209 -9022.41491635 -7525.7277781   4811.03036051]
New Q values:  [ 2652.69639209 -9022.41491635 -7525.7277781   3158.95581783]
Reward: -1  Episode Reward:  -28
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4117.14557876  2963.80814412]
------
Step:89, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4117.14557876  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799  2593.94497685  2963.80814412]
Reward: -1  Episode Reward:  -29
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2652.69639209 -9022.41491635 -7525.7277781   3158.95581783]
------
Step:90, Action:West
State  288
Old Q Values:  [ 2652.69639209 -9022.41491635 -7525.7277781   3158.95581783]
New Q values:  [ 2652.69639209 -9022.41491635 -7525.7277781   2152.12477037]
Reward: -1  Episode Reward:  -30
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2593.94497685  2963.80814412]
------
Step:91, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2593.94497685  2963.80814412]
New Q values:  [-2527.46239811 -8521.23367799  2593.94497685 72341.73242543]
Reward: 100009  Episode Reward:  99979
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011     42.76739232]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011     42.76739232]
New Q values:  [ -281.736     -9545.4473624    29.8412011    89.6614618]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  223.84834955   32.43345925 -180.6       ]
------
Step:2, Action:East
State  110
Old Q Values:  [-239.29051573  223.84834955   32.43345925 -180.6       ]
New Q values:  [-239.29051573  223.84834955   39.27182224 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736     -9545.4473624    29.8412011    89.6614618]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736     -9545.4473624    29.8412011    89.6614618]
New Q values:  [ -281.736      -9545.4473624     29.8412011    102.41908958]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  223.84834955   39.27182224 -180.6       ]
------
Step:4, Action:East
State  110
Old Q Values:  [-239.29051573  223.84834955   39.27182224 -180.6       ]
New Q values:  [-239.29051573  223.84834955   45.83445577 -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    102.41908958]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    102.41908958]
New Q values:  [ -281.736      -9545.4473624     29.8412011    172.70467756]
Reward: -1  Episode Reward:  5
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  441.12347241  192.24933255 -120.29354603]
------
Step:6, Action:South
State  110
Old Q Values:  [-239.29051573  223.84834955   45.83445577 -180.6       ]
New Q values:  [-239.29051573  368.88319155   45.83445577 -180.6       ]
Reward: 9  Episode Reward:  14
xxxxx
x  .x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   913.14617244     0.        ]
------
Step:7, Action:East
State  180
Old Q Values:  [ -923.83463439   266.69721195  4047.59500673 -4966.32149798]
New Q values:  [ -923.83463439   266.69721195  2916.12588658 -4966.32149798]
Reward: 9  Episode Reward:  23
xxxxx
x  .x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4305.62627964   174.55451539     0.        ]
------
Step:8, Action:South
State  198
Old Q Values:  [-2.78872080e-01  3.92854232e+03  5.59303454e+01  0.00000000e+00]
New Q values:  [-2.78872080e-01  4.42789209e+03  5.59303454e+01  0.00000000e+00]
Reward: 9  Episode Reward:  32
xxxxx
x  .x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        9503.58387167 6515.41066028]
------
Step:9, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        9503.58387167 6515.41066028]
New Q values:  [  16.82637525 -180.6        4602.64246629 6515.41066028]
Reward: 9  Episode Reward:  41
xxxxx
x  .x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2652.69639209 -9022.41491635 -7525.7277781   2152.12477037]
------
Step:10, Action:North
State  288
Old Q Values:  [ 2652.69639209 -9022.41491635 -7525.7277781   2152.12477037]
New Q values:  [ 3657.8916942  -9022.41491635 -7525.7277781   2152.12477037]
Reward: 9  Episode Reward:  50
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6329.37142412  8638.04379123   535.33196404 -3385.12952694]
------
Step:11, Action:South
State  208
Old Q Values:  [ 6329.37142412  8638.04379123   535.33196404 -3385.12952694]
New Q values:  [ 6329.37142412  4551.98502475   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  49
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3657.8916942  -9022.41491635 -7525.7277781   2152.12477037]
------
Step:12, Action:North
State  288
Old Q Values:  [ 3657.8916942  -9022.41491635 -7525.7277781   2152.12477037]
New Q values:  [ 3361.36810492 -9022.41491635 -7525.7277781   2152.12477037]
Reward: -1  Episode Reward:  48
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6329.37142412  4551.98502475   535.33196404 -3385.12952694]
------
Step:13, Action:North
State  208
Old Q Values:  [ 6329.37142412  4551.98502475   535.33196404 -3385.12952694]
New Q values:  [71674.92973338  4551.98502475   535.33196404 -3385.12952694]
Reward: 100009  Episode Reward:  100057
xxxxx
x gax
x   x
x   x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 7785.01572635 8883.29406141]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2593.94497685 72341.73242543]
New Q values:  [-2527.46239811 -8521.23367799  2593.94497685 30511.88490411]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 260.01300857  -40.34168621 5232.6397798   -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [ 260.01300857  -40.34168621 5232.6397798   -35.88578819]
New Q values:  [ 690.71378182  -40.34168621 5232.6397798   -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117 1654.71634746 1937.69526132    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 225.17846117 1654.71634746 1937.69526132    0.        ]
New Q values:  [ 225.17846117 1654.71634746 4957.64661559    0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.97854041e+03  1.39238950e+04  1.20371620e+03]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.42556550e+03 1.51455110e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 7.42556550e+03 2.75660833e+04 2.91043938e+03]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[71674.92973338  4551.98502475   535.33196404 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [71674.92973338  4551.98502475   535.33196404 -3385.12952694]
New Q values:  [37813.15305708  4551.98502475   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  45
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068  1504.73148864 -8652.84       30459.27054576]
------
Step:6, Action:North
State  130
Old Q Values:  [29415.41153072  8923.38728765  -180.00807518 61805.7880268 ]
New Q values:  [30127.30102033  8923.38728765  -180.00807518 61805.7880268 ]
Reward: -301  Episode Reward:  -256
xxxxx
x..ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[30127.30102033  8923.38728765  -180.00807518 61805.7880268 ]
------
Step:7, Action:West
State  128
Old Q Values:  [ 8775.70846068  1504.73148864 -8652.84       30459.27054576]
New Q values:  [ 8775.70846068  1504.73148864 -8652.84       35423.73909831]
Reward: -9991  Episode Reward:  -10247
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    172.70467756]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    172.70467756]
New Q values:  [ -281.736      -9545.4473624     29.8412011    206.81891274]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  441.12347241  192.24933255 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  441.12347241  192.24933255 -120.29354603]
New Q values:  [-177.44732869  235.55077318  192.24933255 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 179.00461405   15.18059333 -986.66020107 -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [ 179.00461405   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 141.66707757   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
xa .x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  235.55077318  192.24933255 -120.29354603]
------
Step:4, Action:South
State  109
Old Q Values:  [-241.10880094   76.72270299    3.88307055 -180.6       ]
New Q values:  [-241.10880094   72.58920447    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.66707757   15.18059333 -986.66020107 -180.6       ]
------
Step:5, Action:North
State  181
Old Q Values:  [ 141.66707757   15.18059333 -986.66020107 -180.6       ]
New Q values:  [ 126.73206298   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
xa .x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  235.55077318  192.24933255 -120.29354603]
------
Step:6, Action:South
State  109
Old Q Values:  [-241.10880094   72.58920447    3.88307055 -180.6       ]
New Q values:  [-241.10880094   66.45530068    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 126.73206298   15.18059333 -986.66020107 -180.6       ]
------
Step:7, Action:North
State  181
Old Q Values:  [ 126.73206298   15.18059333 -986.66020107 -180.6       ]
New Q values:  [  70.0294154    15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
xag.x
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   66.45530068    3.88307055 -180.6       ]
------
Step:8, Action:South
State  108
Old Q Values:  [-8463.16477134  2096.97236033  3336.41261644     0.        ]
New Q values:  [-8463.16477134  1713.02671011  3336.41261644     0.        ]
Reward: -1  Episode Reward:  12
xxxxx
xg .x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -923.83463439   266.69721195  2916.12588658 -4966.32149798]
------
Step:9, Action:East
State  180
Old Q Values:  [ -923.83463439   266.69721195  2916.12588658 -4966.32149798]
New Q values:  [ -923.83463439   266.69721195  2463.53823852 -4966.32149798]
Reward: 9  Episode Reward:  21
xxxxx
x  .x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4305.62627964   174.55451539     0.        ]
------
Step:10, Action:South
State  196
Old Q Values:  [-2469.90645144  4305.62627964   174.55451539     0.        ]
New Q values:  [-2469.90645144  3682.27370994   174.55451539     0.        ]
Reward: 9  Episode Reward:  30
xxxxx
x  .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        4602.64246629 6515.41066028]
------
Step:11, Action:West
State  277
Old Q Values:  [  1.64433      0.         388.49519889   0.        ]
New Q values:  [   1.64433       0.          388.49519889 1569.19193394]
Reward: -1  Episode Reward:  29
xxxxx
x  .x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 690.71378182  -40.34168621 5232.6397798   -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [ 690.71378182  -40.34168621 5232.6397798   -35.88578819]
New Q values:  [ 690.71378182  -40.34168621 4047.07911     -35.88578819]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        4602.64246629 6515.41066028]
------
Step:13, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        4602.64246629 6515.41066028]
New Q values:  [  16.82637525 -180.6        4602.64246629 3833.02736607]
Reward: -1  Episode Reward:  27
xxxxx
x  .x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  565.18716423 -8695.4397473   4091.54367319 -2601.74710518]
------
Step:14, Action:East
State  260
Old Q Values:  [  565.18716423 -8695.4397473   4091.54367319 -2601.74710518]
New Q values:  [  565.18716423 -8695.4397473   3016.81020917 -2601.74710518]
Reward: -1  Episode Reward:  26
xxxxx
xg .x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        4602.64246629 3833.02736607]
------
Step:15, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        4602.64246629 3833.02736607]
New Q values:  [  16.82637525 -180.6        2854.86741799 3833.02736607]
Reward: 9  Episode Reward:  35
xxxxx
x g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3361.36810492 -9022.41491635 -7525.7277781   2152.12477037]
------
Step:16, Action:North
State  288
Old Q Values:  [ 3361.36810492 -9022.41491635 -7525.7277781   2152.12477037]
New Q values:  [12693.89315909 -9022.41491635 -7525.7277781   2152.12477037]
Reward: 9  Episode Reward:  44
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[37813.15305708  4551.98502475   535.33196404 -3385.12952694]
------
Step:17, Action:North
State  208
Old Q Values:  [37813.15305708  4551.98502475   535.33196404 -3385.12952694]
New Q values:  [93672.39763087  4551.98502475   535.33196404 -3385.12952694]
Reward: 100009  Episode Reward:  100053
xxxxx
x  ax
xg  x
x   x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 7.42556550e+03 2.75660833e+04 2.91043938e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108 3363.88428478  181.20343395]
New Q values:  [  62.8218634  1206.93052108 2506.09530301  181.20343395]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  229.77563944  3850.47196367 -6170.35693855 -1798.95296703]
------
Step:2, Action:South
State  208
Old Q Values:  [93672.39763087  4551.98502475   535.33196404 -3385.12952694]
New Q values:  [93672.39763087  5634.36195763   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12693.89315909 -9022.41491635 -7525.7277781   2152.12477037]
------
Step:3, Action:North
State  288
Old Q Values:  [12693.89315909 -9022.41491635 -7525.7277781   2152.12477037]
New Q values:  [33178.6765529  -9022.41491635 -7525.7277781   2152.12477037]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[93672.39763087  5634.36195763   535.33196404 -3385.12952694]
------
Step:4, Action:North
State  216
Old Q Values:  [  229.77563944  3850.47196367 -6170.35693855 -1798.95296703]
New Q values:  [ 1008.06033802  3850.47196367 -6170.35693855 -1798.95296703]
Reward: 9  Episode Reward:  26
xxxxx
x.gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  3035.8336075  -6245.61866138  1514.62365225]
------
Step:5, Action:South
State  128
Old Q Values:  [ 8775.70846068  1504.73148864 -8652.84       35423.73909831]
New Q values:  [ 8775.70846068 28703.01188472 -8652.84       35423.73909831]
Reward: -1  Episode Reward:  25
xxxxx
x..gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[93672.39763087  5634.36195763   535.33196404 -3385.12952694]
------
Step:6, Action:South
State  208
Old Q Values:  [93672.39763087  5634.36195763   535.33196404 -3385.12952694]
New Q values:  [93672.39763087 12206.74774892   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[33178.6765529  -9022.41491635 -7525.7277781   2152.12477037]
------
Step:7, Action:West
State  288
Old Q Values:  [33178.6765529  -9022.41491635 -7525.7277781   2152.12477037]
New Q values:  [33178.6765529  -9022.41491635 -7525.7277781   3531.23812657]
Reward: 9  Episode Reward:  33
xxxxx
x.. x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 7785.01572635 8883.29406141]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2593.94497685 30511.88490411]
New Q values:  [-2527.46239811 -8521.23367799  2593.94497685 23360.96312942]
Reward: 9  Episode Reward:  42
xxxxx
x.. x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37169.36389259  2256.66526474   425.90861234  1875.31501677]
------
Step:9, Action:North
State  256
Old Q Values:  [23001.22962401  7580.15833597  5576.40109469   644.94785455]
New Q values:  [35558.86345191  7580.15833597  5576.40109469   644.94785455]
Reward: -1  Episode Reward:  41
xxxxx
x.. x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:10, Action:East
State  179
Old Q Values:  [     0.           4614.46100011 105506.08823113      0.        ]
New Q values:  [    0.          4614.46100011 46379.00380351     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x.. x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.97854041e+03  1.39238950e+04  1.20371620e+03]
------
Step:11, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  1.39238950e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -7.97854041e+03  1.35544627e+04  1.20371620e+03]
Reward: -1  Episode Reward:  39
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.66183490e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
------
Step:12, Action:North
State  210
Old Q Values:  [2.66183490e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [2.91884760e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  38
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[30127.30102033  8923.38728765  -180.00807518 61805.7880268 ]
------
Step:13, Action:West
State  130
Old Q Values:  [30127.30102033  8923.38728765  -180.00807518 61805.7880268 ]
New Q values:  [30127.30102033  8923.38728765  -180.00807518 62892.98816114]
Reward: 9  Episode Reward:  47
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.27217577e+05]
------
Step:14, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.27217577e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.28658799e+05]
Reward: 100009  Episode Reward:  100056
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[33178.6765529  -9022.41491635 -7525.7277781   3531.23812657]
------
Step:1, Action:North
State  288
Old Q Values:  [33178.6765529  -9022.41491635 -7525.7277781   3531.23812657]
New Q values:  [41378.58991042 -9022.41491635 -7525.7277781   3531.23812657]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[93672.39763087 12206.74774892   535.33196404 -3385.12952694]
------
Step:2, Action:North
State  208
Old Q Values:  [93672.39763087 12206.74774892   535.33196404 -3385.12952694]
New Q values:  [37799.54688192 12206.74774892   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1083.95943192 -180.6          83.03504179]
------
Step:3, Action:South
State  138
Old Q Values:  [-180.6        1083.95943192 -180.6          83.03504179]
New Q values:  [-180.6        5772.84783735 -180.6          83.03504179]
Reward: -10001  Episode Reward:  -9983
xxxxx
x . x
x..gx
x.. x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        5772.84783735 -180.6          83.03504179]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6        5772.84783735 -180.6          83.03504179]
New Q values:  [-180.6        3469.68072404 -180.6          83.03504179]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1008.06033802  3850.47196367 -6170.35693855 -1798.95296703]
------
Step:2, Action:South
State  210
Old Q Values:  [2.91884760e+04 4.30926513e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [2.91884760e+04 1.41426830e+04 4.36673472e+03 3.52184257e+00]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[41378.58991042 -9022.41491635 -7525.7277781   3531.23812657]
------
Step:3, Action:North
State  288
Old Q Values:  [41378.58991042 -9022.41491635 -7525.7277781   3531.23812657]
New Q values:  [27890.70002875 -9022.41491635 -7525.7277781   3531.23812657]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[37799.54688192 12206.74774892   535.33196404 -3385.12952694]
------
Step:4, Action:North
State  210
Old Q Values:  [2.91884760e+04 1.41426830e+04 4.36673472e+03 3.52184257e+00]
New Q values:  [3.05426869e+04 1.41426830e+04 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[30127.30102033  8923.38728765  -180.00807518 62892.98816114]
------
Step:5, Action:West
State  138
Old Q Values:  [-180.6        3469.68072404 -180.6          83.03504179]
New Q values:  [-180.6        3469.68072404 -180.6         100.65969054]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    206.81891274]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    206.81891274]
New Q values:  [ -281.736      -9545.4473624     29.8412011    102.36535228]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   47.4592906  -252.78192178]
------
Step:7, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   47.4592906  -252.78192178]
New Q values:  [-252.35169558    7.11267516   49.09332193 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    102.36535228]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    102.36535228]
New Q values:  [ -281.736      -9545.4473624     29.8412011     55.07413749]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   49.09332193 -252.78192178]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   49.09332193 -252.78192178]
New Q values:  [-252.35169558    7.11267516   35.55957002 -252.78192178]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011     55.07413749]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011     55.07413749]
New Q values:  [ -281.736     -9545.4473624    29.8412011    32.097526 ]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   35.55957002 -252.78192178]
------
Step:11, Action:East
State  110
Old Q Values:  [-239.29051573  368.88319155   45.83445577 -180.6       ]
New Q values:  [-239.29051573  368.88319155   27.36304011 -180.6       ]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x . x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736     -9545.4473624    29.8412011    32.097526 ]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736     -9545.4473624    29.8412011    32.097526 ]
New Q values:  [ -281.736      -9545.4473624     29.8412011    122.90396787]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  368.88319155   27.36304011 -180.6       ]
------
Step:13, Action:East
State  110
Old Q Values:  [-239.29051573  368.88319155   27.36304011 -180.6       ]
New Q values:  [-239.29051573  368.88319155   47.2164064  -180.6       ]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x . x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    122.90396787]
------
Step:14, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    122.90396787]
New Q values:  [ -281.736      -9545.4473624     29.8412011    159.22654461]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  368.88319155   47.2164064  -180.6       ]
------
Step:15, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   35.55957002 -252.78192178]
New Q values:  [-252.35169558    7.11267516   61.39179139 -252.78192178]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011    159.22654461]
------
Step:16, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    159.22654461]
New Q values:  [ -281.736      -9545.4473624     29.8412011     81.50815526]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   61.39179139 -252.78192178]
------
Step:17, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   61.39179139 -252.78192178]
New Q values:  [-252.35169558    7.11267516   48.40916313 -252.78192178]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011     81.50815526]
------
Step:18, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   2161.60616063   -143.88946065]
New Q values:  [-10156.11771313  -8069.05606225   2161.60616063    -38.21919406]
Reward: -1  Episode Reward:  22
xxxxx
xag x
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   66.45530068    3.88307055 -180.6       ]
------
Step:19, Action:South
State  109
Old Q Values:  [-241.10880094   66.45530068    3.88307055 -180.6       ]
New Q values:  [-241.10880094   46.99094489    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  21
xxxxx
x  gx
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  70.0294154    15.18059333 -986.66020107 -180.6       ]
------
Step:20, Action:North
State  181
Old Q Values:  [  70.0294154    15.18059333 -986.66020107 -180.6       ]
New Q values:  [  98.07699811   15.18059333 -986.66020107 -180.6       ]
Reward: -1  Episode Reward:  20
xxxxx
xa  x
x .gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  235.55077318  192.24933255 -120.29354603]
------
Step:21, Action:South
State  111
Old Q Values:  [-177.44732869  235.55077318  192.24933255 -120.29354603]
New Q values:  [-177.44732869 1580.91429395  192.24933255 -120.29354603]
Reward: -1  Episode Reward:  19
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117 1654.71634746 4957.64661559    0.        ]
------
Step:22, Action:East
State  187
Old Q Values:  [135.20477233   0.          12.80716019   0.        ]
New Q values:  [135.20477233   0.         280.77740897   0.        ]
Reward: 9  Episode Reward:  28
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458   900.84848298     0.        ]
------
Step:23, Action:East
State  203
Old Q Values:  [  3.60604218 917.47486444 992.62141468   0.        ]
New Q values:  [   3.60604218  917.47486444 1822.13470504    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  80.51542067 4752.28713057    0.          429.03841886]
------
Step:24, Action:West
State  216
Old Q Values:  [ 1008.06033802  3850.47196367 -6170.35693855 -1798.95296703]
New Q values:  [ 1.00806034e+03  3.85047196e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.41382324e+03 -8.94356769e+03  2.40000000e-02]
------
Step:25, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  2.41382324e+03 -8.94356769e+03  2.40000000e-02]
New Q values:  [ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
Reward: 9  Episode Reward:  35
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 7785.01572635 8883.29406141]
------
Step:26, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 7785.01572635 8883.29406141]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  7.78501573e+03  7.47095268e+04]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[27890.70002875 -9022.41491635 -7525.7277781   3531.23812657]
------
Step:1, Action:North
State  288
Old Q Values:  [27890.70002875 -9022.41491635 -7525.7277781   3531.23812657]
New Q values:  [22501.54407608 -9022.41491635 -7525.7277781   3531.23812657]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[37799.54688192 12206.74774892   535.33196404 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [37799.54688192 12206.74774892   535.33196404 -3385.12952694]
New Q values:  [37799.54688192 11632.56232239   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[22501.54407608 -9022.41491635 -7525.7277781   3531.23812657]
------
Step:3, Action:North
State  288
Old Q Values:  [22501.54407608 -9022.41491635 -7525.7277781   3531.23812657]
New Q values:  [20339.88169501 -9022.41491635 -7525.7277781   3531.23812657]
Reward: -1  Episode Reward:  7
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[37799.54688192 11632.56232239   535.33196404 -3385.12952694]
------
Step:4, Action:South
State  208
Old Q Values:  [37799.54688192 11632.56232239   535.33196404 -3385.12952694]
New Q values:  [37799.54688192 10754.38943746   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20339.88169501 -9022.41491635 -7525.7277781   3531.23812657]
------
Step:5, Action:West
State  288
Old Q Values:  [20339.88169501 -9022.41491635 -7525.7277781   3531.23812657]
New Q values:  [20339.88169501 -9022.41491635 -7525.7277781  23830.75328833]
Reward: 9  Episode Reward:  15
xxxxx
x. .x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  7.78501573e+03  7.47095268e+04]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2593.94497685 23360.96312942]
New Q values:  [-2527.46239811 -8521.23367799  2593.94497685 20500.59441955]
Reward: 9  Episode Reward:  24
xxxxx
x. .x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37169.36389259  2256.66526474   425.90861234  1875.31501677]
------
Step:7, Action:North
State  257
Old Q Values:  [37169.36389259  2256.66526474   425.90861234  1875.31501677]
New Q values:  [44618.89738728  2256.66526474   425.90861234  1875.31501677]
Reward: 9  Episode Reward:  33
xxxxx
x. .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 99152.50610081     0.        ]
------
Step:8, Action:North
State  180
Old Q Values:  [ -923.83463439   266.69721195  2463.53823852 -4966.32149798]
New Q values:  [ -362.58585376   266.69721195  2463.53823852 -4966.32149798]
Reward: 9  Episode Reward:  42
xxxxx
xa .x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[ -180.6       -2257.0253383     5.16       -180.6      ]
------
Step:9, Action:East
State  100
Old Q Values:  [  0.         407.67277794   0.           0.        ]
New Q values:  [  0.         407.67277794  -0.6          0.        ]
Reward: -1  Episode Reward:  41
xxxxx
xga.x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[0. 0. 0. 0.]
------
Step:10, Action:North
State  118
Old Q Values:  [ -180.6 -6000.6     0.      0. ]
New Q values:  [ -252.84 -6000.6      0.       0.  ]
Reward: -301  Episode Reward:  -260
xxxxx
x a.x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ -252.84 -6000.6      0.       0.  ]
------
Step:11, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  70318.28144641 118727.87289289]
New Q values:  [  -180.6          4272.38349051  47000.60902691 118727.87289289]
Reward: 9  Episode Reward:  -251
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[30127.30102033  8923.38728765  -180.00807518 62892.98816114]
------
Step:12, Action:West
State  138
Old Q Values:  [-180.6        3469.68072404 -180.6         100.65969054]
New Q values:  [-180.6        3469.68072404 -180.6          97.52090826]
Reward: -1  Episode Reward:  -252
xxxxx
x a x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349    57.72647054]
------
Step:13, Action:East
State  121
Old Q Values:  [    0.             0.         -6661.88868685    76.59116932]
New Q values:  [    0.             0.         -7754.60539249    76.59116932]
Reward: -10001  Episode Reward:  -10253
xxxxx
x  gx
x . x
x   x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.97854041e+03  1.35544627e+04  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.42556550e+03 2.75660833e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 7.42556550e+03 2.23716974e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[37799.54688192 10754.38943746   535.33196404 -3385.12952694]
------
Step:2, Action:North
State  208
Old Q Values:  [37799.54688192 10754.38943746   535.33196404 -3385.12952694]
New Q values:  [16035.96883502 10754.38943746   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  3035.8336075  -6245.61866138  1514.62365225]
------
Step:3, Action:South
State  138
Old Q Values:  [-180.6        3469.68072404 -180.6          97.52090826]
New Q values:  [-180.6        6198.06294012 -180.6          97.52090826]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16035.96883502 10754.38943746   535.33196404 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [16035.96883502 10754.38943746   535.33196404 -3385.12952694]
New Q values:  [ 7324.53761626 10754.38943746   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  16
xxxxx
x.gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  3035.8336075  -6245.61866138  1514.62365225]
------
Step:5, Action:South
State  136
Old Q Values:  [ 1117.30787879  3035.8336075  -6245.61866138  1514.62365225]
New Q values:  [ 1117.30787879  4440.05027424 -6245.61866138  1514.62365225]
Reward: -1  Episode Reward:  15
xxxxx
x..gx
x. ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7324.53761626 10754.38943746   535.33196404 -3385.12952694]
------
Step:6, Action:South
State  208
Old Q Values:  [ 7324.53761626 10754.38943746   535.33196404 -3385.12952694]
New Q values:  [ 7324.53761626 11456.38176148   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  24
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20339.88169501 -9022.41491635 -7525.7277781  23830.75328833]
------
Step:7, Action:West
State  288
Old Q Values:  [20339.88169501 -9022.41491635 -7525.7277781  23830.75328833]
New Q values:  [20339.88169501 -9022.41491635 -7525.7277781  31944.55935303]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  7.78501573e+03  7.47095268e+04]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2593.94497685 20500.59441955]
New Q values:  [-2527.46239811 -8521.23367799  2593.94497685 21591.306984  ]
Reward: 9  Episode Reward:  32
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44618.89738728  2256.66526474   425.90861234  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [44618.89738728  2256.66526474   425.90861234  1875.31501677]
New Q values:  [47598.71078516  2256.66526474   425.90861234  1875.31501677]
Reward: 9  Episode Reward:  41
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 99152.50610081     0.        ]
------
Step:10, Action:North
State  181
Old Q Values:  [  98.07699811   15.18059333 -986.66020107 -180.6       ]
New Q values:  [  82.20889447   15.18059333 -986.66020107 -180.6       ]
Reward: 9  Episode Reward:  50
xxxxx
xa. x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         125.26031743    5.4           0.        ]
------
Step:11, Action:South
State  103
Old Q Values:  [-180.6         125.26031743    5.4           0.        ]
New Q values:  [-180.6        1536.79811165    5.4           0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117 1654.71634746 4957.64661559    0.        ]
------
Step:12, Action:East
State  181
Old Q Values:  [  82.20889447   15.18059333 -986.66020107 -180.6       ]
New Q values:  [  82.20889447   15.18059333 1715.81666226 -180.6       ]
Reward: -1  Episode Reward:  48
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7036.93580895 -3909.58186816  1909.41710691]
------
Step:13, Action:South
State  192
Old Q Values:  [3.89777037e-01 7.42556550e+03 2.23716974e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.44701829e+03 2.23716974e+04 2.91043938e+03]
Reward: -1  Episode Reward:  47
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2593.94497685 21591.306984  ]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2593.94497685 21591.306984  ]
New Q values:  [-2527.46239811 -8521.23367799  2593.94497685  9540.96585635]
Reward: -1  Episode Reward:  46
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  565.18716423 -8695.4397473   3016.81020917 -2601.74710518]
------
Step:15, Action:East
State  260
Old Q Values:  [  565.18716423 -8695.4397473   3016.81020917 -2601.74710518]
New Q values:  [  565.18716423 -8695.4397473   4068.41384057 -2601.74710518]
Reward: -1  Episode Reward:  45
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2593.94497685  9540.96585635]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2593.94497685  9540.96585635]
New Q values:  [-2527.46239811 -8521.23367799 10620.34579665  9540.96585635]
Reward: -1  Episode Reward:  44
xxxxx
x . x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20339.88169501 -9022.41491635 -7525.7277781  31944.55935303]
------
Step:17, Action:West
State  288
Old Q Values:  [20339.88169501 -9022.41491635 -7525.7277781  31944.55935303]
New Q values:  [20339.88169501 -9022.41491635 -7525.7277781  15963.32748021]
Reward: -1  Episode Reward:  43
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10620.34579665  9540.96585635]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10620.34579665  9540.96585635]
New Q values:  [-2527.46239811 -8521.23367799 10349.50282716  9540.96585635]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20339.88169501 -9022.41491635 -7525.7277781  15963.32748021]
------
Step:19, Action:North
State  288
Old Q Values:  [20339.88169501 -9022.41491635 -7525.7277781  15963.32748021]
New Q values:  [11572.26720645 -9022.41491635 -7525.7277781  15963.32748021]
Reward: -1  Episode Reward:  41
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7324.53761626 11456.38176148   535.33196404 -3385.12952694]
------
Step:20, Action:South
State  208
Old Q Values:  [ 7324.53761626 11456.38176148   535.33196404 -3385.12952694]
New Q values:  [ 7324.53761626  9370.95094866   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11572.26720645 -9022.41491635 -7525.7277781  15963.32748021]
------
Step:21, Action:West
State  288
Old Q Values:  [11572.26720645 -9022.41491635 -7525.7277781  15963.32748021]
New Q values:  [11572.26720645 -9022.41491635 -7525.7277781  28797.58902979]
Reward: -1  Episode Reward:  39
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  7.78501573e+03  7.47095268e+04]
------
Step:22, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  7.78501573e+03  7.47095268e+04]
New Q values:  [   37.74111519  -168.92307549  7785.01572635 31097.33444994]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 690.71378182  -40.34168621 4047.07911     -35.88578819]
------
Step:23, Action:East
State  261
Old Q Values:  [ 690.71378182  -40.34168621 4047.07911     -35.88578819]
New Q values:  [  690.71378182   -40.34168621 10947.43197898   -35.88578819]
Reward: -1  Episode Reward:  37
xxxxx
x . x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  7785.01572635 31097.33444994]
------
Step:24, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  7785.01572635 31097.33444994]
New Q values:  [   37.74111519  -168.92307549  7785.01572635 15722.56337367]
Reward: -1  Episode Reward:  36
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  690.71378182   -40.34168621 10947.43197898   -35.88578819]
------
Step:25, Action:East
State  261
Old Q Values:  [  690.71378182   -40.34168621 10947.43197898   -35.88578819]
New Q values:  [ 690.71378182  -40.34168621 7483.22363974  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10349.50282716  9540.96585635]
------
Step:26, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10349.50282716  9540.96585635]
New Q values:  [-2527.46239811 -8521.23367799 12778.4778398   9540.96585635]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11572.26720645 -9022.41491635 -7525.7277781  28797.58902979]
------
Step:27, Action:West
State  288
Old Q Values:  [11572.26720645 -9022.41491635 -7525.7277781  28797.58902979]
New Q values:  [11572.26720645 -9022.41491635 -7525.7277781  15351.97896385]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 12778.4778398   9540.96585635]
------
Step:28, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12778.4778398   9540.96585635]
New Q values:  [-2527.46239811 -8521.23367799  9716.38482508  9540.96585635]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11572.26720645 -9022.41491635 -7525.7277781  15351.97896385]
------
Step:29, Action:West
State  288
Old Q Values:  [11572.26720645 -9022.41491635 -7525.7277781  15351.97896385]
New Q values:  [11572.26720645 -9022.41491635 -7525.7277781   9055.10703306]
Reward: -1  Episode Reward:  31
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9716.38482508  9540.96585635]
------
Step:30, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9716.38482508  9540.96585635]
New Q values:  [-2527.46239811 -8521.23367799  7357.63409197  9540.96585635]
Reward: -1  Episode Reward:  30
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11572.26720645 -9022.41491635 -7525.7277781   9055.10703306]
------
Step:31, Action:North
State  288
Old Q Values:  [11572.26720645 -9022.41491635 -7525.7277781   9055.10703306]
New Q values:  [ 7439.59216718 -9022.41491635 -7525.7277781   9055.10703306]
Reward: -1  Episode Reward:  29
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7324.53761626  9370.95094866   535.33196404 -3385.12952694]
------
Step:32, Action:South
State  210
Old Q Values:  [3.05426869e+04 1.41426830e+04 4.36673472e+03 3.52184257e+00]
New Q values:  [3.05426869e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  28
xxxxx
x . x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7439.59216718 -9022.41491635 -7525.7277781   9055.10703306]
------
Step:33, Action:North
State  288
Old Q Values:  [ 7439.59216718 -9022.41491635 -7525.7277781   9055.10703306]
New Q values:  [12138.04292249 -9022.41491635 -7525.7277781   9055.10703306]
Reward: -1  Episode Reward:  27
xxxxx
x . x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.05426869e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:34, Action:North
State  210
Old Q Values:  [3.05426869e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [3.10843712e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  26
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[30127.30102033  8923.38728765  -180.00807518 62892.98816114]
------
Step:35, Action:West
State  130
Old Q Values:  [30127.30102033  8923.38728765  -180.00807518 62892.98816114]
New Q values:  [ 30127.30102033   8923.38728765   -180.00807518 120780.95713233]
Reward: 100009  Episode Reward:  100035
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   46.99094489    3.88307055 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1580.91429395  192.24933255 -120.29354603]
New Q values:  [-177.44732869 1152.51071626  192.24933255 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  82.20889447   15.18059333 1715.81666226 -180.6       ]
------
Step:2, Action:East
State  181
Old Q Values:  [  82.20889447   15.18059333 1715.81666226 -180.6       ]
New Q values:  [   82.20889447    15.18059333 -4556.44474419  -180.6       ]
Reward: -9991  Episode Reward:  -9982
xxxxx
x . x
x g.x
x...x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7324.53761626  9370.95094866   535.33196404 -3385.12952694]
------
Step:1, Action:South
State  208
Old Q Values:  [ 7324.53761626  9370.95094866   535.33196404 -3385.12952694]
New Q values:  [ 7324.53761626  7395.19325621   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12138.04292249 -9022.41491635 -7525.7277781   9055.10703306]
------
Step:2, Action:North
State  288
Old Q Values:  [12138.04292249 -9022.41491635 -7525.7277781   9055.10703306]
New Q values:  [14179.92852575 -9022.41491635 -7525.7277781   9055.10703306]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.10843712e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [3.10843712e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [1.42985674e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: 9  Episode Reward:  17
xxxxx
x..ax
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        6198.06294012 -180.6          97.52090826]
------
Step:4, Action:South
State  138
Old Q Values:  [-180.6        6198.06294012 -180.6          97.52090826]
New Q values:  [ -180.6        -1302.81684709  -180.6           97.52090826]
Reward: -10001  Episode Reward:  -9984
xxxxx
x.. x
x .gx
x.. x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   46.99094489    3.88307055 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1152.51071626  192.24933255 -120.29354603]
New Q values:  [-177.44732869  491.06695484  192.24933255 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   82.20889447    15.18059333 -4556.44474419  -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [   82.20889447    15.18059333 -4556.44474419  -180.6       ]
New Q values:  [  179.60364424    15.18059333 -4556.44474419  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  491.06695484  192.24933255 -120.29354603]
------
Step:3, Action:South
State  109
Old Q Values:  [-241.10880094   46.99094489    3.88307055 -180.6       ]
New Q values:  [-241.10880094   72.07747123    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  179.60364424    15.18059333 -4556.44474419  -180.6       ]
------
Step:4, Action:North
State  181
Old Q Values:  [  179.60364424    15.18059333 -4556.44474419  -180.6       ]
New Q values:  [  218.56154415    15.18059333 -4556.44474419  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
xa .x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  491.06695484  192.24933255 -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869  491.06695484  192.24933255 -120.29354603]
New Q values:  [-177.44732869  261.39524518  192.24933255 -120.29354603]
Reward: -1  Episode Reward:  5
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  218.56154415    15.18059333 -4556.44474419  -180.6       ]
------
Step:6, Action:North
State  181
Old Q Values:  [  218.56154415    15.18059333 -4556.44474419  -180.6       ]
New Q values:  [  165.24319121    15.18059333 -4556.44474419  -180.6       ]
Reward: -1  Episode Reward:  4
xxxxx
xa .x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  261.39524518  192.24933255 -120.29354603]
------
Step:7, Action:South
State  109
Old Q Values:  [-241.10880094   72.07747123    3.88307055 -180.6       ]
New Q values:  [-241.10880094   77.80394586    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  3
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  165.24319121    15.18059333 -4556.44474419  -180.6       ]
------
Step:8, Action:North
State  181
Old Q Values:  [  165.24319121    15.18059333 -4556.44474419  -180.6       ]
New Q values:  [   88.83846024    15.18059333 -4556.44474419  -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
xag.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   77.80394586    3.88307055 -180.6       ]
------
Step:9, Action:South
State  108
Old Q Values:  [-8463.16477134  1713.02671011  3336.41261644     0.        ]
New Q values:  [-8463.16477134  1423.6721556   3336.41261644     0.        ]
Reward: -1  Episode Reward:  1
xxxxx
xg .x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -362.58585376   266.69721195  2463.53823852 -4966.32149798]
------
Step:10, Action:East
State  180
Old Q Values:  [ -362.58585376   266.69721195  2463.53823852 -4966.32149798]
New Q values:  [ -362.58585376   266.69721195  2095.49740839 -4966.32149798]
Reward: 9  Episode Reward:  10
xxxxx
x  .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3682.27370994   174.55451539     0.        ]
------
Step:11, Action:South
State  196
Old Q Values:  [-2469.90645144  3682.27370994   174.55451539     0.        ]
New Q values:  [-2469.90645144  2628.2176938    174.55451539     0.        ]
Reward: 9  Episode Reward:  19
xxxxx
x  .x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        2854.86741799 3833.02736607]
------
Step:12, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        2854.86741799 3833.02736607]
New Q values:  [  16.82637525 -180.6        2854.86741799 2759.1350986 ]
Reward: 9  Episode Reward:  28
xxxxx
x  .x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  565.18716423 -8695.4397473   4068.41384057 -2601.74710518]
------
Step:13, Action:East
State  260
Old Q Values:  [  565.18716423 -8695.4397473   4068.41384057 -2601.74710518]
New Q values:  [  565.18716423 -8695.4397473   2483.22576163 -2601.74710518]
Reward: -1  Episode Reward:  27
xxxxx
x  .x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        2854.86741799 2759.1350986 ]
------
Step:14, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        2854.86741799 2759.1350986 ]
New Q values:  [  16.82637525 -180.6        5401.32552492 2759.1350986 ]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14179.92852575 -9022.41491635 -7525.7277781   9055.10703306]
------
Step:15, Action:North
State  288
Old Q Values:  [14179.92852575 -9022.41491635 -7525.7277781   9055.10703306]
New Q values:  [ 7895.92938716 -9022.41491635 -7525.7277781   9055.10703306]
Reward: 9  Episode Reward:  45
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7324.53761626  7395.19325621   535.33196404 -3385.12952694]
------
Step:16, Action:South
State  208
Old Q Values:  [ 7324.53761626  7395.19325621   535.33196404 -3385.12952694]
New Q values:  [ 7324.53761626  5674.0094124    535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7895.92938716 -9022.41491635 -7525.7277781   9055.10703306]
------
Step:17, Action:West
State  288
Old Q Values:  [ 7895.92938716 -9022.41491635 -7525.7277781   9055.10703306]
New Q values:  [ 7895.92938716 -9022.41491635 -7525.7277781   4092.20039341]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          388.49519889 1569.19193394]
------
Step:18, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        5401.32552492 2759.1350986 ]
New Q values:  [  16.82637525 -180.6        5401.32552492 3348.02113136]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 690.71378182  -40.34168621 7483.22363974  -35.88578819]
------
Step:19, Action:North
State  261
Old Q Values:  [ 690.71378182  -40.34168621 7483.22363974  -35.88578819]
New Q values:  [1762.97949741  -40.34168621 7483.22363974  -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117 1654.71634746 4957.64661559    0.        ]
------
Step:20, Action:East
State  181
Old Q Values:  [   88.83846024    15.18059333 -4556.44474419  -180.6       ]
New Q values:  [   88.83846024    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[1.32443385e-01 6.25622954e+02 0.00000000e+00 0.00000000e+00]
------
Step:21, Action:South
State  199
Old Q Values:  [  22.48535485 1478.00809166  549.89931413  753.62201984]
New Q values:  [  22.48535485 1061.36081685  549.89931413  753.62201984]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          388.49519889 1569.19193394]
------
Step:22, Action:West
State  277
Old Q Values:  [   1.64433       0.          388.49519889 1569.19193394]
New Q values:  [1.64433000e+00 0.00000000e+00 3.88495199e+02 2.87204387e+03]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1762.97949741  -40.34168621 7483.22363974  -35.88578819]
------
Step:23, Action:East
State  257
Old Q Values:  [47598.71078516  2256.66526474   425.90861234  1875.31501677]
New Q values:  [47598.71078516  2256.66526474  4886.53245704  1875.31501677]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  7785.01572635 15722.56337367]
------
Step:24, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.88495199e+02 2.87204387e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.88495199e+02 3.39318464e+03]
Reward: -1  Episode Reward:  36
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1762.97949741  -40.34168621 7483.22363974  -35.88578819]
------
Step:25, Action:East
State  261
Old Q Values:  [1762.97949741  -40.34168621 7483.22363974  -35.88578819]
New Q values:  [1762.97949741  -40.34168621 4613.08711337  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        5401.32552492 3348.02113136]
------
Step:26, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        5401.32552492 3348.02113136]
New Q values:  [  16.82637525 -180.6        4528.70902612 3348.02113136]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7895.92938716 -9022.41491635 -7525.7277781   4092.20039341]
------
Step:27, Action:North
State  288
Old Q Values:  [ 7895.92938716 -9022.41491635 -7525.7277781   4092.20039341]
New Q values:  [ 7447.34196218 -9022.41491635 -7525.7277781   4092.20039341]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.42985674e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:28, Action:North
State  208
Old Q Values:  [ 7324.53761626  5674.0094124    535.33196404 -3385.12952694]
New Q values:  [99169.5021862   5674.0094124    535.33196404 -3385.12952694]
Reward: 100009  Episode Reward:  100042
xxxxx
x  ax
x  gx
x   x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     29.8412011     81.50815526]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011     81.50815526]
New Q values:  [ -281.736      -9545.4473624     29.8412011    148.66821957]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  368.88319155   47.2164064  -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134  1423.6721556   3336.41261644     0.        ]
New Q values:  [-8463.16477134  1423.6721556   1982.44689476     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xga.x
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-10156.11771313  -8069.05606225   2161.60616063    -38.21919406]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     29.8412011    148.66821957]
New Q values:  [ -281.736      -9545.4473624     46.59275292   148.66821957]
Reward: 9  Episode Reward:  17
xxxxx
x  ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           97.52090826]
------
Step:4, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           97.52090826]
New Q values:  [ -180.6        -1302.81684709  -180.6           83.00882918]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     46.59275292   148.66821957]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     46.59275292   148.66821957]
New Q values:  [ -281.736      -9545.4473624     46.59275292    73.39003677]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   48.40916313 -252.78192178]
------
Step:6, Action:East
State  111
Old Q Values:  [-177.44732869  261.39524518  192.24933255 -120.29354603]
New Q values:  [-177.44732869  261.39524518   98.31674405 -120.29354603]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     46.59275292    73.39003677]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     46.59275292    73.39003677]
New Q values:  [ -281.736      -9545.4473624     46.59275292   139.42097217]
Reward: -1  Episode Reward:  13
xxxxx
xa  x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  368.88319155   47.2164064  -180.6       ]
------
Step:8, Action:East
State  111
Old Q Values:  [-177.44732869  261.39524518   98.31674405 -120.29354603]
New Q values:  [-177.44732869  261.39524518   80.55298927 -120.29354603]
Reward: -1  Episode Reward:  12
xxxxx
x a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     46.59275292   139.42097217]
------
Step:9, Action:West
State  120
Old Q Values:  [-10156.11771313  -8069.05606225   2161.60616063    -38.21919406]
New Q values:  [-1.01561177e+04 -8.06905606e+03  2.16160616e+03  7.45350613e+00]
Reward: -1  Episode Reward:  11
xxxxx
xag x
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094   77.80394586    3.88307055 -180.6       ]
------
Step:10, Action:South
State  108
Old Q Values:  [-8463.16477134  1423.6721556   1982.44689476     0.        ]
New Q values:  [-8463.16477134  1166.4852442   1982.44689476     0.        ]
Reward: 9  Episode Reward:  20
xxxxx
xg  x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263   258.74794358  1972.05460653     0.        ]
------
Step:11, Action:East
State  189
Old Q Values:  [    9.84673294  1736.72424953 -4177.65087173  -244.98066897]
New Q values:  [   9.84673294 1736.72424953 -919.83175779 -244.98066897]
Reward: -1  Episode Reward:  19
xxxxx
x g x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1206.93052108 2506.09530301  181.20343395]
------
Step:12, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108 2506.09530301  181.20343395]
New Q values:  [  62.8218634  1206.93052108 2162.97971031  181.20343395]
Reward: 9  Episode Reward:  28
xxxxx
xg  x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.00806034e+03  3.85047196e+03 -6.17035694e+03  3.96578640e+00]
------
Step:13, Action:South
State  216
Old Q Values:  [ 1.00806034e+03  3.85047196e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.00806034e+03  3.77979137e+03 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  37
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7447.34196218 -9022.41491635 -7525.7277781   4092.20039341]
------
Step:14, Action:North
State  288
Old Q Values:  [ 7447.34196218 -9022.41491635 -7525.7277781   4092.20039341]
New Q values:  [ 4112.27419711 -9022.41491635 -7525.7277781   4092.20039341]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.00806034e+03  3.77979137e+03 -6.17035694e+03  3.96578640e+00]
------
Step:15, Action:South
State  216
Old Q Values:  [ 1.00806034e+03  3.77979137e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.00806034e+03  2.74499881e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4112.27419711 -9022.41491635 -7525.7277781   4092.20039341]
------
Step:16, Action:North
State  288
Old Q Values:  [ 4112.27419711 -9022.41491635 -7525.7277781   4092.20039341]
New Q values:  [ 2467.80932148 -9022.41491635 -7525.7277781   4092.20039341]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.00806034e+03  2.74499881e+03 -6.17035694e+03  3.96578640e+00]
------
Step:17, Action:South
State  218
Old Q Values:  [  80.51542067 4752.28713057    0.          429.03841886]
New Q values:  [  80.51542067 3127.97497025    0.          429.03841886]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2467.80932148 -9022.41491635 -7525.7277781   4092.20039341]
------
Step:18, Action:North
State  288
Old Q Values:  [ 2467.80932148 -9022.41491635 -7525.7277781   4092.20039341]
New Q values:  [ 1924.91621967 -9022.41491635 -7525.7277781   4092.20039341]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  80.51542067 3127.97497025    0.          429.03841886]
------
Step:19, Action:South
State  216
Old Q Values:  [ 1.00806034e+03  2.74499881e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.00806034e+03  2.32505964e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.91621967 -9022.41491635 -7525.7277781   4092.20039341]
------
Step:20, Action:West
State  288
Old Q Values:  [ 1924.91621967 -9022.41491635 -7525.7277781   4092.20039341]
New Q values:  [ 1924.91621967 -9022.41491635 -7525.7277781   4504.56991427]
Reward: 9  Episode Reward:  40
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7357.63409197  9540.96585635]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7357.63409197  9540.96585635]
New Q values:  [-2527.46239811 -8521.23367799  7357.63409197 78101.39957809]
Reward: 100009  Episode Reward:  100049
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7357.63409197 78101.39957809]
------
Step:1, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        4528.70902612 3348.02113136]
New Q values:  [  16.82637525 -180.6        4528.70902612 2089.57618103]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  565.18716423 -8695.4397473   2483.22576163 -2601.74710518]
------
Step:2, Action:East
State  261
Old Q Values:  [1762.97949741  -40.34168621 4613.08711337  -35.88578819]
New Q values:  [1762.97949741  -40.34168621 3203.24755318  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        4528.70902612 2089.57618103]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7357.63409197 78101.39957809]
New Q values:  [-2527.46239811 -8521.23367799  4299.82461107 78101.39957809]
Reward: 9  Episode Reward:  17
xxxxx
x g.x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.91621967 -9022.41491635 -7525.7277781   4504.56991427]
------
Step:4, Action:West
State  288
Old Q Values:  [ 1924.91621967 -9022.41491635 -7525.7277781   4504.56991427]
New Q values:  [ 1924.91621967 -9022.41491635 -7525.7277781  25231.64783913]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4299.82461107 78101.39957809]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4299.82461107 78101.39957809]
New Q values:  [-2527.46239811 -8521.23367799  4299.82461107 32200.93409719]
Reward: -1  Episode Reward:  15
xxxxx
x g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1762.97949741  -40.34168621 3203.24755318  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [1762.97949741  -40.34168621 3203.24755318  -35.88578819]
New Q values:  [1762.97949741  -40.34168621 5997.46803337  -35.88578819]
Reward: -1  Episode Reward:  14
xxxxx
x .gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  7785.01572635 15722.56337367]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4299.82461107 32200.93409719]
New Q values:  [-2527.46239811 -8521.23367799  4299.82461107 14679.01404889]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1762.97949741  -40.34168621 5997.46803337  -35.88578819]
------
Step:8, Action:East
State  261
Old Q Values:  [1762.97949741  -40.34168621 5997.46803337  -35.88578819]
New Q values:  [1762.97949741  -40.34168621 7115.15622545  -35.88578819]
Reward: -1  Episode Reward:  12
xxxxx
x .gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  7785.01572635 15722.56337367]
------
Step:9, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  7785.01572635 15722.56337367]
New Q values:  [  37.74111519 -168.92307549 7785.01572635 8422.9722171 ]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1762.97949741  -40.34168621 7115.15622545  -35.88578819]
------
Step:10, Action:East
State  261
Old Q Values:  [1762.97949741  -40.34168621 7115.15622545  -35.88578819]
New Q values:  [1762.97949741  -40.34168621 7249.16670485  -35.88578819]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4299.82461107 14679.01404889]
------
Step:11, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 7785.01572635 8422.9722171 ]
New Q values:  [  37.74111519 -168.92307549 7785.01572635 5543.3388983 ]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1762.97949741  -40.34168621 7249.16670485  -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [1762.97949741  -40.34168621 7249.16670485  -35.88578819]
New Q values:  [1762.97949741  -40.34168621 7302.77089661  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4299.82461107 14679.01404889]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4299.82461107 14679.01404889]
New Q values:  [-2527.46239811 -8521.23367799  4299.82461107  8061.83688854]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1762.97949741  -40.34168621 7302.77089661  -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [1762.97949741  -40.34168621 7302.77089661  -35.88578819]
New Q values:  [ 737.24333704  -40.34168621 7302.77089661  -35.88578819]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   88.83846024    15.18059333 -1635.49101158  -180.6       ]
------
Step:15, Action:North
State  181
Old Q Values:  [   88.83846024    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [  495.97481759    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        1536.79811165    5.4           0.        ]
------
Step:16, Action:South
State  103
Old Q Values:  [-180.6        1536.79811165    5.4           0.        ]
New Q values:  [-180.6         762.91168994    5.4           0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  495.97481759    15.18059333 -1635.49101158  -180.6       ]
------
Step:17, Action:North
State  181
Old Q Values:  [  495.97481759    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [  426.66343402    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         762.91168994    5.4           0.        ]
------
Step:18, Action:South
State  103
Old Q Values:  [-180.6         762.91168994    5.4           0.        ]
New Q values:  [-180.6         432.56370618    5.4           0.        ]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  426.66343402    15.18059333 -1635.49101158  -180.6       ]
------
Step:19, Action:North
State  181
Old Q Values:  [  426.66343402    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [  299.83448546    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  11
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         432.56370618    5.4           0.        ]
------
Step:20, Action:South
State  109
Old Q Values:  [-241.10880094   77.80394586    3.88307055 -180.6       ]
New Q values:  [-241.10880094  120.47192398    3.88307055 -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x .gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  299.83448546    15.18059333 -1635.49101158  -180.6       ]
------
Step:21, Action:North
State  181
Old Q Values:  [  299.83448546    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [  155.47537138    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
xag.x
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  120.47192398    3.88307055 -180.6       ]
------
Step:22, Action:South
State  103
Old Q Values:  [-180.6         432.56370618    5.4           0.        ]
New Q values:  [-180.6         219.06809389    5.4           0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  155.47537138    15.18059333 -1635.49101158  -180.6       ]
------
Step:23, Action:North
State  181
Old Q Values:  [  155.47537138    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [   97.73172575    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xag.x
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  120.47192398    3.88307055 -180.6       ]
------
Step:24, Action:South
State  100
Old Q Values:  [  0.         407.67277794  -0.6          0.        ]
New Q values:  [ 0.00000000e+00  7.91118334e+02 -6.00000000e-01  0.00000000e+00]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -362.58585376   266.69721195  2095.49740839 -4966.32149798]
------
Step:25, Action:East
State  180
Old Q Values:  [ -362.58585376   266.69721195  2095.49740839 -4966.32149798]
New Q values:  [ -362.58585376   266.69721195  1632.0642715  -4966.32149798]
Reward: 9  Episode Reward:  15
xxxxx
x ..x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2628.2176938    174.55451539     0.        ]
------
Step:26, Action:South
State  196
Old Q Values:  [-2469.90645144  2628.2176938    174.55451539     0.        ]
New Q values:  [-2469.90645144  2409.29978535   174.55451539     0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        4528.70902612 2089.57618103]
------
Step:27, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 7785.01572635 5543.3388983 ]
New Q values:  [   37.74111519  -168.92307549 10682.90064228  5543.3388983 ]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.91621967 -9022.41491635 -7525.7277781  25231.64783913]
------
Step:28, Action:West
State  288
Old Q Values:  [ 1924.91621967 -9022.41491635 -7525.7277781  25231.64783913]
New Q values:  [ 1924.91621967 -9022.41491635 -7525.7277781  11110.01452709]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.88495199e+02 3.39318464e+03]
------
Step:29, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        4528.70902612 2089.57618103]
New Q values:  [  16.82637525 -180.6        4528.70902612 3026.06174139]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 737.24333704  -40.34168621 7302.77089661  -35.88578819]
------
Step:30, Action:North
State  261
Old Q Values:  [ 737.24333704  -40.34168621 7302.77089661  -35.88578819]
New Q values:  [ 323.61685254  -40.34168621 7302.77089661  -35.88578819]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   97.73172575    15.18059333 -1635.49101158  -180.6       ]
------
Step:31, Action:North
State  181
Old Q Values:  [   97.73172575    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [   49.77765379    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
xag.x
x  .x
x   x
xxxxx
Step:32, Action:South
State  103
Old Q Values:  [-180.6         219.06809389    5.4           0.        ]
New Q values:  [-180.6         101.96053369    5.4           0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   49.77765379    15.18059333 -1635.49101158  -180.6       ]
------
Step:33, Action:North
State  181
Old Q Values:  [   49.77765379    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [   30.596025      15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
xag.x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         37.61654496  0.          0.        ]
------
Step:34, Action:South
State  101
Old Q Values:  [ 0.         37.61654496  0.          0.        ]
New Q values:  [ 0.         23.62542549  0.          0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x .gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   30.596025      15.18059333 -1635.49101158  -180.6       ]
------
Step:35, Action:North
State  181
Old Q Values:  [   30.596025      15.18059333 -1635.49101158  -180.6       ]
New Q values:  [   42.22657011    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         101.96053369    5.4           0.        ]
------
Step:36, Action:South
State  101
Old Q Values:  [ 0.         23.62542549  0.          0.        ]
New Q values:  [ 0.         21.51814123  0.          0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x .gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   42.22657011    15.18059333 -1635.49101158  -180.6       ]
------
Step:37, Action:North
State  181
Old Q Values:  [   42.22657011    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [   22.74607041    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  3
xxxxx
xag.x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         21.51814123  0.          0.        ]
------
Step:38, Action:South
State  101
Old Q Values:  [ 0.         21.51814123  0.          0.        ]
New Q values:  [ 0.         14.83107761  0.          0.        ]
Reward: -1  Episode Reward:  2
xxxxx
x .gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   22.74607041    15.18059333 -1635.49101158  -180.6       ]
------
Step:39, Action:North
State  181
Old Q Values:  [   22.74607041    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [   39.08658827    15.18059333 -1635.49101158  -180.6       ]
Reward: -1  Episode Reward:  1
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         101.96053369    5.4           0.        ]
------
Step:40, Action:South
State  103
Old Q Values:  [-180.6         101.96053369    5.4           0.        ]
New Q values:  [-180.6        1527.47819815    5.4           0.        ]
Reward: -1  Episode Reward:  0
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117 1654.71634746 4957.64661559    0.        ]
------
Step:41, Action:East
State  181
Old Q Values:  [   39.08658827    15.18059333 -1635.49101158  -180.6       ]
New Q values:  [  39.08658827   15.18059333 1456.28433806 -180.6       ]
Reward: -1  Episode Reward:  -1
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7036.93580895 -3909.58186816  1909.41710691]
------
Step:42, Action:South
State  199
Old Q Values:  [  22.48535485 1061.36081685  549.89931413  753.62201984]
New Q values:  [  22.48535485 1441.89971818  549.89931413  753.62201984]
Reward: -1  Episode Reward:  -2
xxxxx
x ..x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.88495199e+02 3.39318464e+03]
------
Step:43, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 10682.90064228  5543.3388983 ]
New Q values:  [   37.74111519  -168.92307549 10682.90064228  4407.5668283 ]
Reward: -1  Episode Reward:  -3
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 323.61685254  -40.34168621 7302.77089661  -35.88578819]
------
Step:44, Action:East
State  261
Old Q Values:  [ 323.61685254  -40.34168621 7302.77089661  -35.88578819]
New Q values:  [ 323.61685254  -40.34168621 4279.12106648  -35.88578819]
Reward: -1  Episode Reward:  -4
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        4528.70902612 3026.06174139]
------
Step:45, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        4528.70902612 3026.06174139]
New Q values:  [  16.82637525 -180.6        5143.88796857 3026.06174139]
Reward: -1  Episode Reward:  -5
xxxxx
x g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.91621967 -9022.41491635 -7525.7277781  11110.01452709]
------
Step:46, Action:West
State  288
Old Q Values:  [ 1924.91621967 -9022.41491635 -7525.7277781  11110.01452709]
New Q values:  [ 1924.91621967 -9022.41491635 -7525.7277781   5986.57220141]
Reward: -1  Episode Reward:  -6
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        5143.88796857 3026.06174139]
------
Step:47, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        5143.88796857 3026.06174139]
New Q values:  [  16.82637525 -180.6        3852.92684785 3026.06174139]
Reward: -1  Episode Reward:  -7
xxxxx
x ..x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.91621967 -9022.41491635 -7525.7277781   5986.57220141]
------
Step:48, Action:West
State  288
Old Q Values:  [ 1924.91621967 -9022.41491635 -7525.7277781   5986.57220141]
New Q values:  [ 1924.91621967 -9022.41491635 -7525.7277781   3549.90693492]
Reward: -1  Episode Reward:  -8
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        3852.92684785 3026.06174139]
------
Step:49, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        3852.92684785 3026.06174139]
New Q values:  [  16.82637525 -180.6        2605.54281962 3026.06174139]
Reward: -1  Episode Reward:  -9
xxxxx
x ..x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.91621967 -9022.41491635 -7525.7277781   3549.90693492]
------
Step:50, Action:West
State  288
Old Q Values:  [ 1924.91621967 -9022.41491635 -7525.7277781   3549.90693492]
New Q values:  [ 1924.91621967 -9022.41491635 -7525.7277781   2327.18129639]
Reward: -1  Episode Reward:  -10
xxxxx
x ..x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        2605.54281962 3026.06174139]
------
Step:51, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        2605.54281962 3026.06174139]
New Q values:  [  16.82637525 -180.6        1739.77151676 3026.06174139]
Reward: -1  Episode Reward:  -11
xxxxx
x ..x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.91621967 -9022.41491635 -7525.7277781   2327.18129639]
------
Step:52, Action:West
State  288
Old Q Values:  [ 1924.91621967 -9022.41491635 -7525.7277781   2327.18129639]
New Q values:  [ 1924.91621967 -9022.41491635 -7525.7277781   1838.09104097]
Reward: -1  Episode Reward:  -12
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1739.77151676 3026.06174139]
------
Step:53, Action:West
State  276
Old Q Values:  [  16.82637525 -180.6        1739.77151676 3026.06174139]
New Q values:  [  16.82637525 -180.6        1739.77151676 2493.5610165 ]
Reward: -1  Episode Reward:  -13
xxxxx
x ..x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 323.61685254  -40.34168621 4279.12106648  -35.88578819]
------
Step:54, Action:North
State  260
Old Q Values:  [  565.18716423 -8695.4397473   2483.22576163 -2601.74710518]
New Q values:  [  499.41871742 -8695.4397473   2483.22576163 -2601.74710518]
Reward: -1  Episode Reward:  -14
xxxxx
x ..x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534   913.14617244     0.        ]
------
Step:55, Action:East
State  180
Old Q Values:  [ -362.58585376   266.69721195  1632.0642715  -4966.32149798]
New Q values:  [ -362.58585376   266.69721195  1375.0156442  -4966.32149798]
Reward: -1  Episode Reward:  -15
xxxxx
x ..x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2409.29978535   174.55451539     0.        ]
------
Step:56, Action:South
State  198
Old Q Values:  [-2.78872080e-01  4.42789209e+03  5.59303454e+01  0.00000000e+00]
New Q values:  [-2.78872080e-01  2.51862514e+03  5.59303454e+01  0.00000000e+00]
Reward: -1  Episode Reward:  -16
xxxxx
x ..x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525 -180.6        1739.77151676 2493.5610165 ]
------
Step:57, Action:East
State  276
Old Q Values:  [  16.82637525 -180.6        1739.77151676 2493.5610165 ]
New Q values:  [  16.82637525 -180.6        1272.7834726  2493.5610165 ]
Reward: -1  Episode Reward:  -17
xxxxx
x ..x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.91621967 -9022.41491635 -7525.7277781   1838.09104097]
------
Step:58, Action:North
State  288
Old Q Values:  [ 1924.91621967 -9022.41491635 -7525.7277781   1838.09104097]
New Q values:  [ 5064.93669518 -9022.41491635 -7525.7277781   1838.09104097]
Reward: 9  Episode Reward:  -8
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.42985674e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:59, Action:North
State  208
Old Q Values:  [99169.5021862   5674.0094124    535.33196404 -3385.12952694]
New Q values:  [75907.48801418  5674.0094124    535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  1
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 30127.30102033   8923.38728765   -180.00807518 120780.95713233]
------
Step:60, Action:West
State  128
Old Q Values:  [ 8775.70846068 28703.01188472 -8652.84       35423.73909831]
New Q values:  [ 8775.70846068 28703.01188472 -8652.84       74254.29710885]
Reward: 100009  Episode Reward:  100010
xxxxx
x agx
x   x
x   x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349    57.72647054]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     46.59275292   139.42097217]
New Q values:  [ -281.736      -9545.4473624     48.93974992   139.42097217]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           83.00882918]
------
Step:2, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           83.00882918]
New Q values:  [ -180.6        -1302.81684709  -180.6           74.42982332]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     48.93974992   139.42097217]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     48.93974992   139.42097217]
New Q values:  [ -281.736      -9545.4473624     48.93974992    75.69113781]
Reward: 9  Episode Reward:  17
xxxxx
xa  x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   48.40916313 -252.78192178]
------
Step:4, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   48.40916313 -252.78192178]
New Q values:  [-252.35169558    7.11267516   41.4710066  -252.78192178]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     48.93974992    75.69113781]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349    57.72647054]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349    34.93189019]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   41.4710066  -252.78192178]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   41.4710066  -252.78192178]
New Q values:  [-252.35169558    7.11267516   38.69574398 -252.78192178]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     48.93974992    75.69113781]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349    34.93189019]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349    24.98147927]
Reward: -1  Episode Reward:  13
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   38.69574398 -252.78192178]
------
Step:8, Action:East
State  109
Old Q Values:  [-241.10880094  120.47192398    3.88307055 -180.6       ]
New Q values:  [-241.10880094  120.47192398   23.93057901 -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x agx
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7754.60539249    76.59116932]
------
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349    24.98147927]
New Q values:  [ -253.44886264 -1902.20915811   192.85677349    21.0013149 ]
Reward: -1  Episode Reward:  11
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   38.69574398 -252.78192178]
------
Step:10, Action:East
State  109
Old Q Values:  [-241.10880094  120.47192398   23.93057901 -180.6       ]
New Q values:  [-241.10880094  120.47192398   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x agx
x...x
x.. x
xxxxx
Step:11, Action:North
State  120
Old Q Values:  [-1.01561177e+04 -8.06905606e+03  2.16160616e+03  7.45350613e+00]
New Q values:  [-9.59456524e+03 -8.06905606e+03  2.16160616e+03  7.45350613e+00]
Reward: -10301  Episode Reward:  -10291
xxxxx
x g x
x...x
x.. x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117 1654.71634746 4957.64661559    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 225.17846117 1654.71634746 4957.64661559    0.        ]
New Q values:  [ 225.17846117 1654.71634746 6054.79746094    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.97854041e+03  1.35544627e+04  1.20371620e+03]
------
Step:2, Action:East
State  195
Old Q Values:  [   38.85388605 14927.26725394  6267.88141429  2546.60363946]
New Q values:  [   38.85388605 14927.26725394  6802.12277303  2546.60363946]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x..gx
xxxxx
Step:3, Action:North
State  208
Old Q Values:  [75907.48801418  5674.0094124    535.33196404 -3385.12952694]
New Q values:  [66602.68234537  5674.0094124    535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 30127.30102033   8923.38728765   -180.00807518 120780.95713233]
------
Step:4, Action:West
State  136
Old Q Values:  [ 1117.30787879  4440.05027424 -6245.61866138  1514.62365225]
New Q values:  [ 1117.30787879  4440.05027424 -6245.61866138   634.22681169]
Reward: 9  Episode Reward:  36
xxxxx
x.agx
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7754.60539249    76.59116932]
------
Step:5, Action:West
State  120
Old Q Values:  [-9.59456524e+03 -8.06905606e+03  2.16160616e+03  7.45350613e+00]
New Q values:  [-9594.56523706 -8069.05606225  2161.60616063    10.39836289]
Reward: 9  Episode Reward:  45
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6           6.72320144 -904.87287133    0.        ]
------
Step:6, Action:South
State  104
Old Q Values:  [-8652.84         817.90648014  1954.59090238 -8652.84      ]
New Q values:  [-8652.84         853.17632684  1954.59090238 -8652.84      ]
Reward: -1  Episode Reward:  44
xxxxx
xg  x
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[-2081.09028721     0.          1755.37911594     0.        ]
------
Step:7, Action:East
State  184
Old Q Values:  [-2081.09028721     0.          1755.37911594     0.        ]
New Q values:  [-2081.09028721     0.          1350.44555947     0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1206.93052108 2162.97971031  181.20343395]
------
Step:8, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458   900.84848298     0.        ]
New Q values:  [    0.         -5884.35407458  1298.13188427     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  80.51542067 3127.97497025    0.          429.03841886]
------
Step:9, Action:South
State  216
Old Q Values:  [ 1.00806034e+03  2.32505964e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.00806034e+03  2.44890487e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5064.93669518 -9022.41491635 -7525.7277781   1838.09104097]
------
Step:10, Action:North
State  288
Old Q Values:  [ 5064.93669518 -9022.41491635 -7525.7277781   1838.09104097]
New Q values:  [ 2963.76716915 -9022.41491635 -7525.7277781   1838.09104097]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  80.51542067 3127.97497025    0.          429.03841886]
------
Step:11, Action:South
State  218
Old Q Values:  [  80.51542067 3127.97497025    0.          429.03841886]
New Q values:  [  80.51542067 2139.72013884    0.          429.03841886]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2963.76716915 -9022.41491635 -7525.7277781   1838.09104097]
------
Step:12, Action:North
State  288
Old Q Values:  [ 2963.76716915 -9022.41491635 -7525.7277781   1838.09104097]
New Q values:  [ 1826.82290931 -9022.41491635 -7525.7277781   1838.09104097]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  80.51542067 2139.72013884    0.          429.03841886]
------
Step:13, Action:South
State  216
Old Q Values:  [ 1.00806034e+03  2.44890487e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.00806034e+03  1.53038926e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1826.82290931 -9022.41491635 -7525.7277781   1838.09104097]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1826.82290931 -9022.41491635 -7525.7277781   1838.09104097]
New Q values:  [ 1826.82290931 -9022.41491635 -7525.7277781   3159.18748295]
Reward: 9  Episode Reward:  46
xxxxx
x g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4299.82461107  8061.83688854]
------
Step:15, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 10682.90064228  4407.5668283 ]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  1.06829006e+04  7.60480400e+04]
Reward: 100009  Episode Reward:  100055
xxxxx
x  gx
x   x
xa  x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  261.39524518   80.55298927 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6        1527.47819815    5.4           0.        ]
New Q values:  [-180.6        2432.83051754    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117 1654.71634746 6054.79746094    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 225.17846117 1654.71634746 6054.79746094    0.        ]
New Q values:  [ 225.17846117 1654.71634746 2859.88889983    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  22.48535485 1441.89971818  549.89931413  753.62201984]
------
Step:3, Action:South
State  198
Old Q Values:  [-2.78872080e-01  2.51862514e+03  5.59303454e+01  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.56859888e+03  5.59303454e+01  0.00000000e+00]
Reward: -9991  Episode Reward:  -9973
xxxxx
x ..x
x  .x
x.g x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  39.08658827   15.18059333 1456.28433806 -180.6       ]
------
Step:1, Action:East
State  181
Old Q Values:  [  39.08658827   15.18059333 1456.28433806 -180.6       ]
New Q values:  [  39.08658827   15.18059333 1299.42295117 -180.6       ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g x
x...x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[66602.68234537  5674.0094124    535.33196404 -3385.12952694]
------
Step:1, Action:North
State  216
Old Q Values:  [ 1.00806034e+03  1.53038926e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 4.30953082e+02  1.53038926e+03 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           74.42982332]
------
Step:2, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           74.42982332]
New Q values:  [ -180.6        -1302.81684709  -180.6           57.87927067]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     48.93974992    75.69113781]
------
Step:3, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2161.60616063    10.39836289]
New Q values:  [-9594.56523706 -8069.05606225  2161.60616063 -5395.70658642]
Reward: -9991  Episode Reward:  -9973
xxxxx
xg  x
x . x
x...x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2161.60616063 -5395.70658642]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2161.60616063 -5395.70658642]
New Q values:  [-9594.56523706 -8069.05606225  2202.05754652 -5395.70658642]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  4440.05027424 -6245.61866138   634.22681169]
------
Step:2, Action:South
State  136
Old Q Values:  [ 1117.30787879  4440.05027424 -6245.61866138   634.22681169]
New Q values:  [ 1117.30787879  2240.5368872  -6245.61866138   634.22681169]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4.30953082e+02  1.53038926e+03 -6.17035694e+03  3.96578640e+00]
------
Step:3, Action:South
State  216
Old Q Values:  [ 4.30953082e+02  1.53038926e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 4.30953082e+02  1.56531195e+03 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1826.82290931 -9022.41491635 -7525.7277781   3159.18748295]
------
Step:4, Action:West
State  288
Old Q Values:  [ 1826.82290931 -9022.41491635 -7525.7277781   3159.18748295]
New Q values:  [ 1826.82290931 -9022.41491635 -7525.7277781   3687.62605974]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4299.82461107  8061.83688854]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4299.82461107  8061.83688854]
New Q values:  [-2527.46239811 -8521.23367799  4299.82461107 17509.74799096]
Reward: 9  Episode Reward:  45
xxxxx
x g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[47598.71078516  2256.66526474  4886.53245704  1875.31501677]
------
Step:6, Action:North
State  260
Old Q Values:  [  499.41871742 -8695.4397473   2483.22576163 -2601.74710518]
New Q values:  [  617.67218023 -8695.4397473   2483.22576163 -2601.74710518]
Reward: 9  Episode Reward:  54
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -362.58585376   266.69721195  1375.0156442  -4966.32149798]
------
Step:7, Action:East
State  177
Old Q Values:  [66823.71809805  4025.17604709 99152.50610081     0.        ]
New Q values:  [ 66823.71809805   4025.17604709 106377.91165627      0.        ]
Reward: 100009  Episode Reward:  100063
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           57.87927067]
------
Step:1, Action:West
State  136
Old Q Values:  [ 1117.30787879  2240.5368872  -6245.61866138   634.22681169]
New Q values:  [ 1117.30787879  2240.5368872  -6245.61866138 -5080.29201136]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.g x
x. .x
x...x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  368.88319155   47.2164064  -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869  261.39524518   80.55298927 -120.29354603]
New Q values:  [-177.44732869  967.92476802   80.55298927 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 225.17846117 1654.71634746 2859.88889983    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [  39.08658827   15.18059333 1299.42295117 -180.6       ]
New Q values:  [   39.08658827    15.18059333 -4752.04088393  -180.6       ]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x g.x
x ..x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7036.93580895 -3909.58186816  1909.41710691]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  7036.93580895 -3909.58186816  1909.41710691]
New Q values:  [-5922.26708831 25634.58631364 -3909.58186816  1909.41710691]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  1.06829006e+04  7.60480400e+04]
------
Step:2, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  1.06829006e+04  7.60480400e+04]
New Q values:  [   37.74111519  -168.92307549 10682.90064228 31708.35230669]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 323.61685254  -40.34168621 4279.12106648  -35.88578819]
------
Step:3, Action:East
State  257
Old Q Values:  [47598.71078516  2256.66526474  4886.53245704  1875.31501677]
New Q values:  [47598.71078516  2256.66526474  7206.9373801   1875.31501677]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4299.82461107 17509.74799096]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4299.82461107 17509.74799096]
New Q values:  [-2527.46239811 -8521.23367799  4299.82461107  8287.03551633]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 323.61685254  -40.34168621 4279.12106648  -35.88578819]
------
Step:5, Action:East
State  257
Old Q Values:  [47598.71078516  2256.66526474  7206.9373801   1875.31501677]
New Q values:  [47598.71078516  2256.66526474 12394.68064405  1875.31501677]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x. gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 10682.90064228 31708.35230669]
------
Step:6, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 10682.90064228 31708.35230669]
New Q values:  [   37.74111519  -168.92307549 10682.90064228 26962.35415822]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x. .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[47598.71078516  2256.66526474 12394.68064405  1875.31501677]
------
Step:7, Action:North
State  261
Old Q Values:  [ 323.61685254  -40.34168621 4279.12106648  -35.88578819]
New Q values:  [32048.2202379    -40.34168621  4279.12106648   -35.88578819]
Reward: 9  Episode Reward:  23
xxxxx
x.. x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[ 66823.71809805   4025.17604709 106377.91165627      0.        ]
------
Step:8, Action:East
State  183
Old Q Values:  [ 225.17846117 1654.71634746 2859.88889983    0.        ]
New Q values:  [ 225.17846117 1654.71634746 5621.53573611    0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 14927.26725394  6802.12277303  2546.60363946]
------
Step:9, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -7.97854041e+03  1.35544627e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  1.35544627e+04  1.20371620e+03]
Reward: -10001  Episode Reward:  -9979
xxxxx
x.. x
x  .x
x g.x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   39.08658827    15.18059333 -4752.04088393  -180.6       ]
------
Step:1, Action:North
State  183
Old Q Values:  [ 225.17846117 1654.71634746 5621.53573611    0.        ]
New Q values:  [ 825.32053973 1654.71634746 5621.53573611    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        2432.83051754    5.4           0.        ]
------
Step:2, Action:South
State  103
Old Q Values:  [-180.6        2432.83051754    5.4           0.        ]
New Q values:  [-180.6        2658.99292785    5.4           0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 825.32053973 1654.71634746 5621.53573611    0.        ]
------
Step:3, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534   913.14617244     0.        ]
New Q values:  [    0.         -5969.29177534  4436.99728368     0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -6.70590551e+03  1.35544627e+04  1.20371620e+03]
------
Step:4, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  1.35544627e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  9.71075529e+03  1.20371620e+03]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.42985674e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:5, Action:North
State  216
Old Q Values:  [ 4.30953082e+02  1.56531195e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.95145014e+02  1.56531195e+03 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  25
xxxxx
x .ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           57.87927067]
------
Step:6, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           57.87927067]
New Q values:  [ -180.6        -1302.81684709  -180.6           86.40874032]
Reward: 9  Episode Reward:  34
xxxxx
x a x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   192.85677349    21.0013149 ]
------
Step:7, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   192.85677349    21.0013149 ]
New Q values:  [ -253.44886264 -1902.20915811   102.46533149    21.0013149 ]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           86.40874032]
------
Step:8, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           86.40874032]
New Q values:  [ -180.6        -1302.81684709  -180.6           64.70309557]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   102.46533149    21.0013149 ]
------
Step:9, Action:East
State  121
Old Q Values:  [    0.             0.         -7754.60539249    76.59116932]
New Q values:  [    0.             0.         -8430.28109083    76.59116932]
Reward: -10001  Episode Reward:  -9969
xxxxx
x  gx
x   x
x...x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   39.08658827    15.18059333 -4752.04088393  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [   39.08658827    15.18059333 -4752.04088393  -180.6       ]
New Q values:  [  311.41206572    15.18059333 -4752.04088393  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  967.92476802   80.55298927 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  967.92476802   80.55298927 -120.29354603]
New Q values:  [-177.44732869 2073.03062804   80.55298927 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x  .x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 825.32053973 1654.71634746 5621.53573611    0.        ]
------
Step:3, Action:East
State  181
Old Q Values:  [  311.41206572    15.18059333 -4752.04088393  -180.6       ]
New Q values:  [  311.41206572    15.18059333 -7172.62641796  -180.6       ]
Reward: -9991  Episode Reward:  -9983
xxxxx
x  .x
x g.x
x...x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           64.70309557]
------
Step:1, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           64.70309557]
New Q values:  [ -180.6        -1302.81684709  -180.6           62.02083768]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   102.46533149    21.0013149 ]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     48.93974992    75.69113781]
New Q values:  [ -281.736      -9545.4473624     37.58215127    75.69113781]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           62.02083768]
------
Step:3, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           62.02083768]
New Q values:  [ -180.6        -1302.81684709  -180.6           54.94793452]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   102.46533149    21.0013149 ]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     37.58215127    75.69113781]
New Q values:  [ -281.736      -9545.4473624     30.91724086    75.69113781]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           54.94793452]
------
Step:5, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           54.94793452]
New Q values:  [ -180.6        -1302.81684709  -180.6           52.11877326]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   102.46533149    21.0013149 ]
------
Step:6, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   102.46533149    21.0013149 ]
New Q values:  [ -253.44886264 -1902.20915811    56.02176457    21.0013149 ]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        -1302.81684709  -180.6           52.11877326]
------
Step:7, Action:West
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           52.11877326]
New Q values:  [ -180.6        -1302.81684709  -180.6           42.95485065]
Reward: -1  Episode Reward:  3
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086    75.69113781]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086    75.69113781]
New Q values:  [ -281.736      -9545.4473624     30.91724086   146.34141259]
Reward: 9  Episode Reward:  12
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  368.88319155   47.2164064  -180.6       ]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   38.69574398 -252.78192178]
New Q values:  [-252.35169558    7.11267516   58.78072137 -252.78192178]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086   146.34141259]
------
Step:10, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2202.05754652 -5395.70658642]
New Q values:  [-9594.56523706 -8069.05606225  2202.05754652 -2122.74105737]
Reward: -1  Episode Reward:  10
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  120.47192398   31.9495824  -180.6       ]
------
Step:11, Action:South
State  109
Old Q Values:  [-241.10880094  120.47192398   31.9495824  -180.6       ]
New Q values:  [-241.10880094  574.60604445   31.9495824  -180.6       ]
Reward: 9  Episode Reward:  19
xxxxx
x  gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 1736.72424953 -919.83175779 -244.98066897]
------
Step:12, Action:South
State  189
Old Q Values:  [   9.84673294 1736.72424953 -919.83175779 -244.98066897]
New Q values:  [ 9.84673294e+00  1.03145558e+04 -9.19831758e+02 -2.44980669e+02]
Reward: 9  Episode Reward:  28
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[32048.2202379    -40.34168621  4279.12106648   -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [32048.2202379    -40.34168621  4279.12106648   -35.88578819]
New Q values:  [15913.05482651   -40.34168621  4279.12106648   -35.88578819]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 9.84673294e+00  1.03145558e+04 -9.19831758e+02 -2.44980669e+02]
------
Step:14, Action:South
State  189
Old Q Values:  [ 9.84673294e+00  1.03145558e+04 -9.19831758e+02 -2.44980669e+02]
New Q values:  [   9.84673294 8899.13875643 -919.83175779 -244.98066897]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[15913.05482651   -40.34168621  4279.12106648   -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [15913.05482651   -40.34168621  4279.12106648   -35.88578819]
New Q values:  [9034.36355753  -40.34168621 4279.12106648  -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 8899.13875643 -919.83175779 -244.98066897]
------
Step:16, Action:South
State  189
Old Q Values:  [   9.84673294 8899.13875643 -919.83175779 -244.98066897]
New Q values:  [   9.84673294 6269.36456983 -919.83175779 -244.98066897]
Reward: -1  Episode Reward:  24
xxxxx
x g x
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[9034.36355753  -40.34168621 4279.12106648  -35.88578819]
------
Step:17, Action:North
State  261
Old Q Values:  [9034.36355753  -40.34168621 4279.12106648  -35.88578819]
New Q values:  [5493.95479396  -40.34168621 4279.12106648  -35.88578819]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
xa. x
x ..x
xxxxx
Step:18, Action:West
State  189
Old Q Values:  [   9.84673294 6269.36456983 -919.83175779 -244.98066897]
New Q values:  [   9.84673294 6269.36456983 -919.83175779 1602.21710336]
Reward: -301  Episode Reward:  -278
xxxxx
x   x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 6269.36456983 -919.83175779 1602.21710336]
------
Step:19, Action:South
State  189
Old Q Values:  [   9.84673294 6269.36456983 -919.83175779 1602.21710336]
New Q values:  [   9.84673294 4155.33226612 -919.83175779 1602.21710336]
Reward: -1  Episode Reward:  -279
xxxxx
x   x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5493.95479396  -40.34168621 4279.12106648  -35.88578819]
------
Step:20, Action:North
State  261
Old Q Values:  [5493.95479396  -40.34168621 4279.12106648  -35.88578819]
New Q values:  [3883.44263842  -40.34168621 4279.12106648  -35.88578819]
Reward: -1  Episode Reward:  -280
xxxxx
x   x
xa. x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 825.32053973 1654.71634746 5621.53573611    0.        ]
------
Step:21, Action:East
State  189
Old Q Values:  [   9.84673294 4155.33226612 -919.83175779 1602.21710336]
New Q values:  [    9.84673294  4155.33226612 -5713.63879002  1602.21710336]
Reward: -9991  Episode Reward:  -10271
xxxxx
x   x
x g x
x ..x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  311.41206572    15.18059333 -7172.62641796  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [  311.41206572    15.18059333 -7172.62641796  -180.6       ]
New Q values:  [  751.8740147     15.18059333 -7172.62641796  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xa. x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2073.03062804   80.55298927 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 2073.03062804   80.55298927 -120.29354603]
New Q values:  [-177.44732869 1054.17445563   80.55298927 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x . x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  751.8740147     15.18059333 -7172.62641796  -180.6       ]
------
Step:3, Action:North
State  183
Old Q Values:  [ 825.32053973 1654.71634746 5621.53573611    0.        ]
New Q values:  [ 645.78055258 1654.71634746 5621.53573611    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
xa. x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1054.17445563   80.55298927 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 1054.17445563   80.55298927 -120.29354603]
New Q values:  [-177.44732869 2107.53050309   80.55298927 -120.29354603]
Reward: -1  Episode Reward:  6
xxxxx
x . x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 645.78055258 1654.71634746 5621.53573611    0.        ]
------
Step:5, Action:East
State  189
Old Q Values:  [    9.84673294  4155.33226612 -5713.63879002  1602.21710336]
New Q values:  [    9.84673294  4155.33226612 -1189.2802612   1602.21710336]
Reward: 9  Episode Reward:  15
xxxxx
x . x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:6, Action:South
State  195
Old Q Values:  [   38.85388605 14927.26725394  6802.12277303  2546.60363946]
New Q values:  [   38.85388605 14065.01314904  6802.12277303  2546.60363946]
Reward: 9  Episode Reward:  24
xxxxx
x . x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 10682.90064228 26962.35415822]
------
Step:7, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 10682.90064228 26962.35415822]
New Q values:  [   37.74111519  -168.92307549 10682.90064228 12074.07798323]
Reward: 9  Episode Reward:  33
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3883.44263842  -40.34168621 4279.12106648  -35.88578819]
------
Step:8, Action:East
State  261
Old Q Values:  [3883.44263842  -40.34168621 4279.12106648  -35.88578819]
New Q values:  [3883.44263842  -40.34168621 2729.00381803  -35.88578819]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x  .x
x a.x
xxxxx
Step:9, Action:South
State  276
Old Q Values:  [  16.82637525 -180.6        1272.7834726  2493.5610165 ]
New Q values:  [  16.82637525  495.22830495 1272.7834726  2493.5610165 ]
Reward: -301  Episode Reward:  -269
xxxxx
x g x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1272.7834726  2493.5610165 ]
------
Step:10, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1272.7834726  2493.5610165 ]
New Q values:  [  16.82637525  495.22830495 1272.7834726  1741.79213509]
Reward: -1  Episode Reward:  -270
xxxxx
xg. x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  617.67218023 -8695.4397473   2483.22576163 -2601.74710518]
------
Step:11, Action:East
State  261
Old Q Values:  [3883.44263842  -40.34168621 2729.00381803  -35.88578819]
New Q values:  [3883.44263842  -40.34168621 1613.53916774  -35.88578819]
Reward: -1  Episode Reward:  -271
xxxxx
x g x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1272.7834726  1741.79213509]
------
Step:12, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1272.7834726  1741.79213509]
New Q values:  [  16.82637525  495.22830495 1272.7834726  1441.08458252]
Reward: -1  Episode Reward:  -272
xxxxx
xg. x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  617.67218023 -8695.4397473   2483.22576163 -2601.74710518]
------
Step:13, Action:East
State  261
Old Q Values:  [3883.44263842  -40.34168621 1613.53916774  -35.88578819]
New Q values:  [3883.44263842  -40.34168621 1077.14104185  -35.88578819]
Reward: -1  Episode Reward:  -273
xxxxx
x g x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1272.7834726  1441.08458252]
------
Step:14, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1272.7834726  1441.08458252]
New Q values:  [  16.82637525  495.22830495 1272.7834726  1740.86662454]
Reward: -1  Episode Reward:  -274
xxxxx
x . x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3883.44263842  -40.34168621 1077.14104185  -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [3883.44263842  -40.34168621 1077.14104185  -35.88578819]
New Q values:  [1778.33925978  -40.34168621 1077.14104185  -35.88578819]
Reward: -1  Episode Reward:  -275
xxxxx
x . x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  751.8740147     15.18059333 -7172.62641796  -180.6       ]
------
Step:16, Action:North
State  181
Old Q Values:  [  751.8740147     15.18059333 -7172.62641796  -180.6       ]
New Q values:  [  472.53141922    15.18059333 -7172.62641796  -180.6       ]
Reward: -1  Episode Reward:  -276
xxxxx
xa.gx
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  574.60604445   31.9495824  -180.6       ]
------
Step:17, Action:South
State  109
Old Q Values:  [-241.10880094  574.60604445   31.9495824  -180.6       ]
New Q values:  [-241.10880094  371.00184355   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  -277
xxxxx
x g x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  472.53141922    15.18059333 -7172.62641796  -180.6       ]
------
Step:18, Action:North
State  181
Old Q Values:  [  472.53141922    15.18059333 -7172.62641796  -180.6       ]
New Q values:  [  299.71312075    15.18059333 -7172.62641796  -180.6       ]
Reward: -1  Episode Reward:  -278
xxxxx
xa.gx
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  371.00184355   31.9495824  -180.6       ]
------
Step:19, Action:South
State  109
Old Q Values:  [-241.10880094  371.00184355   31.9495824  -180.6       ]
New Q values:  [-241.10880094  237.71467364   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  -279
xxxxx
x g x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  299.71312075    15.18059333 -7172.62641796  -180.6       ]
------
Step:20, Action:North
State  180
Old Q Values:  [ -362.58585376   266.69721195  1375.0156442  -4966.32149798]
New Q values:  [-5550.90027307   266.69721195  1375.0156442  -4966.32149798]
Reward: -10001  Episode Reward:  -10280
xxxxx
xg. x
x  .x
x  .x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  299.71312075    15.18059333 -7172.62641796  -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [  299.71312075    15.18059333 -7172.62641796  -180.6       ]
New Q values:  [  757.54439923    15.18059333 -7172.62641796  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xa. x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2107.53050309   80.55298927 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 2107.53050309   80.55298927 -120.29354603]
New Q values:  [-177.44732869 2528.87292207   80.55298927 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x . x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 645.78055258 1654.71634746 5621.53573611    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 645.78055258 1654.71634746 5621.53573611    0.        ]
New Q values:  [ 645.78055258 1654.71634746 5167.24088252    0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x . x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -6.70590551e+03  9.71075529e+03  1.20371620e+03]
------
Step:4, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108 2162.97971031  181.20343395]
New Q values:  [  62.8218634  1206.93052108 1340.18546859  181.20343395]
Reward: 9  Episode Reward:  26
xxxxx
x . x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.95145014e+02  1.56531195e+03 -6.17035694e+03  3.96578640e+00]
------
Step:5, Action:South
State  216
Old Q Values:  [ 1.95145014e+02  1.56531195e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.95145014e+02  1.73781260e+03 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  35
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1826.82290931 -9022.41491635 -7525.7277781   3687.62605974]
------
Step:6, Action:West
State  288
Old Q Values:  [ 1826.82290931 -9022.41491635 -7525.7277781   3687.62605974]
New Q values:  [ 1826.82290931 -9022.41491635 -7525.7277781   3966.56107879]
Reward: 9  Episode Reward:  44
xxxxx
x . x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4299.82461107  8287.03551633]
------
Step:7, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 10682.90064228 12074.07798323]
New Q values:  [   37.74111519  -168.92307549 10682.90064228  5368.53297123]
Reward: 9  Episode Reward:  53
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1778.33925978  -40.34168621 1077.14104185  -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [1778.33925978  -40.34168621 1077.14104185  -35.88578819]
New Q values:  [2260.90796867  -40.34168621 1077.14104185  -35.88578819]
Reward: -1  Episode Reward:  52
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 645.78055258 1654.71634746 5167.24088252    0.        ]
------
Step:9, Action:East
State  181
Old Q Values:  [  757.54439923    15.18059333 -7172.62641796  -180.6       ]
New Q values:  [ 757.54439923   15.18059333 4820.72532691 -180.6       ]
Reward: -1  Episode Reward:  51
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 25634.58631364 -3909.58186816  1909.41710691]
------
Step:10, Action:South
State  195
Old Q Values:  [   38.85388605 14065.01314904  6802.12277303  2546.60363946]
New Q values:  [  38.85388605 8830.2754523  6802.12277303 2546.60363946]
Reward: -1  Episode Reward:  50
xxxxx
x . x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 10682.90064228  5368.53297123]
------
Step:11, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 10682.90064228  5368.53297123]
New Q values:  [   37.74111519  -168.92307549 10682.90064228  2825.08557909]
Reward: -1  Episode Reward:  49
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2260.90796867  -40.34168621 1077.14104185  -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [2260.90796867  -40.34168621 1077.14104185  -35.88578819]
New Q values:  [2349.98078554  -40.34168621 1077.14104185  -35.88578819]
Reward: -1  Episode Reward:  48
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 757.54439923   15.18059333 4820.72532691 -180.6       ]
------
Step:13, Action:East
State  177
Old Q Values:  [ 66823.71809805   4025.17604709 106377.91165627      0.        ]
New Q values:  [66823.71809805  4025.17604709 49262.07387846     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.44701829e+03 2.23716974e+04 2.91043938e+03]
------
Step:14, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.44701829e+03 2.23716974e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.44701829e+03 2.89288837e+04 2.91043938e+03]
Reward: -1  Episode Reward:  46
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[66602.68234537  5674.0094124    535.33196404 -3385.12952694]
------
Step:15, Action:North
State  208
Old Q Values:  [66602.68234537  5674.0094124    535.33196404 -3385.12952694]
New Q values:  [48916.7620708   5674.0094124    535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  45
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068 28703.01188472 -8652.84       74254.29710885]
------
Step:16, Action:South
State  130
Old Q Values:  [ 30127.30102033   8923.38728765   -180.00807518 120780.95713233]
New Q values:  [ 30127.30102033  18243.7835363    -180.00807518 120780.95713233]
Reward: -1  Episode Reward:  44
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[48916.7620708   5674.0094124    535.33196404 -3385.12952694]
------
Step:17, Action:North
State  208
Old Q Values:  [48916.7620708   5674.0094124    535.33196404 -3385.12952694]
New Q values:  [41842.39396098  5674.0094124    535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  43
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068 28703.01188472 -8652.84       74254.29710885]
------
Step:18, Action:South
State  128
Old Q Values:  [ 8775.70846068 28703.01188472 -8652.84       74254.29710885]
New Q values:  [ 8775.70846068 24033.32294218 -8652.84       74254.29710885]
Reward: -1  Episode Reward:  42
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[41842.39396098  5674.0094124    535.33196404 -3385.12952694]
------
Step:19, Action:South
State  208
Old Q Values:  [41842.39396098  5674.0094124    535.33196404 -3385.12952694]
New Q values:  [41842.39396098  3458.9720886    535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1826.82290931 -9022.41491635 -7525.7277781   3966.56107879]
------
Step:20, Action:West
State  288
Old Q Values:  [ 1826.82290931 -9022.41491635 -7525.7277781   3966.56107879]
New Q values:  [ 1826.82290931 -9022.41491635 -7525.7277781   4072.13508642]
Reward: -1  Episode Reward:  40
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4299.82461107  8287.03551633]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4299.82461107  8287.03551633]
New Q values:  [-2527.46239811 -8521.23367799  4299.82461107 17593.82744208]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[47598.71078516  2256.66526474 12394.68064405  1875.31501677]
------
Step:22, Action:North
State  260
Old Q Values:  [  617.67218023 -8695.4397473   2483.22576163 -2601.74710518]
New Q values:  [  658.97356535 -8695.4397473   2483.22576163 -2601.74710518]
Reward: -1  Episode Reward:  38
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5550.90027307   266.69721195  1375.0156442  -4966.32149798]
------
Step:23, Action:East
State  180
Old Q Values:  [-5550.90027307   266.69721195  1375.0156442  -4966.32149798]
New Q values:  [-5550.90027307   266.69721195  9228.07135514 -4966.32149798]
Reward: -1  Episode Reward:  37
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.44701829e+03 2.89288837e+04 2.91043938e+03]
------
Step:24, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.44701829e+03 2.89288837e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.44701829e+03 2.41236717e+04 2.91043938e+03]
Reward: -1  Episode Reward:  36
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[41842.39396098  3458.9720886    535.33196404 -3385.12952694]
------
Step:25, Action:North
State  208
Old Q Values:  [41842.39396098  3458.9720886    535.33196404 -3385.12952694]
New Q values:  [39012.64671705  3458.9720886    535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  35
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068 24033.32294218 -8652.84       74254.29710885]
------
Step:26, Action:South
State  130
Old Q Values:  [ 30127.30102033  18243.7835363    -180.00807518 120780.95713233]
New Q values:  [ 30127.30102033  19000.70742964   -180.00807518 120780.95713233]
Reward: -1  Episode Reward:  34
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[39012.64671705  3458.9720886    535.33196404 -3385.12952694]
------
Step:27, Action:North
State  208
Old Q Values:  [39012.64671705  3458.9720886    535.33196404 -3385.12952694]
New Q values:  [51838.74582652  3458.9720886    535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  33
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 30127.30102033  19000.70742964   -180.00807518 120780.95713233]
------
Step:28, Action:West
State  128
Old Q Values:  [ 8775.70846068 24033.32294218 -8652.84       74254.29710885]
New Q values:  [ 8775.70846068 24033.32294218 -8652.84       89786.52031307]
Reward: 100009  Episode Reward:  100042
xxxxx
x agx
x   x
x   x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2528.87292207   80.55298927 -120.29354603]
------
Step:1, Action:South
State  110
Old Q Values:  [-239.29051573  368.88319155   47.2164064  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684    47.2164064   -180.6       ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x ..x
xg .x
x...x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086   146.34141259]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086   146.34141259]
New Q values:  [ -281.736      -9545.4473624     30.91724086    78.10148696]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684    47.2164064   -180.6       ]
------
Step:2, Action:East
State  111
Old Q Values:  [-177.44732869 2528.87292207   80.55298927 -120.29354603]
New Q values:  [-177.44732869 2528.87292207   55.0516418  -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086    78.10148696]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    56.02176457    21.0013149 ]
New Q values:  [ -253.44886264 -1902.20915811    56.02176457   766.46240258]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2528.87292207   55.0516418  -120.29354603]
------
Step:4, Action:South
State  109
Old Q Values:  [-241.10880094  237.71467364   31.9495824  -180.6       ]
New Q values:  [-241.10880094 1546.70346753   31.9495824  -180.6       ]
Reward: 9  Episode Reward:  16
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 757.54439923   15.18059333 4820.72532691 -180.6       ]
------
Step:5, Action:East
State  181
Old Q Values:  [ 757.54439923   15.18059333 4820.72532691 -180.6       ]
New Q values:  [ 757.54439923   15.18059333 2656.48006637 -180.6       ]
Reward: 9  Episode Reward:  25
xxxxx
x g.x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2409.29978535   174.55451539     0.        ]
------
Step:6, Action:South
State  196
Old Q Values:  [-2469.90645144  2409.29978535   174.55451539     0.        ]
New Q values:  [-2469.90645144  1491.3799015    174.55451539     0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x  .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1272.7834726  1740.86662454]
------
Step:7, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1272.7834726  1740.86662454]
New Q values:  [  16.82637525  495.22830495 1272.7834726  1440.7143783 ]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  658.97356535 -8695.4397473   2483.22576163 -2601.74710518]
------
Step:8, Action:East
State  260
Old Q Values:  [  658.97356535 -8695.4397473   2483.22576163 -2601.74710518]
New Q values:  [  658.97356535 -8695.4397473   1424.90461814 -2601.74710518]
Reward: -1  Episode Reward:  32
xxxxx
xg .x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1272.7834726  1440.7143783 ]
------
Step:9, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1272.7834726  1440.7143783 ]
New Q values:  [  16.82637525  495.22830495 1272.7834726  1003.15713676]
Reward: -1  Episode Reward:  31
xxxxx
xg .x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  658.97356535 -8695.4397473   1424.90461814 -2601.74710518]
------
Step:10, Action:East
State  261
Old Q Values:  [2349.98078554  -40.34168621 1077.14104185  -35.88578819]
New Q values:  [2349.98078554  -40.34168621  812.09145852  -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x g.x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1272.7834726  1003.15713676]
------
Step:11, Action:East
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.88495199e+02 3.39318464e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 1.38243861e+03 3.39318464e+03]
Reward: 9  Episode Reward:  39
xxxxx
x  gx
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1826.82290931 -9022.41491635 -7525.7277781   4072.13508642]
------
Step:12, Action:West
State  288
Old Q Values:  [ 1826.82290931 -9022.41491635 -7525.7277781   4072.13508642]
New Q values:  [ 1826.82290931 -9022.41491635 -7525.7277781   2646.209426  ]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 1.38243861e+03 3.39318464e+03]
------
Step:13, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1272.7834726  1003.15713676]
New Q values:  [  16.82637525  495.22830495 1272.7834726  1105.65709037]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2349.98078554  -40.34168621  812.09145852  -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [2349.98078554  -40.34168621  812.09145852  -35.88578819]
New Q values:  [1736.33633413  -40.34168621  812.09145852  -35.88578819]
Reward: -1  Episode Reward:  36
xxxxx
x g.x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 757.54439923   15.18059333 2656.48006637 -180.6       ]
------
Step:15, Action:East
State  180
Old Q Values:  [-5550.90027307   266.69721195  9228.07135514 -4966.32149798]
New Q values:  [-5550.90027307   266.69721195  4138.04251251 -4966.32149798]
Reward: -1  Episode Reward:  35
xxxxx
xg .x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1491.3799015    174.55451539     0.        ]
------
Step:16, Action:South
State  196
Old Q Values:  [-2469.90645144  1491.3799015    174.55451539     0.        ]
New Q values:  [-2469.90645144   977.78700238   174.55451539     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1272.7834726  1105.65709037]
------
Step:17, Action:East
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 1.38243861e+03 3.39318464e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 1.34623827e+03 3.39318464e+03]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1826.82290931 -9022.41491635 -7525.7277781   2646.209426  ]
------
Step:18, Action:West
State  288
Old Q Values:  [ 1826.82290931 -9022.41491635 -7525.7277781   2646.209426  ]
New Q values:  [ 1826.82290931 -9022.41491635 -7525.7277781   1439.71881218]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1272.7834726  1105.65709037]
------
Step:19, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 1272.7834726  1105.65709037]
New Q values:  [  16.82637525  495.22830495 1056.56026184 1105.65709037]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1826.82290931 -9022.41491635 -7525.7277781   1439.71881218]
------
Step:20, Action:North
State  288
Old Q Values:  [ 1826.82290931 -9022.41491635 -7525.7277781   1439.71881218]
New Q values:  [16287.75291168 -9022.41491635 -7525.7277781   1439.71881218]
Reward: 9  Episode Reward:  40
xxxxx
x  .x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[51838.74582652  3458.9720886    535.33196404 -3385.12952694]
------
Step:21, Action:North
State  208
Old Q Values:  [51838.74582652  3458.9720886    535.33196404 -3385.12952694]
New Q values:  [116975.1854703    3458.9720886     535.33196404  -3385.12952694]
Reward: 100009  Episode Reward:  100049
xxxxx
x  ax
x g x
x   x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1736.33633413  -40.34168621  812.09145852  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [1736.33633413  -40.34168621  812.09145852  -35.88578819]
New Q values:  [1496.87855356  -40.34168621  812.09145852  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 757.54439923   15.18059333 2656.48006637 -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [ 757.54439923   15.18059333 2656.48006637 -180.6       ]
New Q values:  [1067.07963631   15.18059333 2656.48006637 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
xa..x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2528.87292207   55.0516418  -120.29354603]
------
Step:3, Action:South
State  109
Old Q Values:  [-241.10880094 1546.70346753   31.9495824  -180.6       ]
New Q values:  [-241.10880094 1415.02540692   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1067.07963631   15.18059333 2656.48006637 -180.6       ]
------
Step:4, Action:East
State  181
Old Q Values:  [1067.07963631   15.18059333 2656.48006637 -180.6       ]
New Q values:  [1067.07963631   15.18059333 8758.36792064 -180.6       ]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 25634.58631364 -3909.58186816  1909.41710691]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.44701829e+03 2.41236717e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.11590444e+03 2.41236717e+04 2.91043938e+03]
Reward: 9  Episode Reward:  35
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1056.56026184 1105.65709037]
------
Step:6, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1056.56026184 1105.65709037]
New Q values:  [  16.82637525  495.22830495 1056.56026184  890.72640222]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1496.87855356  -40.34168621  812.09145852  -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [1496.87855356  -40.34168621  812.09145852  -35.88578819]
New Q values:  [3225.66179762  -40.34168621  812.09145852  -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1067.07963631   15.18059333 8758.36792064 -180.6       ]
------
Step:8, Action:East
State  181
Old Q Values:  [1067.07963631   15.18059333 8758.36792064 -180.6       ]
New Q values:  [ 1067.07963631    15.18059333 11193.12306235  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 25634.58631364 -3909.58186816  1909.41710691]
------
Step:9, Action:South
State  193
Old Q Values:  [-5922.26708831 25634.58631364 -3909.58186816  1909.41710691]
New Q values:  [-5922.26708831 13458.10471814 -3909.58186816  1909.41710691]
Reward: -1  Episode Reward:  31
xxxxx
x .gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 10682.90064228  2825.08557909]
------
Step:10, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 1056.56026184  890.72640222]
New Q values:  [  16.82637525  495.22830495 5314.34997824  890.72640222]
Reward: 9  Episode Reward:  40
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16287.75291168 -9022.41491635 -7525.7277781   1439.71881218]
------
Step:11, Action:North
State  288
Old Q Values:  [16287.75291168 -9022.41491635 -7525.7277781   1439.71881218]
New Q values:  [41607.05680576 -9022.41491635 -7525.7277781   1439.71881218]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[116975.1854703    3458.9720886     535.33196404  -3385.12952694]
------
Step:12, Action:North
State  210
Old Q Values:  [1.42985674e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [4.19591141e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: 9  Episode Reward:  48
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 30127.30102033  19000.70742964   -180.00807518 120780.95713233]
------
Step:13, Action:West
State  130
Old Q Values:  [ 30127.30102033  19000.70742964   -180.00807518 120780.95713233]
New Q values:  [ 30127.30102033  19000.70742964   -180.00807518 143936.1447208 ]
Reward: 100009  Episode Reward:  100057
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1067.07963631    15.18059333 11193.12306235  -180.6       ]
------
Step:1, Action:North
State  180
Old Q Values:  [-5550.90027307   266.69721195  4138.04251251 -4966.32149798]
New Q values:  [-2200.79518731   266.69721195  4138.04251251 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684    47.2164064   -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134  1166.4852442   1982.44689476     0.        ]
New Q values:  [-8463.16477134  1166.4852442   1458.99602186     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xga.x
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2202.05754652 -2122.74105737]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2202.05754652 -2122.74105737]
New Q values:  [-9594.56523706 -8069.05606225  1558.38408477 -2122.74105737]
Reward: 9  Episode Reward:  27
xxxxx
x gax
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  2240.5368872  -6245.61866138 -5080.29201136]
------
Step:4, Action:South
State  138
Old Q Values:  [ -180.6        -1302.81684709  -180.6           42.95485065]
New Q values:  [-180.6           5.61704033 -180.6          42.95485065]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.95145014e+02  1.73781260e+03 -6.17035694e+03  3.96578640e+00]
------
Step:5, Action:South
State  216
Old Q Values:  [ 1.95145014e+02  1.73781260e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.95145014e+02  1.31826421e+04 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[41607.05680576 -9022.41491635 -7525.7277781   1439.71881218]
------
Step:6, Action:North
State  288
Old Q Values:  [41607.05680576 -9022.41491635 -7525.7277781   1439.71881218]
New Q values:  [17284.13876396 -9022.41491635 -7525.7277781   1439.71881218]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  80.51542067 2139.72013884    0.          429.03841886]
------
Step:7, Action:South
State  218
Old Q Values:  [  80.51542067 2139.72013884    0.          429.03841886]
New Q values:  [  80.51542067 6040.52968473    0.          429.03841886]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17284.13876396 -9022.41491635 -7525.7277781   1439.71881218]
------
Step:8, Action:North
State  288
Old Q Values:  [17284.13876396 -9022.41491635 -7525.7277781   1439.71881218]
New Q values:  [10867.84812977 -9022.41491635 -7525.7277781   1439.71881218]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.95145014e+02  1.31826421e+04 -6.17035694e+03  3.96578640e+00]
------
Step:9, Action:South
State  218
Old Q Values:  [  80.51542067 6040.52968473    0.          429.03841886]
New Q values:  [  80.51542067 5675.96631282    0.          429.03841886]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10867.84812977 -9022.41491635 -7525.7277781   1439.71881218]
------
Step:10, Action:North
State  288
Old Q Values:  [10867.84812977 -9022.41491635 -7525.7277781   1439.71881218]
New Q values:  [ 8301.33187609 -9022.41491635 -7525.7277781   1439.71881218]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.95145014e+02  1.31826421e+04 -6.17035694e+03  3.96578640e+00]
------
Step:11, Action:South
State  218
Old Q Values:  [  80.51542067 5675.96631282    0.          429.03841886]
New Q values:  [  80.51542067 4760.18608796    0.          429.03841886]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8301.33187609 -9022.41491635 -7525.7277781   1439.71881218]
------
Step:12, Action:North
State  288
Old Q Values:  [ 8301.33187609 -9022.41491635 -7525.7277781   1439.71881218]
New Q values:  [ 4747.98857682 -9022.41491635 -7525.7277781   1439.71881218]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  80.51542067 4760.18608796    0.          429.03841886]
------
Step:13, Action:South
State  216
Old Q Values:  [ 1.95145014e+02  1.31826421e+04 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.95145014e+02  6.69685341e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4747.98857682 -9022.41491635 -7525.7277781   1439.71881218]
------
Step:14, Action:North
State  288
Old Q Values:  [ 4747.98857682 -9022.41491635 -7525.7277781   1439.71881218]
New Q values:  [-2092.34854768 -9022.41491635 -7525.7277781   1439.71881218]
Reward: -10001  Episode Reward:  -9964
xxxxx
x   x
x  gx
x.. x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1415.02540692   31.9495824  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [-241.10880094 1415.02540692   31.9495824  -180.6       ]
New Q values:  [-241.10880094 3929.34708147   31.9495824  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1067.07963631    15.18059333 11193.12306235  -180.6       ]
------
Step:2, Action:East
State  181
Old Q Values:  [ 1067.07963631    15.18059333 11193.12306235  -180.6       ]
New Q values:  [1067.07963631   15.18059333 4775.98532565 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   977.78700238   174.55451539     0.        ]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144   977.78700238   174.55451539     0.        ]
New Q values:  [-2469.90645144  1990.81979442   174.55451539     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 5314.34997824  890.72640222]
------
Step:4, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 5314.34997824  890.72640222]
New Q values:  [  16.82637525  495.22830495 2563.05563495  890.72640222]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2092.34854768 -9022.41491635 -7525.7277781   1439.71881218]
------
Step:5, Action:West
State  288
Old Q Values:  [-2092.34854768 -9022.41491635 -7525.7277781   1439.71881218]
New Q values:  [-2092.34854768 -9022.41491635 -7525.7277781   1344.20421536]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 2563.05563495  890.72640222]
------
Step:6, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 2563.05563495  890.72640222]
New Q values:  [  16.82637525  495.22830495 1427.88351859  890.72640222]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2092.34854768 -9022.41491635 -7525.7277781   1344.20421536]
------
Step:7, Action:West
State  288
Old Q Values:  [-2092.34854768 -9022.41491635 -7525.7277781   1344.20421536]
New Q values:  [-2092.34854768 -9022.41491635 -7525.7277781    965.44674172]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1427.88351859  890.72640222]
------
Step:8, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 10682.90064228  2825.08557909]
New Q values:  [  37.74111519 -168.92307549 4562.19427943 2825.08557909]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2092.34854768 -9022.41491635 -7525.7277781    965.44674172]
------
Step:9, Action:West
State  288
Old Q Values:  [-2092.34854768 -9022.41491635 -7525.7277781    965.44674172]
New Q values:  [-2092.34854768 -9022.41491635 -7525.7277781   1403.53408812]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 1.34623827e+03 3.39318464e+03]
------
Step:10, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1427.88351859  890.72640222]
New Q values:  [  16.82637525  495.22830495 1427.88351859 1329.38910017]
Reward: 9  Episode Reward:  40
xxxxx
x  .x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3225.66179762  -40.34168621  812.09145852  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [3225.66179762  -40.34168621  812.09145852  -35.88578819]
New Q values:  [2839.8369838   -40.34168621  812.09145852  -35.88578819]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 645.78055258 1654.71634746 5167.24088252    0.        ]
------
Step:12, Action:East
State  181
Old Q Values:  [1067.07963631   15.18059333 4775.98532565 -180.6       ]
New Q values:  [1067.07963631   15.18059333 2097.48101636 -180.6       ]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[1.32443385e-01 6.25622954e+02 0.00000000e+00 0.00000000e+00]
------
Step:13, Action:South
State  197
Old Q Values:  [1.32443385e-01 6.25622954e+02 0.00000000e+00 0.00000000e+00]
New Q values:  [1.32443385e-01 1.26760457e+03 0.00000000e+00 0.00000000e+00]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 1.34623827e+03 3.39318464e+03]
------
Step:14, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1427.88351859 1329.38910017]
New Q values:  [  16.82637525  495.22830495 1427.88351859 1383.10673521]
Reward: -1  Episode Reward:  36
xxxxx
x g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2839.8369838   -40.34168621  812.09145852  -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [2839.8369838   -40.34168621  812.09145852  -35.88578819]
New Q values:  [1764.57909843  -40.34168621  812.09145852  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1067.07963631   15.18059333 2097.48101636 -180.6       ]
------
Step:16, Action:North
State  181
Old Q Values:  [1067.07963631   15.18059333 2097.48101636 -180.6       ]
New Q values:  [1223.92973288   15.18059333 2097.48101636 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
xa .x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        2658.99292785    5.4           0.        ]
------
Step:17, Action:South
State  109
Old Q Values:  [-241.10880094 3929.34708147   31.9495824  -180.6       ]
New Q values:  [-241.10880094 2200.3831375    31.9495824  -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1223.92973288   15.18059333 2097.48101636 -180.6       ]
------
Step:18, Action:East
State  181
Old Q Values:  [1223.92973288   15.18059333 2097.48101636 -180.6       ]
New Q values:  [1223.92973288   15.18059333 1218.67377842 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[1.32443385e-01 1.26760457e+03 0.00000000e+00 0.00000000e+00]
------
Step:19, Action:South
State  197
Old Q Values:  [1.32443385e-01 1.26760457e+03 0.00000000e+00 0.00000000e+00]
New Q values:  [1.32443385e-01 1.52439722e+03 0.00000000e+00 0.00000000e+00]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 1.34623827e+03 3.39318464e+03]
------
Step:20, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 1.34623827e+03 3.39318464e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 1.34623827e+03 1.88604758e+03]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1764.57909843  -40.34168621  812.09145852  -35.88578819]
------
Step:21, Action:North
State  261
Old Q Values:  [1764.57909843  -40.34168621  812.09145852  -35.88578819]
New Q values:  [1072.41055924  -40.34168621  812.09145852  -35.88578819]
Reward: -1  Episode Reward:  29
xxxxx
x g.x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1223.92973288   15.18059333 1218.67377842 -180.6       ]
------
Step:22, Action:North
State  181
Old Q Values:  [1223.92973288   15.18059333 1218.67377842 -180.6       ]
New Q values:  [1286.66977151   15.18059333 1218.67377842 -180.6       ]
Reward: -1  Episode Reward:  28
xxxxx
xa .x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        2658.99292785    5.4           0.        ]
------
Step:23, Action:South
State  103
Old Q Values:  [-180.6        2658.99292785    5.4           0.        ]
New Q values:  [-180.6       2613.1694359    5.4          0.       ]
Reward: -1  Episode Reward:  27
xxxxx
x  .x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 645.78055258 1654.71634746 5167.24088252    0.        ]
------
Step:24, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  4436.99728368     0.        ]
New Q values:  [    0.         -5969.29177534  1790.97801708     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
x a.x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.56859888e+03  5.59303454e+01  0.00000000e+00]
------
Step:25, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.56859888e+03  5.59303454e+01  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.56859888e+03  1.26155064e+04  0.00000000e+00]
Reward: 9  Episode Reward:  35
xxxxx
x  .x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.19591141e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:26, Action:North
State  210
Old Q Values:  [4.19591141e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [1.19969889e+05 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: 100009  Episode Reward:  100044
xxxxx
x  ax
x   x
xg  x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1286.66977151   15.18059333 1218.67377842 -180.6       ]
------
Step:1, Action:North
State  183
Old Q Values:  [ 645.78055258 1654.71634746 5167.24088252    0.        ]
New Q values:  [1047.6630518  1654.71634746 5167.24088252    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6       2613.1694359    5.4          0.       ]
------
Step:2, Action:South
State  103
Old Q Values:  [-180.6       2613.1694359    5.4          0.       ]
New Q values:  [-180.6        2594.84003912    5.4           0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 5167.24088252    0.        ]
------
Step:3, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  1790.97801708     0.        ]
New Q values:  [    0.         -5969.29177534  3635.01779491     0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -6.70590551e+03  9.71075529e+03  1.20371620e+03]
------
Step:4, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  9.71075529e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  3.98746688e+04  1.20371620e+03]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.19969889e+05 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:5, Action:North
State  218
Old Q Values:  [  80.51542067 4760.18608796    0.          429.03841886]
New Q values:  [  50.49262346 4760.18608796    0.          429.03841886]
Reward: 9  Episode Reward:  25
xxxxx
x .ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           5.61704033 -180.6          42.95485065]
------
Step:6, Action:West
State  138
Old Q Values:  [-180.6           5.61704033 -180.6          42.95485065]
New Q values:  [-180.6           5.61704033 -180.6          46.01238635]
Reward: 9  Episode Reward:  34
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086    78.10148696]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    56.02176457   766.46240258]
New Q values:  [ -253.44886264 -1902.20915811    56.02176457   323.61917744]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   58.78072137 -252.78192178]
------
Step:8, Action:East
State  111
Old Q Values:  [-177.44732869 2528.87292207   55.0516418  -120.29354603]
New Q values:  [-177.44732869 2528.87292207  118.50640995 -120.29354603]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    56.02176457   323.61917744]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086    78.10148696]
New Q values:  [ -281.736      -9545.4473624     30.91724086   789.3024714 ]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2528.87292207  118.50640995 -120.29354603]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869 2528.87292207  118.50640995 -120.29354603]
New Q values:  [-177.44732869 2257.54884866  118.50640995 -120.29354603]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294  4155.33226612 -1189.2802612   1602.21710336]
------
Step:11, Action:South
State  189
Old Q Values:  [    9.84673294  4155.33226612 -1189.2802612   1602.21710336]
New Q values:  [    9.84673294  1989.25607422 -1189.2802612   1602.21710336]
Reward: 9  Episode Reward:  39
xxxxx
x  gx
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1072.41055924  -40.34168621  812.09145852  -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [1072.41055924  -40.34168621  812.09145852  -35.88578819]
New Q values:  [1025.14104596  -40.34168621  812.09145852  -35.88578819]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294  1989.25607422 -1189.2802612   1602.21710336]
------
Step:13, Action:South
State  189
Old Q Values:  [    9.84673294  1989.25607422 -1189.2802612   1602.21710336]
New Q values:  [    9.84673294  1102.64474348 -1189.2802612   1602.21710336]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1025.14104596  -40.34168621  812.09145852  -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [1025.14104596  -40.34168621  812.09145852  -35.88578819]
New Q values:  [890.12154939 -40.34168621 812.09145852 -35.88578819]
Reward: -1  Episode Reward:  36
xxxxx
x g x
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294  1102.64474348 -1189.2802612   1602.21710336]
------
Step:15, Action:West
State  189
Old Q Values:  [    9.84673294  1102.64474348 -1189.2802612   1602.21710336]
New Q values:  [    9.84673294  1102.64474348 -1189.2802612    940.95197235]
Reward: -301  Episode Reward:  -265
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294  1102.64474348 -1189.2802612    940.95197235]
------
Step:16, Action:South
State  188
Old Q Values:  [-6523.78898263   258.74794358  1972.05460653     0.        ]
New Q values:  [-6523.78898263   530.37056288  1972.05460653     0.        ]
Reward: -1  Episode Reward:  -266
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  658.97356535 -8695.4397473   1424.90461814 -2601.74710518]
------
Step:17, Action:East
State  260
Old Q Values:  [  658.97356535 -8695.4397473   1424.90461814 -2601.74710518]
New Q values:  [  658.97356535 -8695.4397473   1003.72690283 -2601.74710518]
Reward: 9  Episode Reward:  -257
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1427.88351859 1383.10673521]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4299.82461107 17593.82744208]
New Q values:  [-2527.46239811 -8521.23367799 62146.39007086 17593.82744208]
Reward: 100009  Episode Reward:  99752
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4562.19427943 2825.08557909]
------
Step:1, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4562.19427943 2825.08557909]
New Q values:  [  37.74111519 -168.92307549 2251.33793821 2825.08557909]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2092.34854768 -9022.41491635 -7525.7277781   1403.53408812]
------
Step:2, Action:West
State  288
Old Q Values:  [-2092.34854768 -9022.41491635 -7525.7277781   1403.53408812]
New Q values:  [-2092.34854768 -9022.41491635 -7525.7277781   1408.33930898]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2251.33793821 2825.08557909]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 62146.39007086 17593.82744208]
New Q values:  [-2527.46239811 -8521.23367799 62146.39007086 21322.54421238]
Reward: 9  Episode Reward:  17
xxxxx
x.g x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[47598.71078516  2256.66526474 12394.68064405  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [890.12154939 -40.34168621 812.09145852 -35.88578819]
New Q values:  [747.44955121 -40.34168621 812.09145852 -35.88578819]
Reward: 9  Episode Reward:  26
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1286.66977151   15.18059333 1218.67377842 -180.6       ]
------
Step:5, Action:North
State  181
Old Q Values:  [1286.66977151   15.18059333 1218.67377842 -180.6       ]
New Q values:  [1298.51992034   15.18059333 1218.67377842 -180.6       ]
Reward: 9  Episode Reward:  35
xxxxx
xa. x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        2594.84003912    5.4           0.        ]
------
Step:6, Action:South
State  109
Old Q Values:  [-241.10880094 2200.3831375    31.9495824  -180.6       ]
New Q values:  [-241.10880094 1269.1092311    31.9495824  -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1298.51992034   15.18059333 1218.67377842 -180.6       ]
------
Step:7, Action:North
State  181
Old Q Values:  [1298.51992034   15.18059333 1218.67377842 -180.6       ]
New Q values:  [1297.25997987   15.18059333 1218.67377842 -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
xa. x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        2594.84003912    5.4           0.        ]
------
Step:8, Action:South
State  111
Old Q Values:  [-177.44732869 2257.54884866  118.50640995 -120.29354603]
New Q values:  [-177.44732869 2452.59180422  118.50640995 -120.29354603]
Reward: -1  Episode Reward:  32
xxxxx
x . x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 5167.24088252    0.        ]
------
Step:9, Action:East
State  181
Old Q Values:  [1297.25997987   15.18059333 1218.67377842 -180.6       ]
New Q values:  [1297.25997987   15.18059333 4530.30092681 -180.6       ]
Reward: 9  Episode Reward:  41
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 13458.10471814 -3909.58186816  1909.41710691]
------
Step:10, Action:South
State  195
Old Q Values:  [  38.85388605 8830.2754523  6802.12277303 2546.60363946]
New Q values:  [  38.85388605 4379.03585465 6802.12277303 2546.60363946]
Reward: -1  Episode Reward:  40
xxxxx
x . x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2251.33793821 2825.08557909]
------
Step:11, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2251.33793821 2825.08557909]
New Q values:  [  37.74111519 -168.92307549 2251.33793821 1373.06166919]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[747.44955121 -40.34168621 812.09145852 -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [747.44955121 -40.34168621 812.09145852 -35.88578819]
New Q values:  [747.44955121 -40.34168621 999.63796487 -35.88578819]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2251.33793821 1373.06166919]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 62146.39007086 21322.54421238]
New Q values:  [-2527.46239811 -8521.23367799 62146.39007086  8828.30907441]
Reward: -1  Episode Reward:  37
xxxxx
x . x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[747.44955121 -40.34168621 999.63796487 -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [747.44955121 -40.34168621 999.63796487 -35.88578819]
New Q values:  [1848.55208524  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  36
xxxxx
x . x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 5167.24088252    0.        ]
------
Step:15, Action:East
State  181
Old Q Values:  [1297.25997987   15.18059333 4530.30092681 -180.6       ]
New Q values:  [1297.25997987   15.18059333 5848.95178617 -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 13458.10471814 -3909.58186816  1909.41710691]
------
Step:16, Action:South
State  195
Old Q Values:  [  38.85388605 4379.03585465 6802.12277303 2546.60363946]
New Q values:  [  38.85388605 2426.41572332 6802.12277303 2546.60363946]
Reward: -1  Episode Reward:  34
xxxxx
x . x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2251.33793821 1373.06166919]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 62146.39007086  8828.30907441]
New Q values:  [-2527.46239811 -8521.23367799 62146.39007086  4085.28925534]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1848.55208524  -40.34168621  999.63796487  -35.88578819]
------
Step:18, Action:North
State  261
Old Q Values:  [1848.55208524  -40.34168621  999.63796487  -35.88578819]
New Q values:  [2288.99309885  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  32
xxxxx
x . x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 5167.24088252    0.        ]
------
Step:19, Action:East
State  183
Old Q Values:  [1047.6630518  1654.71634746 5167.24088252    0.        ]
New Q values:  [ 1047.6630518   1654.71634746 14028.69700268     0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -6.70590551e+03  3.98746688e+04  1.20371620e+03]
------
Step:20, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  3.98746688e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  5.19462342e+04  1.20371620e+03]
Reward: 9  Episode Reward:  40
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.19969889e+05 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:21, Action:North
State  210
Old Q Values:  [1.19969889e+05 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [9.11681990e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  39
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 30127.30102033  19000.70742964   -180.00807518 143936.1447208 ]
------
Step:22, Action:West
State  130
Old Q Values:  [ 30127.30102033  19000.70742964   -180.00807518 143936.1447208 ]
New Q values:  [ 30127.30102033  19000.70742964   -180.00807518 153198.21975619]
Reward: 100009  Episode Reward:  100048
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.11590444e+03 2.41236717e+04 2.91043938e+03]
------
Step:1, Action:East
State  193
Old Q Values:  [-5922.26708831 13458.10471814 -3909.58186816  1909.41710691]
New Q values:  [-5922.26708831 13458.10471814   450.62327432  1909.41710691]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.95145014e+02  6.69685341e+03 -6.17035694e+03  3.96578640e+00]
------
Step:2, Action:South
State  208
Old Q Values:  [116975.1854703    3458.9720886     535.33196404  -3385.12952694]
New Q values:  [116975.1854703    1811.49062813    535.33196404  -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2092.34854768 -9022.41491635 -7525.7277781   1408.33930898]
------
Step:3, Action:West
State  288
Old Q Values:  [-2092.34854768 -9022.41491635 -7525.7277781   1408.33930898]
New Q values:  [-2092.34854768 -9022.41491635 -7525.7277781  19212.65274485]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 62146.39007086  4085.28925534]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 62146.39007086  4085.28925534]
New Q values:  [-2527.46239811 -8521.23367799 30621.7518518   4085.28925534]
Reward: -1  Episode Reward:  26
xxxxx
x. .x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2092.34854768 -9022.41491635 -7525.7277781  19212.65274485]
------
Step:5, Action:North
State  288
Old Q Values:  [-2092.34854768 -9022.41491635 -7525.7277781  19212.65274485]
New Q values:  [34255.01622202 -9022.41491635 -7525.7277781  19212.65274485]
Reward: -1  Episode Reward:  25
xxxxx
x. .x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[116975.1854703    1811.49062813    535.33196404  -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [116975.1854703    1811.49062813    535.33196404  -3385.12952694]
New Q values:  [92754.94011498  1811.49062813   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  34
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 30127.30102033  19000.70742964   -180.00807518 153198.21975619]
------
Step:7, Action:West
State  130
Old Q Values:  [ 30127.30102033  19000.70742964   -180.00807518 153198.21975619]
New Q values:  [30127.30102033 19000.70742964  -180.00807518 96897.04977034]
Reward: -1  Episode Reward:  33
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  47000.60902691 118727.87289289]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086   789.3024714 ]
New Q values:  [ -281.736      -9545.4473624     30.91724086   333.55992022]
Reward: 9  Episode Reward:  42
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    41.46310554  -180.6       ]
------
Step:9, Action:East
State  104
Old Q Values:  [-8652.84         853.17632684  1954.59090238 -8652.84      ]
New Q values:  [-8652.84         853.17632684  1248.75158638 -8652.84      ]
Reward: -1  Episode Reward:  41
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1558.38408477 -2122.74105737]
------
Step:10, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1558.38408477 -2122.74105737]
New Q values:  [-9594.56523706 -8069.05606225  1294.91470007 -2122.74105737]
Reward: -1  Episode Reward:  40
xxxxx
xg ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  2240.5368872  -6245.61866138 -5080.29201136]
------
Step:11, Action:South
State  136
Old Q Values:  [ 1117.30787879  2240.5368872  -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879 28722.09678937 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[92754.94011498  1811.49062813   535.33196404 -3385.12952694]
------
Step:12, Action:North
State  208
Old Q Values:  [92754.94011498  1811.49062813   535.33196404 -3385.12952694]
New Q values:  [45718.0050828   1811.49062813   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  38
xxxxx
xg ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879 28722.09678937 -6245.61866138 -5080.29201136]
------
Step:13, Action:South
State  136
Old Q Values:  [ 1117.30787879 28722.09678937 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879 25203.64024059 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[45718.0050828   1811.49062813   535.33196404 -3385.12952694]
------
Step:14, Action:North
State  208
Old Q Values:  [45718.0050828   1811.49062813   535.33196404 -3385.12952694]
New Q values:  [25847.6941053   1811.49062813   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  36
xxxxx
x gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879 25203.64024059 -6245.61866138 -5080.29201136]
------
Step:15, Action:South
State  136
Old Q Values:  [ 1117.30787879 25203.64024059 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879 17835.16432783 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25847.6941053   1811.49062813   535.33196404 -3385.12952694]
------
Step:16, Action:North
State  216
Old Q Values:  [ 1.95145014e+02  6.69685341e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 9.12617215e+01  6.69685341e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6           5.61704033 -180.6          46.01238635]
------
Step:17, Action:West
State  138
Old Q Values:  [-180.6           5.61704033 -180.6          46.01238635]
New Q values:  [-180.6           5.61704033 -180.6         117.8729306 ]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086   333.55992022]
------
Step:18, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086   333.55992022]
New Q values:  [ -281.736      -9545.4473624     30.91724086   145.26289975]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    41.46310554  -180.6       ]
------
Step:19, Action:East
State  106
Old Q Values:  [ -180.6        -8952.15415062    41.46310554  -180.6       ]
New Q values:  [ -180.6        -8952.15415062    59.56411214  -180.6       ]
Reward: -1  Episode Reward:  31
xxxxx
x a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086   145.26289975]
------
Step:20, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086   145.26289975]
New Q values:  [ -281.736      -9545.4473624     30.91724086    75.13937631]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   58.78072137 -252.78192178]
------
Step:21, Action:East
State  105
Old Q Values:  [-180.6           6.72320144 -904.87287133    0.        ]
New Q values:  [ -180.6            6.72320144 -5974.07473851     0.        ]
Reward: -10001  Episode Reward:  -9971
xxxxx
x g x
x.  x
x.  x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879 17835.16432783 -6245.61866138 -5080.29201136]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6           5.61704033 -180.6         117.8729306 ]
New Q values:  [-180.6        2016.70283772 -180.6         117.8729306 ]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.12617215e+01  6.69685341e+03 -6.17035694e+03  3.96578640e+00]
------
Step:2, Action:South
State  208
Old Q Values:  [25847.6941053   1811.49062813   535.33196404 -3385.12952694]
New Q values:  [25847.6941053  11006.50111786   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[34255.01622202 -9022.41491635 -7525.7277781  19212.65274485]
------
Step:3, Action:North
State  288
Old Q Values:  [34255.01622202 -9022.41491635 -7525.7277781  19212.65274485]
New Q values:  [21455.7147204  -9022.41491635 -7525.7277781  19212.65274485]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25847.6941053  11006.50111786   535.33196404 -3385.12952694]
------
Step:4, Action:North
State  210
Old Q Values:  [9.11681990e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [3.70716905e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2016.70283772 -180.6         117.8729306 ]
------
Step:5, Action:South
State  138
Old Q Values:  [-180.6        2016.70283772 -180.6         117.8729306 ]
New Q values:  [ -180.6       11927.5882748  -180.6         117.8729306]
Reward: -1  Episode Reward:  15
xxxxx
x . x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.70716905e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [3.70716905e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [1.84063527e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  14
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6       11927.5882748  -180.6         117.8729306]
------
Step:7, Action:South
State  138
Old Q Values:  [ -180.6       11927.5882748  -180.6         117.8729306]
New Q values:  [ -180.6        10292.34111054  -180.6          117.8729306 ]
Reward: -1  Episode Reward:  13
xxxxx
x . x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.84063527e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [1.84063527e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [1.04496434e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  12
xxxxx
x .ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -180.6        10292.34111054  -180.6          117.8729306 ]
------
Step:9, Action:South
State  138
Old Q Values:  [ -180.6        10292.34111054  -180.6          117.8729306 ]
New Q values:  [-180.6       5870.6446758 -180.6        117.8729306]
Reward: -10001  Episode Reward:  -9989
xxxxx
x . x
x..gx
x.. x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -6.70590551e+03  5.19462342e+04  1.20371620e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  5.19462342e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  2.39187867e+04  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.04496434e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [25847.6941053  11006.50111786   535.33196404 -3385.12952694]
New Q values:  [12105.67104486 11006.50111786   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       5870.6446758 -180.6        117.8729306]
------
Step:3, Action:South
State  130
Old Q Values:  [30127.30102033 19000.70742964  -180.00807518 96897.04977034]
New Q values:  [30127.30102033 10734.57599205  -180.00807518 96897.04977034]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.04496434e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:4, Action:North
State  210
Old Q Values:  [1.04496434e+04 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [5.94045076e+03 8.37300532e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       5870.6446758 -180.6        117.8729306]
------
Step:5, Action:South
State  130
Old Q Values:  [30127.30102033 10734.57599205  -180.00807518 96897.04977034]
New Q values:  [30127.30102033  7924.93171028  -180.00807518 96897.04977034]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12105.67104486 11006.50111786   535.33196404 -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [12105.67104486 11006.50111786   535.33196404 -3385.12952694]
New Q values:  [ 6602.86182069 11006.50111786   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  14
xxxxx
x..ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       5870.6446758 -180.6        117.8729306]
------
Step:7, Action:South
State  130
Old Q Values:  [30127.30102033  7924.93171028  -180.00807518 96897.04977034]
New Q values:  [30127.30102033  5681.27428011  -180.00807518 96897.04977034]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.94045076e+03 8.37300532e+03 4.36673472e+03 3.52184257e+00]
------
Step:8, Action:South
State  210
Old Q Values:  [5.94045076e+03 8.37300532e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [5.94045076e+03 3.79131654e+03 4.36673472e+03 3.52184257e+00]
Reward: -9991  Episode Reward:  -9978
xxxxx
x.. x
x.  x
x .gx
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086    75.13937631]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    56.02176457   323.61917744]
New Q values:  [ -253.44886264 -1902.20915811    56.02176457   152.48188739]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   58.78072137 -252.78192178]
------
Step:2, Action:East
State  111
Old Q Values:  [-177.44732869 2452.59180422  118.50640995 -120.29354603]
New Q values:  [-177.44732869 2452.59180422   69.34437687 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086    75.13937631]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086    75.13937631]
New Q values:  [ -281.736      -9545.4473624     30.91724086    47.08996694]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   58.78072137 -252.78192178]
------
Step:4, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   58.78072137 -252.78192178]
New Q values:  [-252.35169558    7.11267516   68.65685476 -252.78192178]
Reward: -1  Episode Reward:  6
xxxxx
x a.x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    56.02176457   152.48188739]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086    47.08996694]
New Q values:  [ -281.736      -9545.4473624     30.91724086    38.8330432 ]
Reward: -1  Episode Reward:  5
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   68.65685476 -252.78192178]
------
Step:6, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684    47.2164064   -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684    29.93647552  -180.6       ]
Reward: -1  Episode Reward:  4
xxxxx
x a.x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086    38.8330432 ]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086    38.8330432 ]
New Q values:  [ -281.736      -9545.4473624     30.91724086    35.53027371]
Reward: -1  Episode Reward:  3
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   68.65685476 -252.78192178]
------
Step:8, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684    29.93647552  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684    22.03367232  -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x a.x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.91724086    35.53027371]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086    35.53027371]
New Q values:  [ -281.736      -9545.4473624     30.91724086    20.22221118]
Reward: -1  Episode Reward:  1
xxxxx
xa .x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684    22.03367232  -180.6       ]
------
Step:10, Action:East
State  108
Old Q Values:  [-8463.16477134  1166.4852442   1458.99602186     0.        ]
New Q values:  [-8463.16477134  1166.4852442    971.47281877     0.        ]
Reward: -1  Episode Reward:  0
xxxxx
xga.x
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1294.91470007 -2122.74105737]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.91724086    20.22221118]
New Q values:  [ -281.736      -9545.4473624   1778.96029909    20.22221118]
Reward: 9  Episode Reward:  9
xxxxx
x  ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6       5870.6446758 -180.6        117.8729306]
------
Step:12, Action:South
State  138
Old Q Values:  [-180.6       5870.6446758 -180.6        117.8729306]
New Q values:  [-180.6        3781.71369671 -180.6         117.8729306 ]
Reward: 9  Episode Reward:  18
xxxxx
x   x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  50.49262346 4760.18608796    0.          429.03841886]
------
Step:13, Action:South
State  210
Old Q Values:  [5.94045076e+03 3.79131654e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [5.94045076e+03 7.95864103e+03 4.36673472e+03 3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21455.7147204  -9022.41491635 -7525.7277781  19212.65274485]
------
Step:14, Action:North
State  288
Old Q Values:  [21455.7147204  -9022.41491635 -7525.7277781  19212.65274485]
New Q values:  [10969.27819829 -9022.41491635 -7525.7277781  19212.65274485]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.94045076e+03 7.95864103e+03 4.36673472e+03 3.52184257e+00]
------
Step:15, Action:South
State  210
Old Q Values:  [5.94045076e+03 7.95864103e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [5.94045076e+03 8.94665224e+03 4.36673472e+03 3.52184257e+00]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10969.27819829 -9022.41491635 -7525.7277781  19212.65274485]
------
Step:16, Action:North
State  288
Old Q Values:  [10969.27819829 -9022.41491635 -7525.7277781  19212.65274485]
New Q values:  [ 6396.1673009  -9022.41491635 -7525.7277781  19212.65274485]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.12617215e+01  6.69685341e+03 -6.17035694e+03  3.96578640e+00]
------
Step:17, Action:South
State  216
Old Q Values:  [ 9.12617215e+01  6.69685341e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 9.12617215e+01  8.44193719e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  23
xxxxx
x g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6396.1673009  -9022.41491635 -7525.7277781  19212.65274485]
------
Step:18, Action:West
State  288
Old Q Values:  [ 6396.1673009  -9022.41491635 -7525.7277781  19212.65274485]
New Q values:  [ 6396.1673009  -9022.41491635 -7525.7277781  16876.98665348]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 30621.7518518   4085.28925534]
------
Step:19, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 30621.7518518   4085.28925534]
New Q values:  [-2527.46239811 -8521.23367799 17311.19673676  4085.28925534]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6396.1673009  -9022.41491635 -7525.7277781  16876.98665348]
------
Step:20, Action:North
State  288
Old Q Values:  [ 6396.1673009  -9022.41491635 -7525.7277781  16876.98665348]
New Q values:  [ 5859.81725572 -9022.41491635 -7525.7277781  16876.98665348]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6602.86182069 11006.50111786   535.33196404 -3385.12952694]
------
Step:21, Action:South
State  216
Old Q Values:  [ 9.12617215e+01  8.44193719e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 9.12617215e+01  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5859.81725572 -9022.41491635 -7525.7277781  16876.98665348]
------
Step:22, Action:West
State  288
Old Q Values:  [ 5859.81725572 -9022.41491635 -7525.7277781  16876.98665348]
New Q values:  [ 5859.81725572 -9022.41491635 -7525.7277781  11943.55368242]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 17311.19673676  4085.28925534]
------
Step:23, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 17311.19673676  4085.28925534]
New Q values:  [-2527.46239811 -8521.23367799 10506.94479943  4085.28925534]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5859.81725572 -9022.41491635 -7525.7277781  11943.55368242]
------
Step:24, Action:West
State  288
Old Q Values:  [ 5859.81725572 -9022.41491635 -7525.7277781  11943.55368242]
New Q values:  [ 5859.81725572 -9022.41491635 -7525.7277781   7928.9049128 ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10506.94479943  4085.28925534]
------
Step:25, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2251.33793821 1373.06166919]
New Q values:  [  37.74111519 -168.92307549 3278.60664912 1373.06166919]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5859.81725572 -9022.41491635 -7525.7277781   7928.9049128 ]
------
Step:26, Action:West
State  288
Old Q Values:  [ 5859.81725572 -9022.41491635 -7525.7277781   7928.9049128 ]
New Q values:  [ 5859.81725572 -9022.41491635 -7525.7277781   4154.54395986]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3278.60664912 1373.06166919]
------
Step:27, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10506.94479943  4085.28925534]
New Q values:  [-2527.46239811 -8521.23367799 10506.94479943 15919.12893768]
Reward: 9  Episode Reward:  33
xxxxx
x   x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[47598.71078516  2256.66526474 12394.68064405  1875.31501677]
------
Step:28, Action:North
State  257
Old Q Values:  [47598.71078516  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [99091.99974348  2256.66526474 12394.68064405  1875.31501677]
Reward: 100009  Episode Reward:  100042
xxxxx
x   x
xag x
x   x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        3781.71369671 -180.6         117.8729306 ]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6        3781.71369671 -180.6         117.8729306 ]
New Q values:  [-180.6        4202.08114977 -180.6         117.8729306 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.94045076e+03 8.94665224e+03 4.36673472e+03 3.52184257e+00]
------
Step:2, Action:South
State  208
Old Q Values:  [ 6602.86182069 11006.50111786   535.33196404 -3385.12952694]
New Q values:  [ 6602.86182069  6165.94562386   535.33196404 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5859.81725572 -9022.41491635 -7525.7277781   4154.54395986]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5859.81725572 -9022.41491635 -7525.7277781   4154.54395986]
New Q values:  [ 4324.18544849 -9022.41491635 -7525.7277781   4154.54395986]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6602.86182069  6165.94562386   535.33196404 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [ 6602.86182069  6165.94562386   535.33196404 -3385.12952694]
New Q values:  [ 7991.09402662  6165.94562386   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  16
xxxxx
x.gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879 17835.16432783 -6245.61866138 -5080.29201136]
------
Step:5, Action:South
State  130
Old Q Values:  [30127.30102033  5681.27428011  -180.00807518 96897.04977034]
New Q values:  [30127.30102033  4669.23792003  -180.00807518 96897.04977034]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7991.09402662  6165.94562386   535.33196404 -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [ 7991.09402662  6165.94562386   535.33196404 -3385.12952694]
New Q values:  [ 4456.46195558  6165.94562386   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  14
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4202.08114977 -180.6         117.8729306 ]
------
Step:7, Action:South
State  130
Old Q Values:  [30127.30102033  4669.23792003  -180.00807518 96897.04977034]
New Q values:  [30127.30102033  3716.87885517  -180.00807518 96897.04977034]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4456.46195558  6165.94562386   535.33196404 -3385.12952694]
------
Step:8, Action:South
State  208
Old Q Values:  [ 4456.46195558  6165.94562386   535.33196404 -3385.12952694]
New Q values:  [ 4456.46195558  3763.03388409   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4324.18544849 -9022.41491635 -7525.7277781   4154.54395986]
------
Step:9, Action:North
State  288
Old Q Values:  [ 4324.18544849 -9022.41491635 -7525.7277781   4154.54395986]
New Q values:  [ 3066.01276607 -9022.41491635 -7525.7277781   4154.54395986]
Reward: -1  Episode Reward:  11
xxxxx
xg. x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4456.46195558  3763.03388409   535.33196404 -3385.12952694]
------
Step:10, Action:North
State  208
Old Q Values:  [ 4456.46195558  3763.03388409   535.33196404 -3385.12952694]
New Q values:  [ 7132.53408058  3763.03388409   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  10
xxxxx
x.gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879 17835.16432783 -6245.61866138 -5080.29201136]
------
Step:11, Action:South
State  136
Old Q Values:  [ 1117.30787879 17835.16432783 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  9273.2259553  -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  9
xxxxx
x..gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7132.53408058  3763.03388409   535.33196404 -3385.12952694]
------
Step:12, Action:South
State  208
Old Q Values:  [ 7132.53408058  3763.03388409   535.33196404 -3385.12952694]
New Q values:  [ 7132.53408058  2750.97674159   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3066.01276607 -9022.41491635 -7525.7277781   4154.54395986]
------
Step:13, Action:West
State  288
Old Q Values:  [ 3066.01276607 -9022.41491635 -7525.7277781   4154.54395986]
New Q values:  [ 3066.01276607 -9022.41491635 -7525.7277781   6442.95626525]
Reward: 9  Episode Reward:  17
xxxxx
xg. x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10506.94479943 15919.12893768]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10506.94479943 15919.12893768]
New Q values:  [-2527.46239811 -8521.23367799 10506.94479943 36100.65149812]
Reward: 9  Episode Reward:  26
xxxxx
x.g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[99091.99974348  2256.66526474 12394.68064405  1875.31501677]
------
Step:15, Action:North
State  260
Old Q Values:  [  658.97356535 -8695.4397473   1003.72690283 -2601.74710518]
New Q values:  [ 1504.40217989 -8695.4397473   1003.72690283 -2601.74710518]
Reward: -1  Episode Reward:  25
xxxxx
xg. x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2200.79518731   266.69721195  4138.04251251 -4966.32149798]
------
Step:16, Action:East
State  176
Old Q Values:  [    0.          1327.79507613 75783.37654262     0.        ]
New Q values:  [    0.          1327.79507613 37555.85211252     0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x.. x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.11590444e+03 2.41236717e+04 2.91043938e+03]
------
Step:17, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.11590444e+03 2.41236717e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.11590444e+03 1.17886289e+04 2.91043938e+03]
Reward: -1  Episode Reward:  33
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7132.53408058  2750.97674159   535.33196404 -3385.12952694]
------
Step:18, Action:North
State  208
Old Q Values:  [ 7132.53408058  2750.97674159   535.33196404 -3385.12952694]
New Q values:  [31921.52856333  2750.97674159   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  32
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[30127.30102033  3716.87885517  -180.00807518 96897.04977034]
------
Step:19, Action:West
State  128
Old Q Values:  [ 8775.70846068 24033.32294218 -8652.84       89786.52031307]
New Q values:  [ 8775.70846068 24033.32294218 -8652.84       65154.63900523]
Reward: 9  Episode Reward:  41
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.         18809.06432124  6789.02994987 97448.7696    ]
------
Step:20, Action:South
State  112
Old Q Values:  [    0.         18809.06432124  6789.02994987 97448.7696    ]
New Q values:  [    0.         11059.61439394  6789.02994987 97448.7696    ]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.11590444e+03 1.17886289e+04 2.91043938e+03]
------
Step:21, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.11590444e+03 1.17886289e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.11590444e+03 1.42913101e+04 2.91043938e+03]
Reward: -1  Episode Reward:  39
xxxxx
x.g x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31921.52856333  2750.97674159   535.33196404 -3385.12952694]
------
Step:22, Action:North
State  208
Old Q Values:  [31921.52856333  2750.97674159   535.33196404 -3385.12952694]
New Q values:  [41837.12635644  2750.97674159   535.33196404 -3385.12952694]
Reward: -1  Episode Reward:  38
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[30127.30102033  3716.87885517  -180.00807518 96897.04977034]
------
Step:23, Action:West
State  128
Old Q Values:  [ 8775.70846068 24033.32294218 -8652.84       65154.63900523]
New Q values:  [ 8775.70846068 24033.32294218 -8652.84       49295.88648209]
Reward: -10001  Episode Reward:  -9963
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3278.60664912 1373.06166919]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10506.94479943 36100.65149812]
New Q values:  [-2527.46239811 -8521.23367799  6141.06479935 36100.65149812]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3066.01276607 -9022.41491635 -7525.7277781   6442.95626525]
------
Step:2, Action:West
State  288
Old Q Values:  [ 3066.01276607 -9022.41491635 -7525.7277781   6442.95626525]
New Q values:  [ 3066.01276607 -9022.41491635 -7525.7277781  13406.77795553]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6141.06479935 36100.65149812]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6141.06479935 36100.65149812]
New Q values:  [-2527.46239811 -8521.23367799  6141.06479935  8896.98125321]
Reward: -9991  Episode Reward:  -9983
xxxxx
x...x
x.. x
xg  x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 2426.41572332 6802.12277303 2546.60363946]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  2.39187867e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  1.22569104e+04  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.94045076e+03 8.94665224e+03 4.36673472e+03 3.52184257e+00]
------
Step:2, Action:South
State  210
Old Q Values:  [5.94045076e+03 8.94665224e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [5.94045076e+03 1.60009428e+03 4.36673472e+03 3.52184257e+00]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x.  x
x..gx
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.94045076e+03 1.60009428e+03 4.36673472e+03 3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [5.94045076e+03 1.60009428e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [3.64220465e+03 1.60009428e+03 4.36673472e+03 3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        4202.08114977 -180.6         117.8729306 ]
------
Step:2, Action:South
State  138
Old Q Values:  [-180.6        4202.08114977 -180.6         117.8729306 ]
New Q values:  [-180.6        2990.25287544 -180.6         117.8729306 ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.64220465e+03 1.60009428e+03 4.36673472e+03 3.52184257e+00]
------
Step:3, Action:East
State  208
Old Q Values:  [41837.12635644  2750.97674159   535.33196404 -3385.12952694]
New Q values:  [41837.12635644  2750.97674159 12584.67069255 -3385.12952694]
Reward: -301  Episode Reward:  -293
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[41837.12635644  2750.97674159 12584.67069255 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [41837.12635644  2750.97674159 12584.67069255 -3385.12952694]
New Q values:  [17631.32640521  2750.97674159 12584.67069255 -3385.12952694]
Reward: -1  Episode Reward:  -294
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2990.25287544 -180.6         117.8729306 ]
------
Step:5, Action:South
State  136
Old Q Values:  [ 1117.30787879  9273.2259553  -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  8998.08830368 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  -295
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17631.32640521  2750.97674159 12584.67069255 -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [17631.32640521  2750.97674159 12584.67069255 -3385.12952694]
New Q values:  [ 9751.35705319  2750.97674159 12584.67069255 -3385.12952694]
Reward: -1  Episode Reward:  -296
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  8998.08830368 -6245.61866138 -5080.29201136]
------
Step:7, Action:South
State  136
Old Q Values:  [ 1117.30787879  8998.08830368 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  7374.03652924 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  -297
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9751.35705319  2750.97674159 12584.67069255 -3385.12952694]
------
Step:8, Action:East
State  208
Old Q Values:  [ 9751.35705319  2750.97674159 12584.67069255 -3385.12952694]
New Q values:  [ 9751.35705319  2750.97674159  8628.66948478 -3385.12952694]
Reward: -301  Episode Reward:  -598
xxxxx
xg. x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9751.35705319  2750.97674159  8628.66948478 -3385.12952694]
------
Step:9, Action:North
State  208
Old Q Values:  [ 9751.35705319  2750.97674159  8628.66948478 -3385.12952694]
New Q values:  [ 4797.01868391  2750.97674159  8628.66948478 -3385.12952694]
Reward: -1  Episode Reward:  -599
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2990.25287544 -180.6         117.8729306 ]
------
Step:10, Action:South
State  136
Old Q Values:  [ 1117.30787879  7374.03652924 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  5537.61545713 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  -600
xxxxx
xg. x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4797.01868391  2750.97674159  8628.66948478 -3385.12952694]
------
Step:11, Action:East
State  208
Old Q Values:  [ 4797.01868391  2750.97674159  8628.66948478 -3385.12952694]
New Q values:  [ 4797.01868391  2750.97674159  5859.46863935 -3385.12952694]
Reward: -301  Episode Reward:  -901
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4797.01868391  2750.97674159  5859.46863935 -3385.12952694]
------
Step:12, Action:East
State  210
Old Q Values:  [3.64220465e+03 1.60009428e+03 4.36673472e+03 3.52184257e+00]
New Q values:  [3.64220465e+03 1.60009428e+03 2.87611430e+03 3.52184257e+00]
Reward: -301  Episode Reward:  -1202
xxxxx
x.. x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.64220465e+03 1.60009428e+03 2.87611430e+03 3.52184257e+00]
------
Step:13, Action:North
State  210
Old Q Values:  [3.64220465e+03 1.60009428e+03 2.87611430e+03 3.52184257e+00]
New Q values:  [2353.35772269 1600.09428144 2876.11430292    3.52184257]
Reward: -1  Episode Reward:  -1203
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2990.25287544 -180.6         117.8729306 ]
------
Step:14, Action:South
State  138
Old Q Values:  [-180.6        2990.25287544 -180.6         117.8729306 ]
New Q values:  [-180.6        2953.34174198 -180.6         117.8729306 ]
Reward: -1  Episode Reward:  -1204
xxxxx
x.. x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4797.01868391  2750.97674159  5859.46863935 -3385.12952694]
------
Step:15, Action:East
State  208
Old Q Values:  [ 4797.01868391  2750.97674159  5859.46863935 -3385.12952694]
New Q values:  [ 4797.01868391  2750.97674159  3921.02804754 -3385.12952694]
Reward: -301  Episode Reward:  -1505
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4797.01868391  2750.97674159  3921.02804754 -3385.12952694]
------
Step:16, Action:North
State  210
Old Q Values:  [2353.35772269 1600.09428144 2876.11430292    3.52184257]
New Q values:  [1826.74561167 1600.09428144 2876.11430292    3.52184257]
Reward: -1  Episode Reward:  -1506
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2953.34174198 -180.6         117.8729306 ]
------
Step:17, Action:South
State  138
Old Q Values:  [-180.6        2953.34174198 -180.6         117.8729306 ]
New Q values:  [-180.6        2619.84230197 -180.6         117.8729306 ]
Reward: -1  Episode Reward:  -1507
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4797.01868391  2750.97674159  3921.02804754 -3385.12952694]
------
Step:18, Action:North
State  210
Old Q Values:  [1826.74561167 1600.09428144 2876.11430292    3.52184257]
New Q values:  [1516.05093526 1600.09428144 2876.11430292    3.52184257]
Reward: -1  Episode Reward:  -1508
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        2619.84230197 -180.6         117.8729306 ]
------
Step:19, Action:South
State  138
Old Q Values:  [-180.6        2619.84230197 -180.6         117.8729306 ]
New Q values:  [-180.6        1910.17121166 -180.6         117.8729306 ]
Reward: -1  Episode Reward:  -1509
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1516.05093526 1600.09428144 2876.11430292    3.52184257]
------
Step:20, Action:East
State  210
Old Q Values:  [1516.05093526 1600.09428144 2876.11430292    3.52184257]
New Q values:  [1516.05093526 1600.09428144 1832.68001204    3.52184257]
Reward: -301  Episode Reward:  -1810
xxxxx
x.. x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1516.05093526 1600.09428144 1832.68001204    3.52184257]
------
Step:21, Action:East
State  210
Old Q Values:  [1516.05093526 1600.09428144 1832.68001204    3.52184257]
New Q values:  [1516.05093526 1600.09428144 1102.27600843    3.52184257]
Reward: -301  Episode Reward:  -2111
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1516.05093526 1600.09428144 1102.27600843    3.52184257]
------
Step:22, Action:South
State  208
Old Q Values:  [ 4797.01868391  2750.97674159  3921.02804754 -3385.12952694]
New Q values:  [ 4797.01868391  5121.8240833   3921.02804754 -3385.12952694]
Reward: -1  Episode Reward:  -2112
xxxxx
x.. x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3066.01276607 -9022.41491635 -7525.7277781  13406.77795553]
------
Step:23, Action:West
State  288
Old Q Values:  [ 3066.01276607 -9022.41491635 -7525.7277781  13406.77795553]
New Q values:  [ 3066.01276607 -9022.41491635 -7525.7277781   6351.69317695]
Reward: 9  Episode Reward:  -2103
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3278.60664912 1373.06166919]
------
Step:24, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 3278.60664912 1373.06166919]
New Q values:  [  37.74111519 -168.92307549 3216.35061273 1373.06166919]
Reward: -1  Episode Reward:  -2104
xxxxx
x..gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3066.01276607 -9022.41491635 -7525.7277781   6351.69317695]
------
Step:25, Action:West
State  288
Old Q Values:  [ 3066.01276607 -9022.41491635 -7525.7277781   6351.69317695]
New Q values:  [ 3066.01276607 -9022.41491635 -7525.7277781   3504.9824546 ]
Reward: -1  Episode Reward:  -2105
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3216.35061273 1373.06166919]
------
Step:26, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 3216.35061273 1373.06166919]
New Q values:  [   37.74111519  -168.92307549 -3662.56501853  1373.06166919]
Reward: -10001  Episode Reward:  -12106
xxxxx
x.. x
x.. x
x. gx
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2452.59180422   69.34437687 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6        2594.84003912    5.4           0.        ]
New Q values:  [-180.6       2798.0215515    5.4          0.       ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1297.25997987   15.18059333 5848.95178617 -180.6       ]
------
Step:2, Action:East
State  181
Old Q Values:  [1297.25997987   15.18059333 5848.95178617 -180.6       ]
New Q values:  [1297.25997987   15.18059333 3429.75596928 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 13458.10471814   450.62327432  1909.41710691]
New Q values:  [-5922.26708831  5800.56038801   450.62327432  1909.41710691]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -3662.56501853  1373.06166919]
------
Step:4, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1427.88351859 1383.10673521]
New Q values:  [  16.82637525  495.22830495 1427.88351859 1245.34062374]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2288.99309885  -40.34168621  999.63796487  -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [2288.99309885  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1943.92403032  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1297.25997987   15.18059333 3429.75596928 -180.6       ]
------
Step:6, Action:East
State  183
Old Q Values:  [ 1047.6630518   1654.71634746 14028.69700268     0.        ]
New Q values:  [1047.6630518  1654.71634746 6043.44871652    0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  22.48535485 1441.89971818  549.89931413  753.62201984]
------
Step:7, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -2.56859888e+03  1.26155064e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -6.59967450e+03  1.26155064e+04  0.00000000e+00]
Reward: -10001  Episode Reward:  -9977
xxxxx
x ..x
x  .x
x g.x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3066.01276607 -9022.41491635 -7525.7277781   3504.9824546 ]
------
Step:1, Action:West
State  288
Old Q Values:  [ 3066.01276607 -9022.41491635 -7525.7277781   3504.9824546 ]
New Q values:  [ 3066.01276607 -9022.41491635 -7525.7277781   1819.3114826 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -3662.56501853  1373.06166919]
------
Step:2, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -3662.56501853  1373.06166919]
New Q values:  [   37.74111519  -168.92307549 -3662.56501853  1137.80187677]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1943.92403032  -40.34168621  999.63796487  -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [1943.92403032  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1811.89640291  -40.34168621  999.63796487  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1297.25997987   15.18059333 3429.75596928 -180.6       ]
------
Step:4, Action:East
State  181
Old Q Values:  [1297.25997987   15.18059333 3429.75596928 -180.6       ]
New Q values:  [1297.25997987   15.18059333 3117.47050412 -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5800.56038801   450.62327432  1909.41710691]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.11590444e+03 1.42913101e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.31485615e+03 1.42913101e+04 2.91043938e+03]
Reward: -1  Episode Reward:  35
xxxxx
x.g x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6141.06479935  8896.98125321]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6141.06479935  8896.98125321]
New Q values:  [-2527.46239811 -8521.23367799  6141.06479935  4009.51315525]
Reward: -1  Episode Reward:  34
xxxxx
xg. x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1504.40217989 -8695.4397473   1003.72690283 -2601.74710518]
------
Step:7, Action:North
State  257
Old Q Values:  [99091.99974348  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [59683.31532681  2256.66526474 12394.68064405  1875.31501677]
Reward: -1  Episode Reward:  33
xxxxx
x.g x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 49262.07387846     0.        ]
------
Step:8, Action:North
State  181
Old Q Values:  [1297.25997987   15.18059333 3117.47050412 -180.6       ]
New Q values:  [ 528.75331523   15.18059333 3117.47050412 -180.6       ]
Reward: 9  Episode Reward:  42
xxxxx
xa.gx
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         14.83107761  0.          0.        ]
------
Step:9, Action:South
State  103
Old Q Values:  [-180.6       2798.0215515    5.4          0.       ]
New Q values:  [-180.6        2053.84977183    5.4           0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 528.75331523   15.18059333 3117.47050412 -180.6       ]
------
Step:10, Action:East
State  183
Old Q Values:  [1047.6630518  1654.71634746 6043.44871652    0.        ]
New Q values:  [1047.6630518  1654.71634746 4457.41631852    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x . x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 2426.41572332 6802.12277303 2546.60363946]
------
Step:11, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  1.22569104e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  5.38819243e+03  1.20371620e+03]
Reward: 9  Episode Reward:  49
xxxxx
x . x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1516.05093526 1600.09428144 1102.27600843    3.52184257]
------
Step:12, Action:South
State  210
Old Q Values:  [1516.05093526 1600.09428144 1102.27600843    3.52184257]
New Q values:  [ 1.51605094e+03 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
Reward: -10001  Episode Reward:  -9952
xxxxx
x . x
x   x
x  gx
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1910.17121166 -180.6         117.8729306 ]
------
Step:1, Action:South
State  136
Old Q Values:  [ 1117.30787879  5537.61545713 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  4752.22744393 -6245.61866138 -5080.29201136]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.12617215e+01  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
------
Step:2, Action:South
State  208
Old Q Values:  [ 4797.01868391  5121.8240833   3921.02804754 -3385.12952694]
New Q values:  [ 4797.01868391  2973.93346314  3921.02804754 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3066.01276607 -9022.41491635 -7525.7277781   1819.3114826 ]
------
Step:3, Action:North
State  288
Old Q Values:  [ 3066.01276607 -9022.41491635 -7525.7277781   1819.3114826 ]
New Q values:  [ 2664.9107116  -9022.41491635 -7525.7277781   1819.3114826 ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4797.01868391  2973.93346314  3921.02804754 -3385.12952694]
------
Step:4, Action:North
State  208
Old Q Values:  [ 4797.01868391  2973.93346314  3921.02804754 -3385.12952694]
New Q values:  [ 3343.87570674  2973.93346314  3921.02804754 -3385.12952694]
Reward: -1  Episode Reward:  16
xxxxx
xg.ax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  4752.22744393 -6245.61866138 -5080.29201136]
------
Step:5, Action:South
State  136
Old Q Values:  [ 1117.30787879  4752.22744393 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  3076.59939184 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3343.87570674  2973.93346314  3921.02804754 -3385.12952694]
------
Step:6, Action:East
State  208
Old Q Values:  [ 3343.87570674  2973.93346314  3921.02804754 -3385.12952694]
New Q values:  [ 3343.87570674  2973.93346314  2564.11963328 -3385.12952694]
Reward: -301  Episode Reward:  -286
xxxxx
xg. x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3343.87570674  2973.93346314  2564.11963328 -3385.12952694]
------
Step:7, Action:North
State  208
Old Q Values:  [ 3343.87570674  2973.93346314  2564.11963328 -3385.12952694]
New Q values:  [ 2259.93010025  2973.93346314  2564.11963328 -3385.12952694]
Reward: -1  Episode Reward:  -287
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  3076.59939184 -6245.61866138 -5080.29201136]
------
Step:8, Action:South
State  136
Old Q Values:  [ 1117.30787879  3076.59939184 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  2122.21979568 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  -288
xxxxx
xg. x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2259.93010025  2973.93346314  2564.11963328 -3385.12952694]
------
Step:9, Action:South
State  208
Old Q Values:  [ 2259.93010025  2973.93346314  2564.11963328 -3385.12952694]
New Q values:  [ 2259.93010025  1988.44659874  2564.11963328 -3385.12952694]
Reward: -1  Episode Reward:  -289
xxxxx
x.. x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2664.9107116  -9022.41491635 -7525.7277781   1819.3114826 ]
------
Step:10, Action:North
State  288
Old Q Values:  [ 2664.9107116  -9022.41491635 -7525.7277781   1819.3114826 ]
New Q values:  [ 1834.60017462 -9022.41491635 -7525.7277781   1819.3114826 ]
Reward: -1  Episode Reward:  -290
xxxxx
xg. x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2259.93010025  1988.44659874  2564.11963328 -3385.12952694]
------
Step:11, Action:East
State  208
Old Q Values:  [ 2259.93010025  1988.44659874  2564.11963328 -3385.12952694]
New Q values:  [ 2259.93010025  1988.44659874  1614.2837433  -3385.12952694]
Reward: -301  Episode Reward:  -591
xxxxx
x.. x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2259.93010025  1988.44659874  1614.2837433  -3385.12952694]
------
Step:12, Action:North
State  208
Old Q Values:  [ 2259.93010025  1988.44659874  1614.2837433  -3385.12952694]
New Q values:  [ 1540.0379788   1988.44659874  1614.2837433  -3385.12952694]
Reward: -1  Episode Reward:  -592
xxxxx
xg.ax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  2122.21979568 -6245.61866138 -5080.29201136]
------
Step:13, Action:South
State  130
Old Q Values:  [30127.30102033  3716.87885517  -180.00807518 96897.04977034]
New Q values:  [30127.30102033  2082.68552169  -180.00807518 96897.04977034]
Reward: -1  Episode Reward:  -593
xxxxx
x.. x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1540.0379788   1988.44659874  1614.2837433  -3385.12952694]
------
Step:14, Action:South
State  208
Old Q Values:  [ 1540.0379788   1988.44659874  1614.2837433  -3385.12952694]
New Q values:  [ 1540.0379788   1345.15869188  1614.2837433  -3385.12952694]
Reward: -1  Episode Reward:  -594
xxxxx
xg. x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1834.60017462 -9022.41491635 -7525.7277781   1819.3114826 ]
------
Step:15, Action:North
State  288
Old Q Values:  [ 1834.60017462 -9022.41491635 -7525.7277781   1819.3114826 ]
New Q values:  [ 1217.52519284 -9022.41491635 -7525.7277781   1819.3114826 ]
Reward: -1  Episode Reward:  -595
xxxxx
x.g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1540.0379788   1345.15869188  1614.2837433  -3385.12952694]
------
Step:16, Action:East
State  208
Old Q Values:  [ 1540.0379788   1345.15869188  1614.2837433  -3385.12952694]
New Q values:  [ 1540.0379788   1345.15869188   949.39862031 -3385.12952694]
Reward: -301  Episode Reward:  -896
xxxxx
xg. x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1540.0379788   1345.15869188   949.39862031 -3385.12952694]
------
Step:17, Action:North
State  208
Old Q Values:  [ 1540.0379788   1345.15869188   949.39862031 -3385.12952694]
New Q values:  [29684.53012262  1345.15869188   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -897
xxxxx
x..ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[30127.30102033  2082.68552169  -180.00807518 96897.04977034]
------
Step:18, Action:West
State  138
Old Q Values:  [-180.6        1910.17121166 -180.6         117.8729306 ]
New Q values:  [-180.6        1910.17121166 -180.6         586.23726197]
Reward: 9  Episode Reward:  -888
xxxxx
x.a x
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1778.96029909    20.22221118]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1778.96029909    20.22221118]
New Q values:  [ -281.736      -9545.4473624   1284.03548313    20.22221118]
Reward: -1  Episode Reward:  -889
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1910.17121166 -180.6         586.23726197]
------
Step:20, Action:South
State  136
Old Q Values:  [ 1117.30787879  2122.21979568 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  3380.06917935 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  -890
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 9.12617215e+01  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
------
Step:21, Action:South
State  208
Old Q Values:  [29684.53012262  1345.15869188   949.39862031 -3385.12952694]
New Q values:  [29684.53012262  1083.25692153   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -891
xxxxx
x.g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1217.52519284 -9022.41491635 -7525.7277781   1819.3114826 ]
------
Step:22, Action:West
State  288
Old Q Values:  [ 1217.52519284 -9022.41491635 -7525.7277781   1819.3114826 ]
New Q values:  [ 1217.52519284 -9022.41491635 -7525.7277781   2575.44403284]
Reward: 9  Episode Reward:  -882
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6141.06479935  4009.51315525]
------
Step:23, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6141.06479935  4009.51315525]
New Q values:  [-2527.46239811 -8521.23367799  3228.45912959  4009.51315525]
Reward: -1  Episode Reward:  -883
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1217.52519284 -9022.41491635 -7525.7277781   2575.44403284]
------
Step:24, Action:West
State  288
Old Q Values:  [ 1217.52519284 -9022.41491635 -7525.7277781   2575.44403284]
New Q values:  [ 1217.52519284 -9022.41491635 -7525.7277781   2232.43155971]
Reward: -1  Episode Reward:  -884
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3228.45912959  4009.51315525]
------
Step:25, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3228.45912959  4009.51315525]
New Q values:  [-2527.46239811 -8521.23367799  3228.45912959 19514.19986014]
Reward: 9  Episode Reward:  -875
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[59683.31532681  2256.66526474 12394.68064405  1875.31501677]
------
Step:26, Action:North
State  256
Old Q Values:  [35558.86345191  7580.15833597  5576.40109469   644.94785455]
New Q values:  [25495.70101452  7580.15833597  5576.40109469   644.94785455]
Reward: 9  Episode Reward:  -866
xxxxx
xg  x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NW
[    0.          1327.79507613 37555.85211252     0.        ]
------
Step:27, Action:East
State  176
Old Q Values:  [    0.          1327.79507613 37555.85211252     0.        ]
New Q values:  [    0.          1327.79507613 19309.13388188     0.        ]
Reward: -1  Episode Reward:  -867
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.31485615e+03 1.42913101e+04 2.91043938e+03]
------
Step:28, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.31485615e+03 1.42913101e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.31485615e+03 1.46212831e+04 2.91043938e+03]
Reward: -1  Episode Reward:  -868
xxxxx
xg  x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29684.53012262  1083.25692153   949.39862031 -3385.12952694]
------
Step:29, Action:North
State  208
Old Q Values:  [29684.53012262  1083.25692153   949.39862031 -3385.12952694]
New Q values:  [26661.97799368  1083.25692153   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -869
xxxxx
x.gax
x   x
x   x
xxxxx
Step:30, Action:North
State  130
Old Q Values:  [30127.30102033  2082.68552169  -180.00807518 96897.04977034]
New Q values:  [40939.43533923  2082.68552169  -180.00807518 96897.04977034]
Reward: -301  Episode Reward:  -1170
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  2082.68552169  -180.00807518 96897.04977034]
------
Step:31, Action:West
State  128
Old Q Values:  [ 8775.70846068 24033.32294218 -8652.84       49295.88648209]
New Q values:  [ 8775.70846068 24033.32294218 -8652.84       42952.38547284]
Reward: -10001  Episode Reward:  -11171
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.51605094e+03 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
------
Step:1, Action:North
State  208
Old Q Values:  [26661.97799368  1083.25692153   949.39862031 -3385.12952694]
New Q values:  [11243.24256097  1083.25692153   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1910.17121166 -180.6         586.23726197]
------
Step:2, Action:South
State  136
Old Q Values:  [ 1117.30787879  3380.06917935 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  4724.40044003 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11243.24256097  1083.25692153   949.39862031 -3385.12952694]
------
Step:3, Action:North
State  208
Old Q Values:  [11243.24256097  1083.25692153   949.39862031 -3385.12952694]
New Q values:  [ 5914.0171564   1083.25692153   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  7
xxxxx
x.gax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  4724.40044003 -6245.61866138 -5080.29201136]
------
Step:4, Action:South
State  136
Old Q Values:  [ 1117.30787879  4724.40044003 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879  3663.36532293 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  6
xxxxx
xg. x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5914.0171564   1083.25692153   949.39862031 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [ 5914.0171564   1083.25692153   949.39862031 -3385.12952694]
New Q values:  [ 2938.05822606  1083.25692153   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1910.17121166 -180.6         586.23726197]
------
Step:6, Action:South
State  138
Old Q Values:  [-180.6        1910.17121166 -180.6         586.23726197]
New Q values:  [-180.6        1218.28376524 -180.6         586.23726197]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.51605094e+03 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
------
Step:7, Action:North
State  208
Old Q Values:  [ 2938.05822606  1083.25692153   949.39862031 -3385.12952694]
New Q values:  [ 1540.10842     1083.25692153   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  3
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6        1218.28376524 -180.6         586.23726197]
------
Step:8, Action:South
State  138
Old Q Values:  [-180.6        1218.28376524 -180.6         586.23726197]
New Q values:  [-180.6         941.52878667 -180.6         586.23726197]
Reward: -1  Episode Reward:  2
xxxxx
x.. x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.51605094e+03 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
------
Step:9, Action:North
State  208
Old Q Values:  [ 1540.10842     1083.25692153   949.39862031 -3385.12952694]
New Q values:  [  897.902004    1083.25692153   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  1
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         941.52878667 -180.6         586.23726197]
------
Step:10, Action:South
State  138
Old Q Values:  [-180.6         941.52878667 -180.6         586.23726197]
New Q values:  [-180.6         830.82679525 -180.6         586.23726197]
Reward: -1  Episode Reward:  0
xxxxx
x.. x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.51605094e+03 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
------
Step:11, Action:North
State  208
Old Q Values:  [  897.902004    1083.25692153   949.39862031 -3385.12952694]
New Q values:  [  607.80884017  1083.25692153   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -1
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         830.82679525 -180.6         586.23726197]
------
Step:12, Action:South
State  138
Old Q Values:  [-180.6         830.82679525 -180.6         586.23726197]
New Q values:  [-180.6         656.70779456 -180.6         586.23726197]
Reward: -1  Episode Reward:  -2
xxxxx
x.. x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  607.80884017  1083.25692153   949.39862031 -3385.12952694]
------
Step:13, Action:South
State  208
Old Q Values:  [  607.80884017  1083.25692153   949.39862031 -3385.12952694]
New Q values:  [  607.80884017  1108.43223653   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  7
xxxxx
x.. x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1217.52519284 -9022.41491635 -7525.7277781   2232.43155971]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1217.52519284 -9022.41491635 -7525.7277781   2232.43155971]
New Q values:  [ 1217.52519284 -9022.41491635 -7525.7277781   6752.63258193]
Reward: 9  Episode Reward:  16
xxxxx
x.. x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3228.45912959 19514.19986014]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3228.45912959 19514.19986014]
New Q values:  [-2527.46239811 -8521.23367799  3316.57342642 19514.19986014]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1217.52519284 -9022.41491635 -7525.7277781   6752.63258193]
------
Step:16, Action:North
State  288
Old Q Values:  [ 1217.52519284 -9022.41491635 -7525.7277781   6752.63258193]
New Q values:  [  941.22535771 -9022.41491635 -7525.7277781   6752.63258193]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.51605094e+03 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
------
Step:17, Action:North
State  208
Old Q Values:  [  607.80884017  1108.43223653   949.39862031 -3385.12952694]
New Q values:  [29311.63846717  1108.43223653   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  13
xxxxx
x..ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  2082.68552169  -180.00807518 96897.04977034]
------
Step:18, Action:West
State  130
Old Q Values:  [40939.43533923  2082.68552169  -180.00807518 96897.04977034]
New Q values:  [40939.43533923  2082.68552169  -180.00807518 74382.58177601]
Reward: 9  Episode Reward:  22
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  47000.60902691 118727.87289289]
------
Step:19, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1294.91470007 -2122.74105737]
New Q values:  [-9594.56523706 -8069.05606225  1294.91470007  -841.67946252]
Reward: 9  Episode Reward:  31
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            6.72320144 -5974.07473851     0.        ]
------
Step:20, Action:South
State  99
Old Q Values:  [    0.         49063.04920959 59221.22700813     0.        ]
New Q values:  [    0.         39677.73511325 59221.22700813     0.        ]
Reward: 9  Episode Reward:  40
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 49262.07387846     0.        ]
------
Step:21, Action:North
State  180
Old Q Values:  [-2200.79518731   266.69721195  4138.04251251 -4966.32149798]
New Q values:  [ -874.30797323   266.69721195  4138.04251251 -4966.32149798]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684    22.03367232  -180.6       ]
------
Step:22, Action:East
State  99
Old Q Values:  [    0.         39677.73511325 59221.22700813     0.        ]
New Q values:  [    0.         39677.73511325 59306.25267112     0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  47000.60902691 118727.87289289]
------
Step:23, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624   1284.03548313    20.22221118]
New Q values:  [ -281.736      -9545.4473624   1284.03548313    14.09898617]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684    22.03367232  -180.6       ]
------
Step:24, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684    22.03367232  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   393.42411387  -180.6       ]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1284.03548313    14.09898617]
------
Step:25, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1284.03548313    14.09898617]
New Q values:  [ -281.736      -9545.4473624    710.02653162    14.09898617]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         656.70779456 -180.6         586.23726197]
------
Step:26, Action:South
State  136
Old Q Values:  [ 1117.30787879  3663.36532293 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879 10258.23766932 -6245.61866138 -5080.29201136]
Reward: -1  Episode Reward:  34
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29311.63846717  1108.43223653   949.39862031 -3385.12952694]
------
Step:27, Action:North
State  208
Old Q Values:  [29311.63846717  1108.43223653   949.39862031 -3385.12952694]
New Q values:  [11921.06772524  1108.43223653   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         656.70779456 -180.6         586.23726197]
------
Step:28, Action:South
State  138
Old Q Values:  [-180.6         656.70779456 -180.6         586.23726197]
New Q values:  [-180.6         716.8983984  -180.6         586.23726197]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1.51605094e+03 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
------
Step:29, Action:North
State  210
Old Q Values:  [ 1.51605094e+03 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
New Q values:  [ 8.20889894e+02 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         716.8983984  -180.6         586.23726197]
------
Step:30, Action:South
State  138
Old Q Values:  [-180.6         716.8983984  -180.6         586.23726197]
New Q values:  [-180.6         616.84216189 -180.6         586.23726197]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 8.20889894e+02 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
------
Step:31, Action:East
State  210
Old Q Values:  [ 8.20889894e+02 -4.44075846e+03  1.10227601e+03  3.52184257e+00]
New Q values:  [ 8.20889894e+02 -4.44075846e+03  5.90993206e+02  3.52184257e+00]
Reward: -301  Episode Reward:  -271
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 8.20889894e+02 -4.44075846e+03  5.90993206e+02  3.52184257e+00]
------
Step:32, Action:North
State  208
Old Q Values:  [11921.06772524  1108.43223653   949.39862031 -3385.12952694]
New Q values:  [27082.6016229   1108.43223653   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -272
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  2082.68552169  -180.00807518 74382.58177601]
------
Step:33, Action:West
State  136
Old Q Values:  [ 1117.30787879 10258.23766932 -6245.61866138 -5080.29201136]
New Q values:  [ 1117.30787879 10258.23766932 -6245.61866138 -7644.24239453]
Reward: -10001  Episode Reward:  -10273
xxxxx
x g x
x . x
x   x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-180.6         616.84216189 -180.6         586.23726197]
------
Step:1, Action:South
State  138
Old Q Values:  [-180.6         616.84216189 -180.6         586.23726197]
New Q values:  [-180.6         498.40383284 -180.6         586.23726197]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
Step:2, Action:North
State  210
Old Q Values:  [ 8.20889894e+02 -4.44075846e+03  5.90993206e+02  3.52184257e+00]
New Q values:  [ 5.03627136e+02 -4.44075846e+03  5.90993206e+02  3.52184257e+00]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.. x
xg. x
xxxxx
Step:3, Action:North
State  138
Old Q Values:  [-180.6         498.40383284 -180.6         586.23726197]
New Q values:  [ -76.96882141  498.40383284 -180.6         586.23726197]
Reward: -301  Episode Reward:  -293
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141  498.40383284 -180.6         586.23726197]
------
Step:4, Action:West
State  136
Old Q Values:  [ 1117.30787879 10258.23766932 -6245.61866138 -7644.24239453]
New Q values:  [ 1117.30787879 10258.23766932 -6245.61866138 -2663.82254779]
Reward: 9  Episode Reward:  -284
xxxxx
xga x
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1294.91470007  -841.67946252]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    710.02653162    14.09898617]
New Q values:  [ -281.736      -9545.4473624    459.28179124    14.09898617]
Reward: -1  Episode Reward:  -285
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141  498.40383284 -180.6         586.23726197]
------
Step:6, Action:West
State  138
Old Q Values:  [ -76.96882141  498.40383284 -180.6         586.23726197]
New Q values:  [ -76.96882141  498.40383284 -180.6         371.67944216]
Reward: -1  Episode Reward:  -286
xxxxx
x.a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    459.28179124    14.09898617]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    459.28179124    14.09898617]
New Q values:  [ -281.736      -9545.4473624    332.63386635    14.09898617]
Reward: -1  Episode Reward:  -287
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141  498.40383284 -180.6         371.67944216]
------
Step:8, Action:South
State  138
Old Q Values:  [ -76.96882141  498.40383284 -180.6         371.67944216]
New Q values:  [ -76.96882141 8323.54202001 -180.6         371.67944216]
Reward: -1  Episode Reward:  -288
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27082.6016229   1108.43223653   949.39862031 -3385.12952694]
------
Step:9, Action:North
State  210
Old Q Values:  [ 5.03627136e+02 -4.44075846e+03  5.90993206e+02  3.52184257e+00]
New Q values:  [ 2.69791346e+03 -4.44075846e+03  5.90993206e+02  3.52184257e+00]
Reward: -1  Episode Reward:  -289
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 8323.54202001 -180.6         371.67944216]
------
Step:10, Action:South
State  138
Old Q Values:  [ -76.96882141 8323.54202001 -180.6         371.67944216]
New Q values:  [  -76.96882141 11453.59729487  -180.6          371.67944216]
Reward: -1  Episode Reward:  -290
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27082.6016229   1108.43223653   949.39862031 -3385.12952694]
------
Step:11, Action:North
State  208
Old Q Values:  [27082.6016229   1108.43223653   949.39862031 -3385.12952694]
New Q values:  [14268.51983762  1108.43223653   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -291
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  -76.96882141 11453.59729487  -180.6          371.67944216]
------
Step:12, Action:West
State  138
Old Q Values:  [  -76.96882141 11453.59729487  -180.6          371.67944216]
New Q values:  [  -76.96882141 11453.59729487  -180.6          193.81634308]
Reward: -1  Episode Reward:  -292
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    56.02176457   152.48188739]
------
Step:13, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    56.02176457   152.48188739]
New Q values:  [ -253.44886264 -1902.20915811    56.02176457    86.98981138]
Reward: 9  Episode Reward:  -283
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   68.65685476 -252.78192178]
------
Step:14, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   68.65685476 -252.78192178]
New Q values:  [-252.35169558    7.11267516  126.65290181 -252.78192178]
Reward: -1  Episode Reward:  -284
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    332.63386635    14.09898617]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    332.63386635    14.09898617]
New Q values:  [ -281.736      -9545.4473624   3568.532735      14.09898617]
Reward: -1  Episode Reward:  -285
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  -76.96882141 11453.59729487  -180.6          193.81634308]
------
Step:16, Action:South
State  136
Old Q Values:  [ 1117.30787879 10258.23766932 -6245.61866138 -2663.82254779]
New Q values:  [ 1117.30787879  8383.25101902 -6245.61866138 -2663.82254779]
Reward: -1  Episode Reward:  -286
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14268.51983762  1108.43223653   949.39862031 -3385.12952694]
------
Step:17, Action:North
State  216
Old Q Values:  [ 9.12617215e+01  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 3.47198388e+03  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  -287
xxxxx
x  ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  -76.96882141 11453.59729487  -180.6          193.81634308]
------
Step:18, Action:South
State  138
Old Q Values:  [  -76.96882141 11453.59729487  -180.6          193.81634308]
New Q values:  [ -76.96882141 7112.62017903 -180.6         193.81634308]
Reward: -1  Episode Reward:  -288
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.47198388e+03  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
------
Step:19, Action:South
State  210
Old Q Values:  [ 2.69791346e+03 -4.44075846e+03  5.90993206e+02  3.52184257e+00]
New Q values:  [2697.91346042  248.88639154  590.9932059     3.52184257]
Reward: -1  Episode Reward:  -289
xxxxx
x   x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  941.22535771 -9022.41491635 -7525.7277781   6752.63258193]
------
Step:20, Action:North
State  288
Old Q Values:  [  941.22535771 -9022.41491635 -7525.7277781   6752.63258193]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781   6752.63258193]
Reward: -1  Episode Reward:  -290
xxxxx
x   x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2697.91346042  248.88639154  590.9932059     3.52184257]
------
Step:21, Action:North
State  208
Old Q Values:  [14268.51983762  1108.43223653   949.39862031 -3385.12952694]
New Q values:  [ 7840.59398876  1108.43223653   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -291
xxxxx
x  ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 7112.62017903 -180.6         193.81634308]
------
Step:22, Action:West
State  138
Old Q Values:  [ -76.96882141 7112.62017903 -180.6         193.81634308]
New Q values:  [ -76.96882141 7112.62017903 -180.6         103.02348065]
Reward: -1  Episode Reward:  -292
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    56.02176457    86.98981138]
------
Step:23, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    56.02176457    86.98981138]
New Q values:  [ -253.44886264 -1902.20915811    56.02176457    72.1917951 ]
Reward: -1  Episode Reward:  -293
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  126.65290181 -252.78192178]
------
Step:24, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516  126.65290181 -252.78192178]
New Q values:  [-252.35169558    7.11267516   71.71869925 -252.78192178]
Reward: -1  Episode Reward:  -294
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    56.02176457    72.1917951 ]
------
Step:25, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    56.02176457    72.1917951 ]
New Q values:  [ -253.44886264 -1902.20915811    56.02176457    49.79232781]
Reward: -1  Episode Reward:  -295
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   71.71869925 -252.78192178]
------
Step:26, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   71.71869925 -252.78192178]
New Q values:  [-252.35169558    7.11267516   44.89400907 -252.78192178]
Reward: -1  Episode Reward:  -296
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    56.02176457    49.79232781]
------
Step:27, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   3568.532735      14.09898617]
New Q values:  [ -281.736      -9545.4473624   3560.59914771    14.09898617]
Reward: -1  Episode Reward:  -297
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 7112.62017903 -180.6         103.02348065]
------
Step:28, Action:South
State  138
Old Q Values:  [ -76.96882141 7112.62017903 -180.6         103.02348065]
New Q values:  [ -76.96882141 5376.22933269 -180.6         103.02348065]
Reward: -1  Episode Reward:  -298
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.47198388e+03  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
------
Step:29, Action:South
State  208
Old Q Values:  [ 7840.59398876  1108.43223653   949.39862031 -3385.12952694]
New Q values:  [ 7840.59398876  2468.56266919   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -299
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781   6752.63258193]
------
Step:30, Action:West
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781   6752.63258193]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781   8560.71299081]
Reward: 9  Episode Reward:  -290
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3316.57342642 19514.19986014]
------
Step:31, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3316.57342642 19514.19986014]
New Q values:  [-2527.46239811 -8521.23367799  3316.57342642 25716.0745421 ]
Reward: 9  Episode Reward:  -281
xxxxx
x g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[59683.31532681  2256.66526474 12394.68064405  1875.31501677]
------
Step:32, Action:North
State  261
Old Q Values:  [1811.89640291  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1665.3997124   -40.34168621  999.63796487  -35.88578819]
Reward: 9  Episode Reward:  -272
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 528.75331523   15.18059333 3117.47050412 -180.6       ]
------
Step:33, Action:East
State  177
Old Q Values:  [66823.71809805  4025.17604709 49262.07387846     0.        ]
New Q values:  [66823.71809805  4025.17604709 84096.61447717     0.        ]
Reward: 100009  Episode Reward:  99737
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781   8560.71299081]
------
Step:1, Action:West
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781   8560.71299081]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781  11144.50755896]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3316.57342642 25716.0745421 ]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3316.57342642 25716.0745421 ]
New Q values:  [-2527.46239811 -8521.23367799  3316.57342642  4743.15047081]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
x. .x
xg  x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2452.59180422   69.34437687 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6        2053.84977183    5.4           0.        ]
New Q values:  [-180.6        2164.16480429    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 4457.41631852    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [1047.6630518  1654.71634746 4457.41631852    0.        ]
New Q values:  [1047.6630518  1654.71634746 5573.0184363     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.59967450e+03  1.26155064e+04  0.00000000e+00]
------
Step:3, Action:East
State  195
Old Q Values:  [  38.85388605 2426.41572332 6802.12277303 2546.60363946]
New Q values:  [  38.85388605 2426.41572332 3535.62314734 2546.60363946]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2697.91346042  248.88639154  590.9932059     3.52184257]
------
Step:4, Action:North
State  218
Old Q Values:  [  50.49262346 4760.18608796    0.          429.03841886]
New Q values:  [1638.46584919 4760.18608796    0.          429.03841886]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 5376.22933269 -180.6         103.02348065]
------
Step:5, Action:South
State  130
Old Q Values:  [40939.43533923  2082.68552169  -180.00807518 74382.58177601]
New Q values:  [40939.43533923  1641.8482468   -180.00807518 74382.58177601]
Reward: -1  Episode Reward:  35
xxxxx
x . x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2697.91346042  248.88639154  590.9932059     3.52184257]
------
Step:6, Action:North
State  216
Old Q Values:  [ 3.47198388e+03  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 3.00106235e+03  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  34
xxxxx
x .ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 5376.22933269 -180.6         103.02348065]
------
Step:7, Action:South
State  136
Old Q Values:  [ 1117.30787879  8383.25101902 -6245.61866138 -2663.82254779]
New Q values:  [ 1117.30787879  5884.48166869 -6245.61866138 -2663.82254779]
Reward: -1  Episode Reward:  33
xxxxx
xg. x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.00106235e+03  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
------
Step:8, Action:South
State  216
Old Q Values:  [ 3.00106235e+03  8.43927087e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 3.00106235e+03  6.72446062e+03 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  42
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781  11144.50755896]
------
Step:9, Action:West
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781  11144.50755896]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781   5880.14816482]
Reward: -1  Episode Reward:  41
xxxxx
x . x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3316.57342642  4743.15047081]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3316.57342642  4743.15047081]
New Q values:  [-2527.46239811 -8521.23367799  3316.57342642  2402.28010204]
Reward: 9  Episode Reward:  50
xxxxx
x . x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1665.3997124   -40.34168621  999.63796487  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [1665.3997124   -40.34168621  999.63796487  -35.88578819]
New Q values:  [2337.46541585  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  49
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 5573.0184363     0.        ]
------
Step:12, Action:East
State  183
Old Q Values:  [1047.6630518  1654.71634746 5573.0184363     0.        ]
New Q values:  [1047.6630518  1654.71634746 3845.06510291    0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x . x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -6.70590551e+03  5.38819243e+03  1.20371620e+03]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  5.38819243e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  2.96405101e+03  1.20371620e+03]
Reward: -1  Episode Reward:  47
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2697.91346042  248.88639154  590.9932059     3.52184257]
------
Step:14, Action:North
State  210
Old Q Values:  [2697.91346042  248.88639154  590.9932059     3.52184257]
New Q values:  [2.33933399e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  46
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  1641.8482468   -180.00807518 74382.58177601]
------
Step:15, Action:West
State  130
Old Q Values:  [40939.43533923  1641.8482468   -180.00807518 74382.58177601]
New Q values:  [ 40939.43533923   1641.8482468    -180.00807518 125376.79457827]
Reward: 100009  Episode Reward:  100055
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 2426.41572332 3535.62314734 2546.60363946]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  2.96405101e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -6.70590551e+03  8.20902238e+03  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.33933399e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:2, Action:North
State  210
Old Q Values:  [2.33933399e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
New Q values:  [4.69757743e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 40939.43533923   1641.8482468    -180.00807518 125376.79457827]
------
Step:3, Action:West
State  138
Old Q Values:  [ -76.96882141 5376.22933269 -180.6         103.02348065]
New Q values:  [ -76.96882141 5376.22933269 -180.6        1114.78913657]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3560.59914771    14.09898617]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   3560.59914771    14.09898617]
New Q values:  [ -281.736      -9545.4473624   3036.50845889    14.09898617]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 5376.22933269 -180.6        1114.78913657]
------
Step:5, Action:South
State  138
Old Q Values:  [ -76.96882141 5376.22933269 -180.6        1114.78913657]
New Q values:  [  -76.96882141 16242.62403516  -180.6         1114.78913657]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.69757743e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [4.69757743e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
New Q values:  [2.36624969e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  -76.96882141 16242.62403516  -180.6         1114.78913657]
------
Step:7, Action:South
State  138
Old Q Values:  [  -76.96882141 16242.62403516  -180.6         1114.78913657]
New Q values:  [  -76.96882141 13595.19869806  -180.6         1114.78913657]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.36624969e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [2.36624969e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
New Q values:  [1.35429584e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  -76.96882141 13595.19869806  -180.6         1114.78913657]
------
Step:9, Action:South
State  138
Old Q Values:  [  -76.96882141 13595.19869806  -180.6         1114.78913657]
New Q values:  [ -76.96882141 9500.36699565 -180.6        1114.78913657]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.35429584e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:10, Action:North
State  210
Old Q Values:  [1.35429584e+04 2.48886392e+02 5.90993206e+02 3.52184257e+00]
New Q values:  [8.26669345e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
x.  x
x..gx
xxxxx
Step:11, Action:West
State  138
Old Q Values:  [ -76.96882141 9500.36699565 -180.6        1114.78913657]
New Q values:  [ -76.96882141 9500.36699565 -180.6        1356.2681923 ]
Reward: -1  Episode Reward:  19
xxxxx
x.a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3036.50845889    14.09898617]
------
Step:12, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    56.02176457    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811  2871.91880452    49.79232781]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 9500.36699565 -180.6        1356.2681923 ]
------
Step:13, Action:South
State  138
Old Q Values:  [ -76.96882141 9500.36699565 -180.6        1356.2681923 ]
New Q values:  [ -76.96882141 6279.55483444 -180.6        1356.2681923 ]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.26669345e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:14, Action:North
State  208
Old Q Values:  [ 7840.59398876  2468.56266919   949.39862031 -3385.12952694]
New Q values:  [ 5019.50404583  2468.56266919   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 6279.55483444 -180.6        1356.2681923 ]
------
Step:15, Action:South
State  138
Old Q Values:  [ -76.96882141 6279.55483444 -180.6        1356.2681923 ]
New Q values:  [ -76.96882141 4991.22996995 -180.6        1356.2681923 ]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.26669345e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:16, Action:North
State  210
Old Q Values:  [8.26669345e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
New Q values:  [4.80344637e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 4991.22996995 -180.6        1356.2681923 ]
------
Step:17, Action:South
State  138
Old Q Values:  [ -76.96882141 4991.22996995 -180.6        1356.2681923 ]
New Q values:  [ -76.96882141 3436.92589975 -180.6        1356.2681923 ]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.80344637e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:18, Action:North
State  208
Old Q Values:  [ 5019.50404583  2468.56266919   949.39862031 -3385.12952694]
New Q values:  [ 3038.27938826  2468.56266919   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 3436.92589975 -180.6        1356.2681923 ]
------
Step:19, Action:South
State  138
Old Q Values:  [ -76.96882141 3436.92589975 -180.6        1356.2681923 ]
New Q values:  [ -76.96882141 3391.50854464 -180.6        1356.2681923 ]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.00106235e+03  6.72446062e+03 -6.17035694e+03  3.96578640e+00]
------
Step:20, Action:South
State  216
Old Q Values:  [ 3.00106235e+03  6.72446062e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 3.00106235e+03  4.45322870e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781   5880.14816482]
------
Step:21, Action:West
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781   5880.14816482]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781   3352.43129385]
Reward: 9  Episode Reward:  19
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3316.57342642  2402.28010204]
------
Step:22, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 -3662.56501853  1137.80187677]
New Q values:  [  37.74111519 -168.92307549 -459.89661925 1137.80187677]
Reward: -1  Episode Reward:  18
xxxxx
x. gx
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781   3352.43129385]
------
Step:23, Action:West
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781   3352.43129385]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781   1681.71308057]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -459.89661925 1137.80187677]
------
Step:24, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -459.89661925 1137.80187677]
New Q values:  [   37.74111519  -168.92307549  -459.89661925 18365.51534875]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
x.  x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[59683.31532681  2256.66526474 12394.68064405  1875.31501677]
------
Step:25, Action:North
State  257
Old Q Values:  [59683.31532681  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [49107.71047387  2256.66526474 12394.68064405  1875.31501677]
Reward: 9  Episode Reward:  35
xxxxx
x.  x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 84096.61447717     0.        ]
------
Step:26, Action:East
State  177
Old Q Values:  [66823.71809805  4025.17604709 84096.61447717     0.        ]
New Q values:  [66823.71809805  4025.17604709 35378.21390727     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x. gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5800.56038801   450.62327432  1909.41710691]
------
Step:27, Action:South
State  193
Old Q Values:  [-5922.26708831  5800.56038801   450.62327432  1909.41710691]
New Q values:  [-5922.26708831  7829.27875983   450.62327432  1909.41710691]
Reward: -1  Episode Reward:  33
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  -459.89661925 18365.51534875]
------
Step:28, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -459.89661925 18365.51534875]
New Q values:  [   37.74111519  -168.92307549  -459.89661925 22077.91928166]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[49107.71047387  2256.66526474 12394.68064405  1875.31501677]
------
Step:29, Action:North
State  257
Old Q Values:  [49107.71047387  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [39689.59961896  2256.66526474 12394.68064405  1875.31501677]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[66823.71809805  4025.17604709 35378.21390727     0.        ]
------
Step:30, Action:North
State  177
Old Q Values:  [66823.71809805  4025.17604709 35378.21390727     0.        ]
New Q values:  [86734.88723922  4025.17604709 35378.21390727     0.        ]
Reward: 100009  Episode Reward:  100040
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3036.50845889    14.09898617]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1294.91470007  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  2288.71038063  -841.67946252]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  5884.48166869 -6245.61866138 -2663.82254779]
------
Step:2, Action:South
State  136
Old Q Values:  [ 1117.30787879  5884.48166869 -6245.61866138 -2663.82254779]
New Q values:  [ 1117.30787879  3695.1612762  -6245.61866138 -2663.82254779]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.00106235e+03  4.45322870e+03 -6.17035694e+03  3.96578640e+00]
------
Step:3, Action:South
State  216
Old Q Values:  [ 3.00106235e+03  4.45322870e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 3.00106235e+03  2.29120540e+03 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781   1681.71308057]
------
Step:4, Action:West
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781   1681.71308057]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781   1673.05726015]
Reward: 9  Episode Reward:  36
xxxxx
x.  x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3316.57342642  2402.28010204]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3316.57342642  2402.28010204]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861  2402.28010204]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781   1673.05726015]
------
Step:6, Action:West
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781   1673.05726015]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781   1389.30693467]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1827.94654861  2402.28010204]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861  2402.28010204]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861  1417.63269479]
Reward: 9  Episode Reward:  43
xxxxx
xg  x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1504.40217989 -8695.4397473   1003.72690283 -2601.74710518]
------
Step:8, Action:North
State  257
Old Q Values:  [39689.59961896  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [41895.70601935  2256.66526474 12394.68064405  1875.31501677]
Reward: -1  Episode Reward:  42
xxxxx
x.g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[86734.88723922  4025.17604709 35378.21390727     0.        ]
------
Step:9, Action:North
State  180
Old Q Values:  [ -874.30797323   266.69721195  4138.04251251 -4966.32149798]
New Q values:  [-5994.37761603   266.69721195  4138.04251251 -4966.32149798]
Reward: -9991  Episode Reward:  -9949
xxxxx
xg  x
x . x
x   x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2337.46541585  -40.34168621  999.63796487  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [2337.46541585  -40.34168621  999.63796487  -35.88578819]
New Q values:  [2093.90569721  -40.34168621  999.63796487  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 3845.06510291    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [ 528.75331523   15.18059333 3117.47050412 -180.6       ]
New Q values:  [ 528.75331523   15.18059333 3601.1718296  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7829.27875983   450.62327432  1909.41710691]
------
Step:3, Action:South
State  195
Old Q Values:  [  38.85388605 2426.41572332 3535.62314734 2546.60363946]
New Q values:  [  38.85388605 7593.34207383 3535.62314734 2546.60363946]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  -459.89661925 22077.91928166]
------
Step:4, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -459.89661925 22077.91928166]
New Q values:  [  37.74111519 -168.92307549 -459.89661925 9458.73942183]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2093.90569721  -40.34168621  999.63796487  -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [2093.90569721  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1990.48180976  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 3845.06510291    0.        ]
------
Step:6, Action:East
State  181
Old Q Values:  [ 528.75331523   15.18059333 3601.1718296  -180.6       ]
New Q values:  [ 528.75331523   15.18059333 3788.65235979 -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7829.27875983   450.62327432  1909.41710691]
------
Step:7, Action:South
State  195
Old Q Values:  [  38.85388605 7593.34207383 3535.62314734 2546.60363946]
New Q values:  [  38.85388605 5874.35865608 3535.62314734 2546.60363946]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -459.89661925 9458.73942183]
------
Step:8, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -459.89661925 9458.73942183]
New Q values:  [  37.74111519 -168.92307549 -459.89661925 4380.04031166]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1990.48180976  -40.34168621  999.63796487  -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [1990.48180976  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1949.11225477  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  11
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 3845.06510291    0.        ]
------
Step:10, Action:East
State  181
Old Q Values:  [ 528.75331523   15.18059333 3788.65235979 -180.6       ]
New Q values:  [ 528.75331523   15.18059333 3863.64457186 -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7829.27875983   450.62327432  1909.41710691]
------
Step:11, Action:South
State  193
Old Q Values:  [-5922.26708831  7829.27875983   450.62327432  1909.41710691]
New Q values:  [-5922.26708831  4445.12359743   450.62327432  1909.41710691]
Reward: -1  Episode Reward:  9
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -459.89661925 4380.04031166]
------
Step:12, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1427.88351859 1245.34062374]
New Q values:  [  16.82637525  495.22830495 1427.88351859 1082.26992593]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1949.11225477  -40.34168621  999.63796487  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [1949.11225477  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1938.13827347  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 528.75331523   15.18059333 3863.64457186 -180.6       ]
------
Step:14, Action:North
State  181
Old Q Values:  [ 528.75331523   15.18059333 3863.64457186 -180.6       ]
New Q values:  [ 866.15076738   15.18059333 3863.64457186 -180.6       ]
Reward: 9  Episode Reward:  16
xxxxx
xa..x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        2164.16480429    5.4           0.        ]
------
Step:15, Action:South
State  103
Old Q Values:  [-180.6        2164.16480429    5.4           0.        ]
New Q values:  [-180.6        2018.58545259    5.4           0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 3845.06510291    0.        ]
------
Step:16, Action:East
State  181
Old Q Values:  [ 866.15076738   15.18059333 3863.64457186 -180.6       ]
New Q values:  [ 866.15076738   15.18059333 2878.39490797 -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4445.12359743   450.62327432  1909.41710691]
------
Step:17, Action:South
State  199
Old Q Values:  [  22.48535485 1441.89971818  549.89931413  753.62201984]
New Q values:  [  22.48535485 1141.9741627   549.89931413  753.62201984]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 1.34623827e+03 1.88604758e+03]
------
Step:18, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 1.34623827e+03 1.88604758e+03]
New Q values:  [   1.64433       0.         1346.23826999 1335.26051595]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1938.13827347  -40.34168621  999.63796487  -35.88578819]
------
Step:19, Action:North
State  261
Old Q Values:  [1938.13827347  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1638.17378178  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 866.15076738   15.18059333 2878.39490797 -180.6       ]
------
Step:20, Action:North
State  181
Old Q Values:  [ 866.15076738   15.18059333 2878.39490797 -180.6       ]
New Q values:  [ 726.59307628   15.18059333 2878.39490797 -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
xag.x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1269.1092311    31.9495824  -180.6       ]
------
Step:21, Action:South
State  100
Old Q Values:  [ 0.00000000e+00  7.91118334e+02 -6.00000000e-01  0.00000000e+00]
New Q values:  [ 0.00000000e+00  1.55726009e+03 -6.00000000e-01  0.00000000e+00]
Reward: -1  Episode Reward:  9
xxxxx
xg..x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5994.37761603   266.69721195  4138.04251251 -4966.32149798]
------
Step:22, Action:East
State  180
Old Q Values:  [-5994.37761603   266.69721195  4138.04251251 -4966.32149798]
New Q values:  [-5994.37761603   266.69721195  2251.86294333 -4966.32149798]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1990.81979442   174.55451539     0.        ]
------
Step:23, Action:South
State  196
Old Q Values:  [-2469.90645144  1990.81979442   174.55451539     0.        ]
New Q values:  [-2469.90645144  1224.09297335   174.55451539     0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1427.88351859 1082.26992593]
------
Step:24, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 1427.88351859 1082.26992593]
New Q values:  [  16.82637525  495.22830495  993.34548784 1082.26992593]
Reward: 9  Episode Reward:  16
xxxxx
xg..x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781   1389.30693467]
------
Step:25, Action:West
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781   1389.30693467]
New Q values:  [ 1185.26418121 -9022.41491635 -7525.7277781    879.80375165]
Reward: -1  Episode Reward:  15
xxxxx
x g.x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495  993.34548784 1082.26992593]
------
Step:26, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495  993.34548784 1082.26992593]
New Q values:  [ 16.82637525 495.22830495 993.34548784 883.62862434]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1504.40217989 -8695.4397473   1003.72690283 -2601.74710518]
------
Step:27, Action:North
State  261
Old Q Values:  [1638.17378178  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1518.1879851   -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 726.59307628   15.18059333 2878.39490797 -180.6       ]
------
Step:28, Action:East
State  181
Old Q Values:  [ 726.59307628   15.18059333 2878.39490797 -180.6       ]
New Q values:  [ 726.59307628   15.18059333 2484.29504242 -180.6       ]
Reward: -1  Episode Reward:  12
xxxxx
x .gx
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4445.12359743   450.62327432  1909.41710691]
------
Step:29, Action:South
State  196
Old Q Values:  [-2469.90645144  1224.09297335   174.55451539     0.        ]
New Q values:  [-2469.90645144   787.04083569   174.55451539     0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x g.x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 16.82637525 495.22830495 993.34548784 883.62862434]
------
Step:30, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 -459.89661925 4380.04031166]
New Q values:  [  37.74111519 -168.92307549  171.02060666 4380.04031166]
Reward: -1  Episode Reward:  10
xxxxx
x .gx
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1185.26418121 -9022.41491635 -7525.7277781    879.80375165]
------
Step:31, Action:North
State  288
Old Q Values:  [ 1185.26418121 -9022.41491635 -7525.7277781    879.80375165]
New Q values:  [ 1390.98948896 -9022.41491635 -7525.7277781    879.80375165]
Reward: 9  Episode Reward:  19
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3038.27938826  2468.56266919   949.39862031 -3385.12952694]
------
Step:32, Action:North
State  208
Old Q Values:  [ 3038.27938826  2468.56266919   949.39862031 -3385.12952694]
New Q values:  [38833.75012878  2468.56266919   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  28
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 40939.43533923   1641.8482468    -180.00807518 125376.79457827]
------
Step:33, Action:West
State  130
Old Q Values:  [ 40939.43533923   1641.8482468    -180.00807518 125376.79457827]
New Q values:  [ 40939.43533923   1641.8482468    -180.00807518 148753.75744221]
Reward: 100009  Episode Reward:  100037
xxxxx
x a x
x  gx
x   x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1518.1879851   -40.34168621  999.63796487  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [1518.1879851   -40.34168621  999.63796487  -35.88578819]
New Q values:  [1766.19472491  -40.34168621  999.63796487  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1047.6630518  1654.71634746 3845.06510291    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [1047.6630518  1654.71634746 3845.06510291    0.        ]
New Q values:  [1047.6630518  1654.71634746 3305.73363799    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 5874.35865608 3535.62314734 2546.60363946]
------
Step:3, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -6.70590551e+03  8.20902238e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -8.12857824e+03  8.20902238e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9973
xxxxx
x...x
x  .x
x g x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1390.98948896 -9022.41491635 -7525.7277781    879.80375165]
------
Step:1, Action:North
State  288
Old Q Values:  [ 1390.98948896 -9022.41491635 -7525.7277781    879.80375165]
New Q values:  [12211.92083422 -9022.41491635 -7525.7277781    879.80375165]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[38833.75012878  2468.56266919   949.39862031 -3385.12952694]
------
Step:2, Action:North
State  208
Old Q Values:  [38833.75012878  2468.56266919   949.39862031 -3385.12952694]
New Q values:  [60165.02728418  2468.56266919   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 40939.43533923   1641.8482468    -180.00807518 148753.75744221]
------
Step:3, Action:West
State  136
Old Q Values:  [ 1117.30787879  3695.1612762  -6245.61866138 -2663.82254779]
New Q values:  [ 1117.30787879  3695.1612762  -6245.61866138  -373.51590493]
Reward: 9  Episode Reward:  27
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2288.71038063  -841.67946252]
------
Step:4, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2288.71038063  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  2023.43253512  -841.67946252]
Reward: -1  Episode Reward:  26
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  3695.1612762  -6245.61866138  -373.51590493]
------
Step:5, Action:South
State  136
Old Q Values:  [ 1117.30787879  3695.1612762  -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879 19526.97269573 -6245.61866138  -373.51590493]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[60165.02728418  2468.56266919   949.39862031 -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [60165.02728418  2468.56266919   949.39862031 -3385.12952694]
New Q values:  [29923.50272239  2468.56266919   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  24
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879 19526.97269573 -6245.61866138  -373.51590493]
------
Step:7, Action:South
State  136
Old Q Values:  [ 1117.30787879 19526.97269573 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879 16787.23989501 -6245.61866138  -373.51590493]
Reward: -1  Episode Reward:  23
xxxxx
x. gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29923.50272239  2468.56266919   949.39862031 -3385.12952694]
------
Step:8, Action:South
State  208
Old Q Values:  [29923.50272239  2468.56266919   949.39862031 -3385.12952694]
New Q values:  [29923.50272239  4650.40131794   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12211.92083422 -9022.41491635 -7525.7277781    879.80375165]
------
Step:9, Action:West
State  288
Old Q Values:  [12211.92083422 -9022.41491635 -7525.7277781    879.80375165]
New Q values:  [12211.92083422 -9022.41491635 -7525.7277781   1665.33359416]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  171.02060666 4380.04031166]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861  1417.63269479]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861 13141.16488372]
Reward: 9  Episode Reward:  30
xxxxx
x.  x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41895.70601935  2256.66526474 12394.68064405  1875.31501677]
------
Step:11, Action:North
State  256
Old Q Values:  [25495.70101452  7580.15833597  5576.40109469   644.94785455]
New Q values:  [36562.65200811  7580.15833597  5576.40109469   644.94785455]
Reward: 9  Episode Reward:  39
xxxxx
x.  x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:12, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 46379.00380351     0.        ]
New Q values:  [    0.          4614.46100011 21019.70823505     0.        ]
Reward: 9  Episode Reward:  48
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  8.20902238e+03  1.20371620e+03]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -8.12857824e+03  8.20902238e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -8.12857824e+03  4.72404286e+03  1.20371620e+03]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.80344637e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:14, Action:North
State  208
Old Q Values:  [29923.50272239  4650.40131794   949.39862031 -3385.12952694]
New Q values:  [56594.92832162  4650.40131794   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  46
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 40939.43533923   1641.8482468    -180.00807518 148753.75744221]
------
Step:15, Action:West
State  130
Old Q Values:  [ 40939.43533923   1641.8482468    -180.00807518 148753.75744221]
New Q values:  [40939.43533923  1641.8482468   -180.00807518 95119.26484475]
Reward: -1  Episode Reward:  45
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  47000.60902691 118727.87289289]
------
Step:16, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.28658799e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29260795e+05]
Reward: 100009  Episode Reward:  100054
xxxxx
xa  x
x  gx
x   x
xxxxx
Episode # 500
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.80344637e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:1, Action:North
State  208
Old Q Values:  [56594.92832162  4650.40131794   949.39862031 -3385.12952694]
New Q values:  [23660.82389204  4650.40131794   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 3391.50854464 -180.6        1356.2681923 ]
------
Step:2, Action:South
State  136
Old Q Values:  [ 1117.30787879 16787.23989501 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879 13812.54312562 -6245.61866138  -373.51590493]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23660.82389204  4650.40131794   949.39862031 -3385.12952694]
------
Step:3, Action:North
State  208
Old Q Values:  [23660.82389204  4650.40131794   949.39862031 -3385.12952694]
New Q values:  [10481.18212021  4650.40131794   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 3391.50854464 -180.6        1356.2681923 ]
------
Step:4, Action:South
State  138
Old Q Values:  [ -76.96882141 3391.50854464 -180.6        1356.2681923 ]
New Q values:  [ -76.96882141 2797.03732962 -180.6        1356.2681923 ]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.80344637e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
------
Step:5, Action:North
State  210
Old Q Values:  [4.80344637e+03 2.48886392e+02 5.90993206e+02 3.52184257e+00]
New Q values:  [2759.88974791  248.88639154  590.9932059     3.52184257]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 2797.03732962 -180.6        1356.2681923 ]
------
Step:6, Action:South
State  138
Old Q Values:  [ -76.96882141 2797.03732962 -180.6        1356.2681923 ]
New Q values:  [ -76.96882141 1946.18185622 -180.6        1356.2681923 ]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2759.88974791  248.88639154  590.9932059     3.52184257]
------
Step:7, Action:North
State  208
Old Q Values:  [10481.18212021  4650.40131794   949.39862031 -3385.12952694]
New Q values:  [ 4775.72740495  4650.40131794   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  3
xxxxx
x..ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 1946.18185622 -180.6        1356.2681923 ]
------
Step:8, Action:West
State  138
Old Q Values:  [ -76.96882141 1946.18185622 -180.6        1356.2681923 ]
New Q values:  [ -76.96882141 1946.18185622 -180.6        1458.85981459]
Reward: 9  Episode Reward:  12
xxxxx
x.a x
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3036.50845889    14.09898617]
------
Step:9, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2871.91880452    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811  1732.02207868    49.79232781]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 1946.18185622 -180.6        1458.85981459]
------
Step:10, Action:West
State  138
Old Q Values:  [ -76.96882141 1946.18185622 -180.6        1458.85981459]
New Q values:  [ -76.96882141 1946.18185622 -180.6        1493.8964635 ]
Reward: -1  Episode Reward:  10
xxxxx
x.a x
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3036.50845889    14.09898617]
------
Step:11, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2023.43253512  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  4952.53595173  -841.67946252]
Reward: -1  Episode Reward:  9
xxxxx
x.gax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879 13812.54312562 -6245.61866138  -373.51590493]
------
Step:12, Action:South
State  136
Old Q Values:  [ 1117.30787879 13812.54312562 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879  6957.13547173 -6245.61866138  -373.51590493]
Reward: -1  Episode Reward:  8
xxxxx
x. gx
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4775.72740495  4650.40131794   949.39862031 -3385.12952694]
------
Step:13, Action:South
State  208
Old Q Values:  [ 4775.72740495  4650.40131794   949.39862031 -3385.12952694]
New Q values:  [ 4775.72740495  5529.13677744   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x..gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12211.92083422 -9022.41491635 -7525.7277781   1665.33359416]
------
Step:14, Action:West
State  288
Old Q Values:  [12211.92083422 -9022.41491635 -7525.7277781   1665.33359416]
New Q values:  [12211.92083422 -9022.41491635 -7525.7277781   1985.54553116]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
x.. x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  171.02060666 4380.04031166]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861 13141.16488372]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861 17824.57775929]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41895.70601935  2256.66526474 12394.68064405  1875.31501677]
------
Step:16, Action:North
State  256
Old Q Values:  [36562.65200811  7580.15833597  5576.40109469   644.94785455]
New Q values:  [40989.43240555  7580.15833597  5576.40109469   644.94785455]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:17, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 21019.70823505     0.        ]
New Q values:  [   0.         4614.46100011 9830.49615301    0.        ]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  4.72404286e+03  1.20371620e+03]
------
Step:18, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.31485615e+03 1.46212831e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.31485615e+03 7.50665427e+03 2.91043938e+03]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4775.72740495  5529.13677744   949.39862031 -3385.12952694]
------
Step:19, Action:South
State  210
Old Q Values:  [2759.88974791  248.88639154  590.9932059     3.52184257]
New Q values:  [2.75988975e+03 3.76253081e+03 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  41
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12211.92083422 -9022.41491635 -7525.7277781   1985.54553116]
------
Step:20, Action:North
State  288
Old Q Values:  [12211.92083422 -9022.41491635 -7525.7277781   1985.54553116]
New Q values:  [ 6012.92757575 -9022.41491635 -7525.7277781   1985.54553116]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.75988975e+03 3.76253081e+03 5.90993206e+02 3.52184257e+00]
------
Step:21, Action:North
State  210
Old Q Values:  [2.75988975e+03 3.76253081e+03 5.90993206e+02 3.52184257e+00]
New Q values:  [2.96391354e+04 3.76253081e+03 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  39
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  1641.8482468   -180.00807518 95119.26484475]
------
Step:22, Action:West
State  130
Old Q Values:  [40939.43533923  1641.8482468   -180.00807518 95119.26484475]
New Q values:  [40939.43533923  1641.8482468   -180.00807518 73665.46780577]
Reward: -1  Episode Reward:  38
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  47000.60902691 118727.87289289]
------
Step:23, Action:West
State  114
Old Q Values:  [  -180.6          4272.38349051  47000.60902691 118727.87289289]
New Q values:  [  -180.6          4272.38349051  47000.60902691 125288.42495849]
Reward: 100009  Episode Reward:  100047
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  6957.13547173 -6245.61866138  -373.51590493]
------
Step:1, Action:South
State  136
Old Q Values:  [ 1117.30787879  6957.13547173 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879  4446.99522193 -6245.61866138  -373.51590493]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4775.72740495  5529.13677744   949.39862031 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [ 4775.72740495  5529.13677744   949.39862031 -3385.12952694]
New Q values:  [ 4775.72740495  4020.9329837    949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6012.92757575 -9022.41491635 -7525.7277781   1985.54553116]
------
Step:3, Action:North
State  288
Old Q Values:  [ 6012.92757575 -9022.41491635 -7525.7277781   1985.54553116]
New Q values:  [ 3837.28925179 -9022.41491635 -7525.7277781   1985.54553116]
Reward: -1  Episode Reward:  17
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4775.72740495  4020.9329837    949.39862031 -3385.12952694]
------
Step:4, Action:South
State  208
Old Q Values:  [ 4775.72740495  4020.9329837    949.39862031 -3385.12952694]
New Q values:  [ 4775.72740495  2758.95996902   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3837.28925179 -9022.41491635 -7525.7277781   1985.54553116]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3837.28925179 -9022.41491635 -7525.7277781   1985.54553116]
New Q values:  [ 3837.28925179 -9022.41491635 -7525.7277781   2113.63030596]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  171.02060666 4380.04031166]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861 17824.57775929]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861 19703.94290952]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41895.70601935  2256.66526474 12394.68064405  1875.31501677]
------
Step:7, Action:North
State  257
Old Q Values:  [41895.70601935  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [42784.14857951  2256.66526474 12394.68064405  1875.31501677]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[86734.88723922  4025.17604709 35378.21390727     0.        ]
------
Step:8, Action:North
State  183
Old Q Values:  [1047.6630518  1654.71634746 3305.73363799    0.        ]
New Q values:  [1160.24276199 1654.71634746 3305.73363799    0.        ]
Reward: 9  Episode Reward:  52
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2452.59180422   69.34437687 -120.29354603]
------
Step:9, Action:South
State  99
Old Q Values:  [    0.         39677.73511325 59306.25267112     0.        ]
New Q values:  [    0.         41890.96021707 59306.25267112     0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[86734.88723922  4025.17604709 35378.21390727     0.        ]
------
Step:10, Action:North
State  181
Old Q Values:  [ 726.59307628   15.18059333 2484.29504242 -180.6       ]
New Q values:  [ 670.76999984   15.18059333 2484.29504242 -180.6       ]
Reward: -1  Episode Reward:  50
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1269.1092311    31.9495824  -180.6       ]
------
Step:11, Action:South
State  99
Old Q Values:  [    0.         41890.96021707 59306.25267112     0.        ]
New Q values:  [    0.         42776.25025859 59306.25267112     0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[86734.88723922  4025.17604709 35378.21390727     0.        ]
------
Step:12, Action:North
State  183
Old Q Values:  [1160.24276199 1654.71634746 3305.73363799    0.        ]
New Q values:  [1199.27464606 1654.71634746 3305.73363799    0.        ]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2452.59180422   69.34437687 -120.29354603]
------
Step:13, Action:South
State  99
Old Q Values:  [    0.         42776.25025859 59306.25267112     0.        ]
New Q values:  [    0.         43130.3662752  59306.25267112     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[86734.88723922  4025.17604709 35378.21390727     0.        ]
------
Step:14, Action:North
State  181
Old Q Values:  [ 670.76999984   15.18059333 2484.29504242 -180.6       ]
New Q values:  [ 648.44076927   15.18059333 2484.29504242 -180.6       ]
Reward: -1  Episode Reward:  46
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1269.1092311    31.9495824  -180.6       ]
------
Step:15, Action:South
State  109
Old Q Values:  [-241.10880094 1269.1092311    31.9495824  -180.6       ]
New Q values:  [-241.10880094 1252.33220517   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 648.44076927   15.18059333 2484.29504242 -180.6       ]
------
Step:16, Action:East
State  177
Old Q Values:  [86734.88723922  4025.17604709 35378.21390727     0.        ]
New Q values:  [86734.88723922  4025.17604709 75490.22264214     0.        ]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.96391354e+04 3.76253081e+03 5.90993206e+02 3.52184257e+00]
------
Step:1, Action:North
State  208
Old Q Values:  [ 4775.72740495  2758.95996902   949.39862031 -3385.12952694]
New Q values:  [ 2499.54551885  2758.95996902   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 1946.18185622 -180.6        1493.8964635 ]
------
Step:2, Action:South
State  138
Old Q Values:  [ -76.96882141 1946.18185622 -180.6        1493.8964635 ]
New Q values:  [ -76.96882141 1605.56073319 -180.6        1493.8964635 ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xg.ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2499.54551885  2758.95996902   949.39862031 -3385.12952694]
------
Step:3, Action:South
State  208
Old Q Values:  [ 2499.54551885  2758.95996902   949.39862031 -3385.12952694]
New Q values:  [ 2499.54551885  2260.17076314   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  17
xxxxx
x.. x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3837.28925179 -9022.41491635 -7525.7277781   2113.63030596]
------
Step:4, Action:North
State  288
Old Q Values:  [ 3837.28925179 -9022.41491635 -7525.7277781   2113.63030596]
New Q values:  [ 2284.17935637 -9022.41491635 -7525.7277781   2113.63030596]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2499.54551885  2260.17076314   949.39862031 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [ 2499.54551885  2260.17076314   949.39862031 -3385.12952694]
New Q values:  [-3666.68322588  2260.17076314   949.39862031 -3385.12952694]
Reward: -10001  Episode Reward:  -9985
xxxxx
x..gx
x.. x
x.  x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 3305.73363799    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [1199.27464606 1654.71634746 3305.73363799    0.        ]
New Q values:  [1199.27464606 1654.71634746 2744.90631418    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  4.72404286e+03  1.20371620e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.31485615e+03 7.50665427e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 4.31485615e+03 3.68611294e+03 2.91043938e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-3666.68322588  2260.17076314   949.39862031 -3385.12952694]
------
Step:3, Action:South
State  210
Old Q Values:  [2.96391354e+04 3.76253081e+03 5.90993206e+02 3.52184257e+00]
New Q values:  [2.96391354e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2284.17935637 -9022.41491635 -7525.7277781   2113.63030596]
------
Step:4, Action:North
State  288
Old Q Values:  [ 2284.17935637 -9022.41491635 -7525.7277781   2113.63030596]
New Q values:  [ 9804.81234832 -9022.41491635 -7525.7277781   2113.63030596]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.96391354e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:5, Action:North
State  208
Old Q Values:  [-3666.68322588  2260.17076314   949.39862031 -3385.12952694]
New Q values:  [20638.36705138  2260.17076314   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  1641.8482468   -180.00807518 73665.46780577]
------
Step:6, Action:West
State  138
Old Q Values:  [ -76.96882141 1605.56073319 -180.6        1493.8964635 ]
New Q values:  [ -76.96882141 1605.56073319 -180.6        1122.565209  ]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1732.02207868    49.79232781]
------
Step:7, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  47000.60902691 125288.42495849]
New Q values:  [  -180.6          4272.38349051  40899.28395249 125288.42495849]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  1641.8482468   -180.00807518 73665.46780577]
------
Step:8, Action:West
State  138
Old Q Values:  [ -76.96882141 1605.56073319 -180.6        1122.565209  ]
New Q values:  [ -76.96882141 1605.56073319 -180.6        1359.37862127]
Reward: -1  Episode Reward:  32
xxxxx
x.a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3036.50845889    14.09898617]
------
Step:9, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4952.53595173  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  3314.51294727  -841.67946252]
Reward: -1  Episode Reward:  31
xxxxx
x.gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  4446.99522193 -6245.61866138  -373.51590493]
------
Step:10, Action:South
State  136
Old Q Values:  [ 1117.30787879  4446.99522193 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879  2678.51679396 -6245.61866138  -373.51590493]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3.00106235e+03  2.29120540e+03 -6.17035694e+03  3.96578640e+00]
------
Step:11, Action:North
State  216
Old Q Values:  [ 3.00106235e+03  2.29120540e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 2.00337998e+03  2.29120540e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  29
xxxxx
x.gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  2678.51679396 -6245.61866138  -373.51590493]
------
Step:12, Action:South
State  136
Old Q Values:  [ 1117.30787879  2678.51679396 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879  1758.16833833 -6245.61866138  -373.51590493]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.00337998e+03  2.29120540e+03 -6.17035694e+03  3.96578640e+00]
------
Step:13, Action:South
State  216
Old Q Values:  [ 2.00337998e+03  2.29120540e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 2.00337998e+03  3.85732587e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  27
xxxxx
x.g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9804.81234832 -9022.41491635 -7525.7277781   2113.63030596]
------
Step:14, Action:North
State  288
Old Q Values:  [ 9804.81234832 -9022.41491635 -7525.7277781   2113.63030596]
New Q values:  [ 5078.52269898 -9022.41491635 -7525.7277781   2113.63030596]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.00337998e+03  3.85732587e+03 -6.17035694e+03  3.96578640e+00]
------
Step:15, Action:South
State  216
Old Q Values:  [ 2.00337998e+03  3.85732587e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 2.00337998e+03  3.06588716e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5078.52269898 -9022.41491635 -7525.7277781   2113.63030596]
------
Step:16, Action:North
State  288
Old Q Values:  [ 5078.52269898 -9022.41491635 -7525.7277781   2113.63030596]
New Q values:  [ 2950.57522636 -9022.41491635 -7525.7277781   2113.63030596]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.00337998e+03  3.06588716e+03 -6.17035694e+03  3.96578640e+00]
------
Step:17, Action:South
State  216
Old Q Values:  [ 2.00337998e+03  3.06588716e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 2.00337998e+03  2.11092743e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2950.57522636 -9022.41491635 -7525.7277781   2113.63030596]
------
Step:18, Action:North
State  288
Old Q Values:  [ 2950.57522636 -9022.41491635 -7525.7277781   2113.63030596]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   2113.63030596]
Reward: -1  Episode Reward:  22
xxxxx
x. gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.00337998e+03  2.11092743e+03 -6.17035694e+03  3.96578640e+00]
------
Step:19, Action:South
State  216
Old Q Values:  [ 2.00337998e+03  2.11092743e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 2.00337998e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  21
xxxxx
x. gx
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   2113.63030596]
------
Step:20, Action:West
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   2113.63030596]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   6762.03499524]
Reward: 9  Episode Reward:  30
xxxxx
x.g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1827.94654861 19703.94290952]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861 19703.94290952]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861 20722.22173766]
Reward: 9  Episode Reward:  39
xxxxx
x.  x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[42784.14857951  2256.66526474 12394.68064405  1875.31501677]
------
Step:22, Action:North
State  257
Old Q Values:  [42784.14857951  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [43133.52560357  2256.66526474 12394.68064405  1875.31501677]
Reward: -1  Episode Reward:  38
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[86734.88723922  4025.17604709 75490.22264214     0.        ]
------
Step:23, Action:North
State  177
Old Q Values:  [86734.88723922  4025.17604709 75490.22264214     0.        ]
New Q values:  [112491.23069702   4025.17604709  75490.22264214      0.        ]
Reward: 100009  Episode Reward:  100047
xxxxx
xa  x
x g x
x   x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1504.40217989 -8695.4397473   1003.72690283 -2601.74710518]
------
Step:1, Action:North
State  261
Old Q Values:  [1766.19472491  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1457.16640269  -40.34168621  999.63796487  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 648.44076927   15.18059333 2484.29504242 -180.6       ]
------
Step:2, Action:East
State  181
Old Q Values:  [ 648.44076927   15.18059333 2484.29504242 -180.6       ]
New Q values:  [ 648.44076927   15.18059333 2332.6550962  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4445.12359743   450.62327432  1909.41710691]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144   787.04083569   174.55451539     0.        ]
New Q values:  [-2469.90645144   618.21998063   174.55451539     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 16.82637525 495.22830495 993.34548784 883.62862434]
------
Step:4, Action:East
State  276
Old Q Values:  [ 16.82637525 495.22830495 993.34548784 883.62862434]
New Q values:  [  16.82637525  495.22830495 2431.34869371  883.62862434]
Reward: 9  Episode Reward:  36
xxxxx
xg..x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   6762.03499524]
------
Step:5, Action:West
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   6762.03499524]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   3433.61860621]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 2431.34869371  883.62862434]
------
Step:6, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  171.02060666 4380.04031166]
New Q values:  [  37.74111519 -168.92307549 1097.89382453 4380.04031166]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   3433.61860621]
------
Step:7, Action:West
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   3433.61860621]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   2102.2520506 ]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 2431.34869371  883.62862434]
------
Step:8, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1097.89382453 4380.04031166]
New Q values:  [  37.74111519 -168.92307549 1069.23314499 4380.04031166]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   2102.2520506 ]
------
Step:9, Action:West
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   2102.2520506 ]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   2154.31291374]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1069.23314499 4380.04031166]
------
Step:10, Action:West
State  277
Old Q Values:  [   1.64433       0.         1346.23826999 1335.26051595]
New Q values:  [   1.64433       0.         1346.23826999  970.65412719]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1457.16640269  -40.34168621  999.63796487  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [1457.16640269  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1405.73845533  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 2744.90631418    0.        ]
------
Step:12, Action:East
State  183
Old Q Values:  [1199.27464606 1654.71634746 2744.90631418    0.        ]
New Q values:  [1199.27464606 1654.71634746 1439.95477448    0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  22.48535485 1141.9741627   549.89931413  753.62201984]
------
Step:13, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -6.59967450e+03  1.26155064e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -7.91106519e+03  1.26155064e+04  0.00000000e+00]
Reward: -10001  Episode Reward:  -9973
xxxxx
x ..x
x  .x
x g x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   2154.31291374]
------
Step:1, Action:West
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   2154.31291374]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   7083.79168679]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1827.94654861 20722.22173766]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861 20722.22173766]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861  8716.01023166]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1405.73845533  -40.34168621  999.63796487  -35.88578819]
------
Step:3, Action:North
State  260
Old Q Values:  [ 1504.40217989 -8695.4397473   1003.72690283 -2601.74710518]
New Q values:  [ 1276.71975496 -8695.4397473   1003.72690283 -2601.74710518]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5994.37761603   266.69721195  2251.86294333 -4966.32149798]
------
Step:4, Action:East
State  180
Old Q Values:  [-5994.37761603   266.69721195  2251.86294333 -4966.32149798]
New Q values:  [-5994.37761603   266.69721195  2200.60202348 -4966.32149798]
Reward: 9  Episode Reward:  26
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.31485615e+03 3.68611294e+03 2.91043938e+03]
------
Step:5, Action:South
State  196
Old Q Values:  [-2469.90645144   618.21998063   174.55451539     0.        ]
New Q values:  [-2469.90645144   976.09260036   174.55451539     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 2431.34869371  883.62862434]
------
Step:6, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 2431.34869371  883.62862434]
New Q values:  [  16.82637525  495.22830495 3097.07698352  883.62862434]
Reward: -1  Episode Reward:  24
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   7083.79168679]
------
Step:7, Action:West
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   7083.79168679]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   3762.03976977]
Reward: -1  Episode Reward:  23
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 3097.07698352  883.62862434]
------
Step:8, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 3097.07698352  883.62862434]
New Q values:  [  16.82637525  495.22830495 2366.84272434  883.62862434]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   3762.03976977]
------
Step:9, Action:West
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   3762.03976977]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   2214.26872521]
Reward: -1  Episode Reward:  21
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 2366.84272434  883.62862434]
------
Step:10, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 2366.84272434  883.62862434]
New Q values:  [  16.82637525  495.22830495 1610.4177073   883.62862434]
Reward: -1  Episode Reward:  20
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   2214.26872521]
------
Step:11, Action:West
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   2214.26872521]
New Q values:  [ 1812.90831962 -9022.41491635 -7525.7277781   3499.91055958]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1827.94654861  8716.01023166]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861  8716.01023166]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861  3907.52562926]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1405.73845533  -40.34168621  999.63796487  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [1405.73845533  -40.34168621  999.63796487  -35.88578819]
New Q values:  [1058.11028637  -40.34168621  999.63796487  -35.88578819]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 1439.95477448    0.        ]
------
Step:14, Action:South
State  181
Old Q Values:  [ 648.44076927   15.18059333 2332.6550962  -180.6       ]
New Q values:  [ 648.44076927  322.90532324 2332.6550962  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1058.11028637  -40.34168621  999.63796487  -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [1058.11028637  -40.34168621  999.63796487  -35.88578819]
New Q values:  [919.05901879 -40.34168621 999.63796487 -35.88578819]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 1439.95477448    0.        ]
------
Step:16, Action:South
State  181
Old Q Values:  [ 648.44076927  322.90532324 2332.6550962  -180.6       ]
New Q values:  [ 648.44076927  428.45351876 2332.6550962  -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[919.05901879 -40.34168621 999.63796487 -35.88578819]
------
Step:17, Action:East
State  261
Old Q Values:  [919.05901879 -40.34168621 999.63796487 -35.88578819]
New Q values:  [ 919.05901879  -40.34168621 1713.26727945  -35.88578819]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1069.23314499 4380.04031166]
------
Step:18, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1069.23314499 4380.04031166]
New Q values:  [  37.74111519 -168.92307549 1069.23314499 2265.3963085 ]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 919.05901879  -40.34168621 1713.26727945  -35.88578819]
------
Step:19, Action:East
State  261
Old Q Values:  [ 919.05901879  -40.34168621 1713.26727945  -35.88578819]
New Q values:  [ 919.05901879  -40.34168621 1856.96460056  -35.88578819]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1827.94654861  3907.52562926]
------
Step:20, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861  3907.52562926]
New Q values:  [-2527.46239811 -8521.23367799  1827.94654861  1945.42617819]
Reward: -1  Episode Reward:  10
xxxxx
x...x
xg .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1276.71975496 -8695.4397473   1003.72690283 -2601.74710518]
------
Step:21, Action:East
State  260
Old Q Values:  [ 1276.71975496 -8695.4397473   1003.72690283 -2601.74710518]
New Q values:  [ 1276.71975496 -8695.4397473    984.51861459 -2601.74710518]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1827.94654861  1945.42617819]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1827.94654861  1945.42617819]
New Q values:  [-2527.46239811 -8521.23367799  1780.55178732  1945.42617819]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1812.90831962 -9022.41491635 -7525.7277781   3499.91055958]
------
Step:23, Action:North
State  288
Old Q Values:  [ 1812.90831962 -9022.41491635 -7525.7277781   3499.91055958]
New Q values:  [ 9622.30393363 -9022.41491635 -7525.7277781   3499.91055958]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.96391354e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:24, Action:North
State  208
Old Q Values:  [20638.36705138  2260.17076314   949.39862031 -3385.12952694]
New Q values:  [30360.38716228  2260.17076314   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  1641.8482468   -180.00807518 73665.46780577]
------
Step:25, Action:West
State  130
Old Q Values:  [40939.43533923  1641.8482468   -180.00807518 73665.46780577]
New Q values:  [40939.43533923  1641.8482468   -180.00807518 68249.82570707]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29260795e+05]
------
Step:26, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29260795e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
Reward: 100009  Episode Reward:  100044
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1780.55178732  1945.42617819]
------
Step:1, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1069.23314499 2265.3963085 ]
New Q values:  [  37.74111519 -168.92307549 1069.23314499 1468.64790357]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 919.05901879  -40.34168621 1856.96460056  -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [ 919.05901879  -40.34168621 1856.96460056  -35.88578819]
New Q values:  [ 919.05901879  -40.34168621 1325.81369368  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1780.55178732  1945.42617819]
------
Step:3, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1610.4177073   883.62862434]
New Q values:  [  16.82637525  495.22830495 1610.4177073   735.86737622]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1276.71975496 -8695.4397473    984.51861459 -2601.74710518]
------
Step:4, Action:East
State  260
Old Q Values:  [ 1276.71975496 -8695.4397473    984.51861459 -2601.74710518]
New Q values:  [ 1276.71975496 -8695.4397473    876.33275803 -2601.74710518]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1610.4177073   735.86737622]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1780.55178732  1945.42617819]
New Q values:  [-2527.46239811 -8521.23367799  3604.31189502  1945.42617819]
Reward: 9  Episode Reward:  15
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9622.30393363 -9022.41491635 -7525.7277781   3499.91055958]
------
Step:6, Action:North
State  288
Old Q Values:  [ 9622.30393363 -9022.41491635 -7525.7277781   3499.91055958]
New Q values:  [12746.06217923 -9022.41491635 -7525.7277781   3499.91055958]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.96391354e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:7, Action:North
State  210
Old Q Values:  [2.96391354e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
New Q values:  [3.23360019e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
Reward: 9  Episode Reward:  33
xxxxx
x..ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  1641.8482468   -180.00807518 68249.82570707]
------
Step:8, Action:West
State  130
Old Q Values:  [40939.43533923  1641.8482468   -180.00807518 68249.82570707]
New Q values:  [40939.43533923  1641.8482468   -180.00807518 66155.80845713]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624   3036.50845889    14.09898617]
New Q values:  [ -281.736      -9545.4473624   3036.50845889    24.50779719]
Reward: 9  Episode Reward:  51
xxxxx
xa  x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   44.89400907 -252.78192178]
------
Step:10, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   44.89400907 -252.78192178]
New Q values:  [-252.35169558    7.11267516  536.96422723 -252.78192178]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1732.02207868    49.79232781]
------
Step:11, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1732.02207868    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811  1173.87705143    49.79232781]
Reward: -1  Episode Reward:  49
xxxxx
x  ax
x. gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 1605.56073319 -180.6        1359.37862127]
------
Step:12, Action:West
State  138
Old Q Values:  [ -76.96882141 1605.56073319 -180.6        1359.37862127]
New Q values:  [ -76.96882141 1605.56073319 -180.6        1454.10398618]
Reward: -1  Episode Reward:  48
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3036.50845889    24.50779719]
------
Step:13, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1173.87705143    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811   950.61904053    49.79232781]
Reward: -1  Episode Reward:  47
xxxxx
x  ax
x. gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -76.96882141 1605.56073319 -180.6        1454.10398618]
------
Step:14, Action:West
State  138
Old Q Values:  [ -76.96882141 1605.56073319 -180.6        1454.10398618]
New Q values:  [ -76.96882141 1605.56073319 -180.6        1491.99413214]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3036.50845889    24.50779719]
------
Step:15, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3314.51294727  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  1852.65568041  -841.67946252]
Reward: -1  Episode Reward:  45
xxxxx
x gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  1758.16833833 -6245.61866138  -373.51590493]
------
Step:16, Action:South
State  138
Old Q Values:  [ -76.96882141 1605.56073319 -180.6        1491.99413214]
New Q values:  [ -76.96882141 9749.74044196 -180.6        1491.99413214]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30360.38716228  2260.17076314   949.39862031 -3385.12952694]
------
Step:17, Action:North
State  208
Old Q Values:  [30360.38716228  2260.17076314   949.39862031 -3385.12952694]
New Q values:  [31990.29740205  2260.17076314   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[40939.43533923  1641.8482468   -180.00807518 66155.80845713]
------
Step:18, Action:West
State  138
Old Q Values:  [ -76.96882141 9749.74044196 -180.6        1491.99413214]
New Q values:  [ -76.96882141 9749.74044196 -180.6        1507.15019052]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   3036.50845889    24.50779719]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   3036.50845889    24.50779719]
New Q values:  [ -281.736      -9545.4473624   4138.92551615    24.50779719]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x.  x
x g x
xxxxx
Step:20, Action:North
State  138
Old Q Values:  [ -76.96882141 9749.74044196 -180.6        1507.15019052]
New Q values:  [2713.53460402 9749.74044196 -180.6        1507.15019052]
Reward: -301  Episode Reward:  -260
xxxxx
x  ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[2713.53460402 9749.74044196 -180.6        1507.15019052]
------
Step:21, Action:South
State  138
Old Q Values:  [2713.53460402 9749.74044196 -180.6        1507.15019052]
New Q values:  [ 2713.53460402 13600.09673273  -180.6         1507.15019052]
Reward: -1  Episode Reward:  -261
xxxxx
x   x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.23360019e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:22, Action:North
State  210
Old Q Values:  [3.23360019e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
New Q values:  [1.70138298e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  -262
xxxxx
x  ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2713.53460402 13600.09673273  -180.6         1507.15019052]
------
Step:23, Action:South
State  138
Old Q Values:  [ 2713.53460402 13600.09673273  -180.6         1507.15019052]
New Q values:  [ 2713.53460402 10543.58762142  -180.6         1507.15019052]
Reward: -1  Episode Reward:  -263
xxxxx
x   x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.70138298e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:24, Action:North
State  208
Old Q Values:  [31990.29740205  2260.17076314   949.39862031 -3385.12952694]
New Q values:  [15958.59524725  2260.17076314   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -264
xxxxx
x  ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2713.53460402 10543.58762142  -180.6         1507.15019052]
------
Step:25, Action:South
State  138
Old Q Values:  [ 2713.53460402 10543.58762142  -180.6         1507.15019052]
New Q values:  [2713.53460402 9320.98397689 -180.6        1507.15019052]
Reward: -1  Episode Reward:  -265
xxxxx
x   x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.70138298e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:26, Action:North
State  210
Old Q Values:  [1.70138298e+04 2.18966613e+03 5.90993206e+02 3.52184257e+00]
New Q values:  [9.60122710e+03 2.18966613e+03 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  -266
xxxxx
x  ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[2713.53460402 9320.98397689 -180.6        1507.15019052]
------
Step:27, Action:South
State  138
Old Q Values:  [2713.53460402 9320.98397689 -180.6        1507.15019052]
New Q values:  [2713.53460402 2515.37216493 -180.6        1507.15019052]
Reward: -10001  Episode Reward:  -10267
xxxxx
x   x
x. gx
x   x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   4138.92551615    24.50779719]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1852.65568041  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  1273.91277366  -841.67946252]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  1758.16833833 -6245.61866138  -373.51590493]
------
Step:2, Action:South
State  136
Old Q Values:  [ 1117.30787879  1758.16833833 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879  1309.68132886 -6245.61866138  -373.51590493]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.00337998e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
------
Step:3, Action:North
State  216
Old Q Values:  [ 2.00337998e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.19365639e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  17
xxxxx
xg ax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879  1309.68132886 -6245.61866138  -373.51590493]
------
Step:4, Action:South
State  136
Old Q Values:  [ 1117.30787879  1309.68132886 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879   966.63055071 -6245.61866138  -373.51590493]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.19365639e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
------
Step:5, Action:South
State  208
Old Q Values:  [15958.59524725  2260.17076314   949.39862031 -3385.12952694]
New Q values:  [15958.59524725  4733.28695903   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  25
xxxxx
x. gx
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12746.06217923 -9022.41491635 -7525.7277781   3499.91055958]
------
Step:6, Action:North
State  288
Old Q Values:  [12746.06217923 -9022.41491635 -7525.7277781   3499.91055958]
New Q values:  [ 3885.40344586 -9022.41491635 -7525.7277781   3499.91055958]
Reward: -10001  Episode Reward:  -9976
xxxxx
x.  x
x .gx
x.. x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1069.23314499 1468.64790357]
------
Step:1, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1069.23314499 1468.64790357]
New Q values:  [  37.74111519 -168.92307549 1069.23314499  990.60326953]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 919.05901879  -40.34168621 1325.81369368  -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [ 919.05901879  -40.34168621 1325.81369368  -35.88578819]
New Q values:  [919.05901879 -40.34168621 850.49542097 -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1069.23314499  990.60326953]
------
Step:3, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1069.23314499  990.60326953]
New Q values:  [  37.74111519 -168.92307549 1592.71429176  990.60326953]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3885.40344586 -9022.41491635 -7525.7277781   3499.91055958]
------
Step:4, Action:West
State  288
Old Q Values:  [ 3885.40344586 -9022.41491635 -7525.7277781   3499.91055958]
New Q values:  [ 3885.40344586 -9022.41491635 -7525.7277781   1877.17851136]
Reward: -1  Episode Reward:  6
xxxxx
x..gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1592.71429176  990.60326953]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3604.31189502  1945.42617819]
New Q values:  [-2527.46239811 -8521.23367799  2606.74579177  1945.42617819]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3885.40344586 -9022.41491635 -7525.7277781   1877.17851136]
------
Step:6, Action:North
State  288
Old Q Values:  [ 3885.40344586 -9022.41491635 -7525.7277781   1877.17851136]
New Q values:  [ 6347.13995252 -9022.41491635 -7525.7277781   1877.17851136]
Reward: 9  Episode Reward:  14
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15958.59524725  4733.28695903   949.39862031 -3385.12952694]
------
Step:7, Action:North
State  208
Old Q Values:  [15958.59524725  4733.28695903   949.39862031 -3385.12952694]
New Q values:  [ 6724.03046254  4733.28695903   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  23
xxxxx
x.gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879   966.63055071 -6245.61866138  -373.51590493]
------
Step:8, Action:North
State  130
Old Q Values:  [40939.43533923  1641.8482468   -180.00807518 66155.80845713]
New Q values:  [36041.91667283  1641.8482468   -180.00807518 66155.80845713]
Reward: -301  Episode Reward:  -278
xxxxx
x..ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  1641.8482468   -180.00807518 66155.80845713]
------
Step:9, Action:West
State  138
Old Q Values:  [2713.53460402 2515.37216493 -180.6        1507.15019052]
New Q values:  [2713.53460402 2515.37216493 -180.6         893.44578837]
Reward: 9  Episode Reward:  -269
xxxxx
x.a x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   950.61904053    49.79232781]
------
Step:10, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  40899.28395249 125288.42495849]
New Q values:  [  -180.6          4272.38349051  36205.85611814 125288.42495849]
Reward: -1  Episode Reward:  -270
xxxxx
x. ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  1641.8482468   -180.00807518 66155.80845713]
------
Step:11, Action:West
State  136
Old Q Values:  [ 1117.30787879   966.63055071 -6245.61866138  -373.51590493]
New Q values:  [ 1117.30787879   966.63055071 -6245.61866138 -5767.83252987]
Reward: -10001  Episode Reward:  -10271
xxxxx
x.g x
x.. x
x   x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1592.71429176  990.60326953]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2606.74579177  1945.42617819]
New Q values:  [-2527.46239811 -8521.23367799  2606.74579177  1059.28817691]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[919.05901879 -40.34168621 850.49542097 -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [919.05901879 -40.34168621 850.49542097 -35.88578819]
New Q values:  [1072.82013637  -40.34168621  850.49542097  -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 648.44076927  428.45351876 2332.6550962  -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [ 648.44076927  428.45351876 2332.6550962  -180.6       ]
New Q values:  [ 870.35194348  428.45351876 2332.6550962  -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        2018.58545259    5.4           0.        ]
------
Step:4, Action:South
State  103
Old Q Values:  [-180.6        2018.58545259    5.4           0.        ]
New Q values:  [-180.6        1303.24908527    5.4           0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 1439.95477448    0.        ]
------
Step:5, Action:South
State  181
Old Q Values:  [ 870.35194348  428.45351876 2332.6550962  -180.6       ]
New Q values:  [ 870.35194348  492.62744842 2332.6550962  -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1072.82013637  -40.34168621  850.49542097  -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [1072.82013637  -40.34168621  850.49542097  -35.88578819]
New Q values:  [1128.32458341  -40.34168621  850.49542097  -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 870.35194348  492.62744842 2332.6550962  -180.6       ]
------
Step:7, Action:North
State  181
Old Q Values:  [ 870.35194348  492.62744842 2332.6550962  -180.6       ]
New Q values:  [ 723.24043894  492.62744842 2332.6550962  -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
xag.x
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1252.33220517   31.9495824  -180.6       ]
------
Step:8, Action:South
State  103
Old Q Values:  [-180.6        1303.24908527    5.4           0.        ]
New Q values:  [-180.6        1220.49616297    5.4           0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 723.24043894  492.62744842 2332.6550962  -180.6       ]
------
Step:9, Action:North
State  181
Old Q Values:  [ 723.24043894  492.62744842 2332.6550962  -180.6       ]
New Q values:  [ 654.84502447  492.62744842 2332.6550962  -180.6       ]
Reward: -1  Episode Reward:  21
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        1220.49616297    5.4           0.        ]
------
Step:10, Action:South
State  103
Old Q Values:  [-180.6        1220.49616297    5.4           0.        ]
New Q values:  [-180.6        1187.39499405    5.4           0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 654.84502447  492.62744842 2332.6550962  -180.6       ]
------
Step:11, Action:North
State  180
Old Q Values:  [-5994.37761603   266.69721195  2200.60202348 -4966.32149798]
New Q values:  [-2396.80304641   266.69721195  2200.60202348 -4966.32149798]
Reward: -1  Episode Reward:  19
xxxxx
xa..x
xg..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[ -180.6       -2257.0253383     5.16       -180.6      ]
------
Step:12, Action:East
State  108
Old Q Values:  [-8463.16477134  1166.4852442    971.47281877     0.        ]
New Q values:  [-8463.16477134  1166.4852442    393.98912751     0.        ]
Reward: 9  Episode Reward:  28
xxxxx
xga.x
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[0. 0. 0. 0.]
------
Step:13, Action:North
State  118
Old Q Values:  [ -252.84 -6000.6      0.       0.  ]
New Q values:  [ -281.736 -6000.6       0.        0.   ]
Reward: -301  Episode Reward:  -273
xxxxx
x a.x
xg..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ -281.736 -6000.6       0.        0.   ]
------
Step:14, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1273.91277366  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225   850.1574731   -841.67946252]
Reward: 9  Episode Reward:  -264
xxxxx
xg ax
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 1117.30787879   966.63055071 -6245.61866138 -5767.83252987]
------
Step:15, Action:North
State  138
Old Q Values:  [2713.53460402 2515.37216493 -180.6         893.44578837]
New Q values:  [1718.87422282 2515.37216493 -180.6         893.44578837]
Reward: -301  Episode Reward:  -565
xxxxx
x  ax
xg..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1718.87422282 2515.37216493 -180.6         893.44578837]
------
Step:16, Action:South
State  138
Old Q Values:  [1718.87422282 2515.37216493 -180.6         893.44578837]
New Q values:  [1718.87422282 3891.91699522 -180.6         893.44578837]
Reward: 9  Episode Reward:  -556
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9.60122710e+03 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:17, Action:North
State  210
Old Q Values:  [9.60122710e+03 2.18966613e+03 5.90993206e+02 3.52184257e+00]
New Q values:  [5.00746594e+03 2.18966613e+03 5.90993206e+02 3.52184257e+00]
Reward: -1  Episode Reward:  -557
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1718.87422282 3891.91699522 -180.6         893.44578837]
------
Step:18, Action:South
State  138
Old Q Values:  [1718.87422282 3891.91699522 -180.6         893.44578837]
New Q values:  [1718.87422282 3058.40657936 -180.6         893.44578837]
Reward: -1  Episode Reward:  -558
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.00746594e+03 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:19, Action:North
State  208
Old Q Values:  [ 6724.03046254  4733.28695903   949.39862031 -3385.12952694]
New Q values:  [ 3606.53415882  4733.28695903   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -559
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1718.87422282 3058.40657936 -180.6         893.44578837]
------
Step:20, Action:South
State  136
Old Q Values:  [ 1117.30787879   966.63055071 -6245.61866138 -5767.83252987]
New Q values:  [ 1117.30787879  1806.03830799 -6245.61866138 -5767.83252987]
Reward: -1  Episode Reward:  -560
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3606.53415882  4733.28695903   949.39862031 -3385.12952694]
------
Step:21, Action:South
State  208
Old Q Values:  [ 3606.53415882  4733.28695903   949.39862031 -3385.12952694]
New Q values:  [ 3606.53415882  3796.85676937   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -561
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6347.13995252 -9022.41491635 -7525.7277781   1877.17851136]
------
Step:22, Action:North
State  288
Old Q Values:  [ 6347.13995252 -9022.41491635 -7525.7277781   1877.17851136]
New Q values:  [ 3677.31301182 -9022.41491635 -7525.7277781   1877.17851136]
Reward: -1  Episode Reward:  -562
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3606.53415882  3796.85676937   949.39862031 -3385.12952694]
------
Step:23, Action:South
State  208
Old Q Values:  [ 3606.53415882  3796.85676937   949.39862031 -3385.12952694]
New Q values:  [ 3606.53415882  2621.33661129   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -563
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3677.31301182 -9022.41491635 -7525.7277781   1877.17851136]
------
Step:24, Action:North
State  288
Old Q Values:  [ 3677.31301182 -9022.41491635 -7525.7277781   1877.17851136]
New Q values:  [ 2552.28545237 -9022.41491635 -7525.7277781   1877.17851136]
Reward: -1  Episode Reward:  -564
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3606.53415882  2621.33661129   949.39862031 -3385.12952694]
------
Step:25, Action:North
State  208
Old Q Values:  [ 3606.53415882  2621.33661129   949.39862031 -3385.12952694]
New Q values:  [ 2359.53563734  2621.33661129   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -565
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1718.87422282 3058.40657936 -180.6         893.44578837]
------
Step:26, Action:South
State  136
Old Q Values:  [ 1117.30787879  1806.03830799 -6245.61866138 -5767.83252987]
New Q values:  [ 1117.30787879  1508.21630658 -6245.61866138 -5767.83252987]
Reward: -1  Episode Reward:  -566
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2359.53563734  2621.33661129   949.39862031 -3385.12952694]
------
Step:27, Action:South
State  208
Old Q Values:  [ 2359.53563734  2621.33661129   949.39862031 -3385.12952694]
New Q values:  [ 2359.53563734  1813.62028023   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -567
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2552.28545237 -9022.41491635 -7525.7277781   1877.17851136]
------
Step:28, Action:North
State  288
Old Q Values:  [ 2552.28545237 -9022.41491635 -7525.7277781   1877.17851136]
New Q values:  [ 2522.55396222 -9022.41491635 -7525.7277781   1877.17851136]
Reward: -1  Episode Reward:  -568
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.00746594e+03 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:29, Action:North
State  208
Old Q Values:  [ 2359.53563734  1813.62028023   949.39862031 -3385.12952694]
New Q values:  [ 1860.73622874  1813.62028023   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -569
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1718.87422282 3058.40657936 -180.6         893.44578837]
------
Step:30, Action:South
State  138
Old Q Values:  [1718.87422282 3058.40657936 -180.6         893.44578837]
New Q values:  [1718.87422282 2725.00241301 -180.6         893.44578837]
Reward: -1  Episode Reward:  -570
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.00746594e+03 2.18966613e+03 5.90993206e+02 3.52184257e+00]
------
Step:31, Action:North
State  210
Old Q Values:  [5.00746594e+03 2.18966613e+03 5.90993206e+02 3.52184257e+00]
New Q values:  [2819.88709893 2189.66612966  590.9932059     3.52184257]
Reward: -1  Episode Reward:  -571
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1718.87422282 2725.00241301 -180.6         893.44578837]
------
Step:32, Action:South
State  130
Old Q Values:  [36041.91667283  1641.8482468   -180.00807518 66155.80845713]
New Q values:  [36041.91667283  1214.36016734  -180.00807518 66155.80845713]
Reward: -1  Episode Reward:  -572
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1860.73622874  1813.62028023   949.39862031 -3385.12952694]
------
Step:33, Action:North
State  208
Old Q Values:  [ 1860.73622874  1813.62028023   949.39862031 -3385.12952694]
New Q values:  [ 1561.1952154   1813.62028023   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -573
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1718.87422282 2725.00241301 -180.6         893.44578837]
------
Step:34, Action:North
State  138
Old Q Values:  [1718.87422282 2725.00241301 -180.6         893.44578837]
New Q values:  [1324.45041303 2725.00241301 -180.6         893.44578837]
Reward: -301  Episode Reward:  -874
xxxxx
x  ax
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1324.45041303 2725.00241301 -180.6         893.44578837]
------
Step:35, Action:South
State  138
Old Q Values:  [1324.45041303 2725.00241301 -180.6         893.44578837]
New Q values:  [1324.45041303 1935.36709488 -180.6         893.44578837]
Reward: -1  Episode Reward:  -875
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2819.88709893 2189.66612966  590.9932059     3.52184257]
------
Step:36, Action:North
State  208
Old Q Values:  [ 1561.1952154   1813.62028023   949.39862031 -3385.12952694]
New Q values:  [ 1204.48821463  1813.62028023   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -876
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1324.45041303 1935.36709488 -180.6         893.44578837]
------
Step:37, Action:North
State  136
Old Q Values:  [ 1117.30787879  1508.21630658 -6245.61866138 -5767.83252987]
New Q values:  [-5281.21195651  1508.21630658 -6245.61866138 -5767.83252987]
Reward: -10301  Episode Reward:  -11177
xxxxx
x  gx
x . x
x   x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2522.55396222 -9022.41491635 -7525.7277781   1877.17851136]
------
Step:1, Action:North
State  288
Old Q Values:  [ 2522.55396222 -9022.41491635 -7525.7277781   1877.17851136]
New Q values:  [ 1558.50766896 -9022.41491635 -7525.7277781   1877.17851136]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1204.48821463  1813.62028023   949.39862031 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [ 1204.48821463  1813.62028023   949.39862031 -3385.12952694]
New Q values:  [ 1204.48821463  1288.0016655    949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1558.50766896 -9022.41491635 -7525.7277781   1877.17851136]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1558.50766896 -9022.41491635 -7525.7277781   1877.17851136]
New Q values:  [ 1558.50766896 -9022.41491635 -7525.7277781   1538.29514207]
Reward: 9  Episode Reward:  17
xxxxx
x. .x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2606.74579177  1059.28817691]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2606.74579177  1059.28817691]
New Q values:  [-2527.46239811 -8521.23367799  1509.65061739  1059.28817691]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1558.50766896 -9022.41491635 -7525.7277781   1538.29514207]
------
Step:5, Action:North
State  288
Old Q Values:  [ 1558.50766896 -9022.41491635 -7525.7277781   1538.29514207]
New Q values:  [ 1468.76919726 -9022.41491635 -7525.7277781   1538.29514207]
Reward: -1  Episode Reward:  15
xxxxx
x. .x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2819.88709893 2189.66612966  590.9932059     3.52184257]
------
Step:6, Action:North
State  208
Old Q Values:  [ 1204.48821463  1288.0016655    949.39862031 -3385.12952694]
New Q values:  [ 1067.80541432  1288.0016655    949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  24
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1324.45041303 1935.36709488 -180.6         893.44578837]
------
Step:7, Action:South
State  136
Old Q Values:  [-5281.21195651  1508.21630658 -6245.61866138 -5767.83252987]
New Q values:  [-5281.21195651   989.08702228 -6245.61866138 -5767.83252987]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1067.80541432  1288.0016655    949.39862031 -3385.12952694]
------
Step:8, Action:South
State  208
Old Q Values:  [ 1067.80541432  1288.0016655    949.39862031 -3385.12952694]
New Q values:  [ 1067.80541432   976.08920882   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  22
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1468.76919726 -9022.41491635 -7525.7277781   1538.29514207]
------
Step:9, Action:West
State  288
Old Q Values:  [ 1468.76919726 -9022.41491635 -7525.7277781   1538.29514207]
New Q values:  [ 1468.76919726 -9022.41491635 -7525.7277781   1067.61324205]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1509.65061739  1059.28817691]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1509.65061739  1059.28817691]
New Q values:  [-2527.46239811 -8521.23367799  1043.89100614  1059.28817691]
Reward: -1  Episode Reward:  20
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1468.76919726 -9022.41491635 -7525.7277781   1067.61324205]
------
Step:11, Action:North
State  288
Old Q Values:  [ 1468.76919726 -9022.41491635 -7525.7277781   1067.61324205]
New Q values:  [  907.2493032  -9022.41491635 -7525.7277781   1067.61324205]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1067.80541432   976.08920882   949.39862031 -3385.12952694]
------
Step:12, Action:North
State  208
Old Q Values:  [ 1067.80541432   976.08920882   949.39862031 -3385.12952694]
New Q values:  [ 1007.13229419   976.08920882   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1324.45041303 1935.36709488 -180.6         893.44578837]
------
Step:13, Action:South
State  138
Old Q Values:  [1324.45041303 1935.36709488 -180.6         893.44578837]
New Q values:  [1324.45041303 1619.51296763 -180.6         893.44578837]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2819.88709893 2189.66612966  590.9932059     3.52184257]
------
Step:14, Action:North
State  210
Old Q Values:  [2819.88709893 2189.66612966  590.9932059     3.52184257]
New Q values:  [1613.20872986 2189.66612966  590.9932059     3.52184257]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1324.45041303 1619.51296763 -180.6         893.44578837]
------
Step:15, Action:South
State  138
Old Q Values:  [1324.45041303 1619.51296763 -180.6         893.44578837]
New Q values:  [1324.45041303  949.34487531 -180.6         893.44578837]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1007.13229419   976.08920882   949.39862031 -3385.12952694]
------
Step:16, Action:North
State  208
Old Q Values:  [ 1007.13229419   976.08920882   949.39862031 -3385.12952694]
New Q values:  [  799.58804159   976.08920882   949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[1324.45041303  949.34487531 -180.6         893.44578837]
------
Step:17, Action:North
State  138
Old Q Values:  [1324.45041303  949.34487531 -180.6         893.44578837]
New Q values:  [ 746.51528912  949.34487531 -180.6         893.44578837]
Reward: -301  Episode Reward:  -287
xxxxx
x. ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 746.51528912  949.34487531 -180.6         893.44578837]
------
Step:18, Action:South
State  138
Old Q Values:  [ 746.51528912  949.34487531 -180.6         893.44578837]
New Q values:  [  746.51528912 -5328.03528723  -180.6          893.44578837]
Reward: -10001  Episode Reward:  -10288
xxxxx
x.  x
x..gx
x.  x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  4.72404286e+03  1.20371620e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -8.12857824e+03  4.72404286e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -8.12857824e+03  2.55191698e+03  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1613.20872986 2189.66612966  590.9932059     3.52184257]
------
Step:2, Action:South
State  208
Old Q Values:  [  799.58804159   976.08920882   949.39862031 -3385.12952694]
New Q values:  [  799.58804159   716.11965614   949.39862031 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  907.2493032  -9022.41491635 -7525.7277781   1067.61324205]
------
Step:3, Action:West
State  288
Old Q Values:  [  907.2493032  -9022.41491635 -7525.7277781   1067.61324205]
New Q values:  [  907.2493032  -9022.41491635 -7525.7277781    910.25958435]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x. gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1592.71429176  990.60326953]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1043.89100614  1059.28817691]
New Q values:  [-2527.46239811 -8521.23367799   690.03427776  1059.28817691]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x.g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  907.2493032  -9022.41491635 -7525.7277781    910.25958435]
------
Step:5, Action:West
State  288
Old Q Values:  [  907.2493032  -9022.41491635 -7525.7277781    910.25958435]
New Q values:  [  907.2493032  -9022.41491635 -7525.7277781    681.29028681]
Reward: -1  Episode Reward:  25
xxxxx
x.g.x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   690.03427776  1059.28817691]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   690.03427776  1059.28817691]
New Q values:  [-2527.46239811 -8521.23367799   690.03427776 13363.17295184]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[43133.52560357  2256.66526474 12394.68064405  1875.31501677]
------
Step:7, Action:North
State  260
Old Q Values:  [ 1276.71975496 -8695.4397473    876.33275803 -2601.74710518]
New Q values:  [-4823.73149097 -8695.4397473    876.33275803 -2601.74710518]
Reward: -9991  Episode Reward:  -9967
xxxxx
x...x
xg  x
x   x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.31485615e+03 3.68611294e+03 2.91043938e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144   976.09260036   174.55451539     0.        ]
New Q values:  [-2469.90645144   878.96235234   174.55451539     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1610.4177073   735.86737622]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   690.03427776 13363.17295184]
New Q values:  [-2527.46239811 -8521.23367799   553.58850206 13363.17295184]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  907.2493032  -9022.41491635 -7525.7277781    681.29028681]
------
Step:3, Action:North
State  288
Old Q Values:  [  907.2493032  -9022.41491635 -7525.7277781    681.29028681]
New Q values:  [  653.11930737 -9022.41491635 -7525.7277781    681.29028681]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  799.58804159   716.11965614   949.39862031 -3385.12952694]
------
Step:4, Action:East
State  210
Old Q Values:  [1613.20872986 2189.66612966  590.9932059     3.52184257]
New Q values:  [1613.20872986 2189.66612966  712.69712126    3.52184257]
Reward: -301  Episode Reward:  -274
xxxxx
x ..x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1613.20872986 2189.66612966  712.69712126    3.52184257]
------
Step:5, Action:South
State  208
Old Q Values:  [  799.58804159   716.11965614   949.39862031 -3385.12952694]
New Q values:  [  799.58804159   490.2349485    949.39862031 -3385.12952694]
Reward: -1  Episode Reward:  -275
xxxxx
x ..x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  653.11930737 -9022.41491635 -7525.7277781    681.29028681]
------
Step:6, Action:West
State  288
Old Q Values:  [  653.11930737 -9022.41491635 -7525.7277781    681.29028681]
New Q values:  [  653.11930737 -9022.41491635 -7525.7277781   4280.86800028]
Reward: -1  Episode Reward:  -276
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   553.58850206 13363.17295184]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   553.58850206 13363.17295184]
New Q values:  [-2527.46239811 -8521.23367799   553.58850206  5613.56900814]
Reward: 9  Episode Reward:  -267
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4823.73149097 -8695.4397473    876.33275803 -2601.74710518]
------
Step:8, Action:East
State  260
Old Q Values:  [-4823.73149097 -8695.4397473    876.33275803 -2601.74710518]
New Q values:  [-4823.73149097 -8695.4397473   2034.00380565 -2601.74710518]
Reward: -1  Episode Reward:  -268
xxxxx
xg..x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   553.58850206  5613.56900814]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   553.58850206  5613.56900814]
New Q values:  [-2527.46239811 -8521.23367799   553.58850206  2855.02874495]
Reward: -1  Episode Reward:  -269
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4823.73149097 -8695.4397473   2034.00380565 -2601.74710518]
------
Step:10, Action:East
State  261
Old Q Values:  [1128.32458341  -40.34168621  850.49542097  -35.88578819]
New Q values:  [1128.32458341  -40.34168621 1196.10679187  -35.88578819]
Reward: -1  Episode Reward:  -270
xxxxx
x ..x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   553.58850206  2855.02874495]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   553.58850206  2855.02874495]
New Q values:  [-2527.46239811 -8521.23367799   553.58850206  1751.61263968]
Reward: -1  Episode Reward:  -271
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4823.73149097 -8695.4397473   2034.00380565 -2601.74710518]
------
Step:12, Action:East
State  261
Old Q Values:  [1128.32458341  -40.34168621 1196.10679187  -35.88578819]
New Q values:  [1128.32458341  -40.34168621 1003.32650865  -35.88578819]
Reward: -1  Episode Reward:  -272
xxxxx
x ..x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   553.58850206  1751.61263968]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   553.58850206  1751.61263968]
New Q values:  [-2527.46239811 -8521.23367799   553.58850206 13640.10273694]
Reward: -1  Episode Reward:  -273
xxxxx
x g.x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[43133.52560357  2256.66526474 12394.68064405  1875.31501677]
------
Step:14, Action:North
State  261
Old Q Values:  [1128.32458341  -40.34168621 1003.32650865  -35.88578819]
New Q values:  [1156.52636222  -40.34168621 1003.32650865  -35.88578819]
Reward: 9  Episode Reward:  -264
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 654.84502447  492.62744842 2332.6550962  -180.6       ]
------
Step:15, Action:East
State  181
Old Q Values:  [ 654.84502447  492.62744842 2332.6550962  -180.6       ]
New Q values:  [ 654.84502447  492.62744842 2265.99911771 -180.6       ]
Reward: -1  Episode Reward:  -265
xxxxx
x .gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4445.12359743   450.62327432  1909.41710691]
------
Step:16, Action:South
State  193
Old Q Values:  [-5922.26708831  4445.12359743   450.62327432  1909.41710691]
New Q values:  [-5922.26708831  2255.2637265    450.62327432  1909.41710691]
Reward: -1  Episode Reward:  -266
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1592.71429176  990.60326953]
------
Step:17, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1592.71429176  990.60326953]
New Q values:  [   37.74111519  -168.92307549 -4079.25388322   990.60326953]
Reward: -10001  Episode Reward:  -10267
xxxxx
x ..x
x   x
x  gx
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4079.25388322   990.60326953]
------
Step:1, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4079.25388322   990.60326953]
New Q values:  [   37.74111519  -168.92307549 -4079.25388322   748.59921648]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1156.52636222  -40.34168621 1003.32650865  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [1156.52636222  -40.34168621 1003.32650865  -35.88578819]
New Q values:  [1147.8102802   -40.34168621 1003.32650865  -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 654.84502447  492.62744842 2265.99911771 -180.6       ]
------
Step:3, Action:East
State  177
Old Q Values:  [112491.23069702   4025.17604709  75490.22264214      0.        ]
New Q values:  [112491.23069702   4025.17604709  31495.945903        0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.31485615e+03 3.68611294e+03 2.91043938e+03]
------
Step:4, Action:South
State  193
Old Q Values:  [-5922.26708831  2255.2637265    450.62327432  1909.41710691]
New Q values:  [-5922.26708831  1126.08525554   450.62327432  1909.41710691]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4079.25388322   748.59921648]
------
Step:5, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1610.4177073   735.86737622]
New Q values:  [  16.82637525  495.22830495 1610.4177073   638.09003455]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1147.8102802   -40.34168621 1003.32650865  -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [1147.8102802   -40.34168621 1003.32650865  -35.88578819]
New Q values:  [1138.32384739  -40.34168621 1003.32650865  -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 654.84502447  492.62744842 2265.99911771 -180.6       ]
------
Step:7, Action:East
State  177
Old Q Values:  [112491.23069702   4025.17604709  31495.945903        0.        ]
New Q values:  [112491.23069702   4025.17604709  13170.60349328      0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1126.08525554   450.62327432  1909.41710691]
------
Step:8, Action:West
State  195
Old Q Values:  [  38.85388605 5874.35865608 3535.62314734 2546.60363946]
New Q values:  [  38.85388605 5874.35865608 3535.62314734 1514.45636002]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 1439.95477448    0.        ]
------
Step:9, Action:South
State  177
Old Q Values:  [112491.23069702   4025.17604709  13170.60349328      0.        ]
New Q values:  [112491.23069702   1950.96757305  13170.60349328      0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1138.32384739  -40.34168621 1003.32650865  -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [1138.32384739  -40.34168621 1003.32650865  -35.88578819]
New Q values:  [1134.52927427  -40.34168621 1003.32650865  -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 654.84502447  492.62744842 2265.99911771 -180.6       ]
------
Step:11, Action:North
State  181
Old Q Values:  [ 654.84502447  492.62744842 2265.99911771 -180.6       ]
New Q values:  [ 643.03767134  492.62744842 2265.99911771 -180.6       ]
Reward: 9  Episode Reward:  29
xxxxx
xag x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1252.33220517   31.9495824  -180.6       ]
------
Step:12, Action:South
State  109
Old Q Values:  [-241.10880094 1252.33220517   31.9495824  -180.6       ]
New Q values:  [-241.10880094 1180.13261738   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  28
xxxxx
x .gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 643.03767134  492.62744842 2265.99911771 -180.6       ]
------
Step:13, Action:East
State  181
Old Q Values:  [ 643.03767134  492.62744842 2265.99911771 -180.6       ]
New Q values:  [ 643.03767134  492.62744842 1478.62477916 -180.6       ]
Reward: -1  Episode Reward:  27
xxxxx
x . x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1126.08525554   450.62327432  1909.41710691]
------
Step:14, Action:West
State  195
Old Q Values:  [  38.85388605 5874.35865608 3535.62314734 1514.45636002]
New Q values:  [  38.85388605 5874.35865608 3535.62314734 1101.59744825]
Reward: -1  Episode Reward:  26
xxxxx
x . x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 1439.95477448    0.        ]
------
Step:15, Action:South
State  181
Old Q Values:  [ 643.03767134  492.62744842 1478.62477916 -180.6       ]
New Q values:  [ 643.03767134  536.80976165 1478.62477916 -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1134.52927427  -40.34168621 1003.32650865  -35.88578819]
------
Step:16, Action:North
State  261
Old Q Values:  [1134.52927427  -40.34168621 1003.32650865  -35.88578819]
New Q values:  [ 949.62661395  -40.34168621 1003.32650865  -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x . x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 1439.95477448    0.        ]
------
Step:17, Action:South
State  181
Old Q Values:  [ 643.03767134  536.80976165 1478.62477916 -180.6       ]
New Q values:  [ 643.03767134  515.12185725 1478.62477916 -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 949.62661395  -40.34168621 1003.32650865  -35.88578819]
------
Step:18, Action:East
State  261
Old Q Values:  [ 949.62661395  -40.34168621 1003.32650865  -35.88578819]
New Q values:  [949.62661395 -40.34168621 625.3103684  -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x . x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4079.25388322   748.59921648]
------
Step:19, Action:West
State  276
Old Q Values:  [  16.82637525  495.22830495 1610.4177073   638.09003455]
New Q values:  [  16.82637525  495.22830495 1610.4177073   539.523998  ]
Reward: -1  Episode Reward:  21
xxxxx
x . x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[949.62661395 -40.34168621 625.3103684  -35.88578819]
------
Step:20, Action:North
State  261
Old Q Values:  [949.62661395 -40.34168621 625.3103684  -35.88578819]
New Q values:  [822.83807933 -40.34168621 625.3103684  -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x . x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 643.03767134  515.12185725 1478.62477916 -180.6       ]
------
Step:21, Action:North
State  181
Old Q Values:  [ 643.03767134  515.12185725 1478.62477916 -180.6       ]
New Q values:  [ 612.83356675  515.12185725 1478.62477916 -180.6       ]
Reward: -1  Episode Reward:  19
xxxxx
xa. x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        1187.39499405    5.4           0.        ]
------
Step:22, Action:South
State  111
Old Q Values:  [-177.44732869 2452.59180422   69.34437687 -120.29354603]
New Q values:  [-177.44732869 1424.02415544   69.34437687 -120.29354603]
Reward: -1  Episode Reward:  18
xxxxx
x . x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 612.83356675  515.12185725 1478.62477916 -180.6       ]
------
Step:23, Action:North
State  180
Old Q Values:  [-2396.80304641   266.69721195  2200.60202348 -4966.32149798]
New Q values:  [ -841.2939844    266.69721195  2200.60202348 -4966.32149798]
Reward: -1  Episode Reward:  17
xxxxx
xa. x
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   393.42411387  -180.6       ]
------
Step:24, Action:East
State  111
Old Q Values:  [-177.44732869 1424.02415544   69.34437687 -120.29354603]
New Q values:  [-177.44732869 1424.02415544  158.31192399 -120.29354603]
Reward: 9  Episode Reward:  26
xxxxx
x a x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         417.24724415 268.83765721 204.22976196]
------
Step:25, Action:East
State  126
Old Q Values:  [  0.         417.24724415 268.83765721 204.22976196]
New Q values:  [  0.         417.24724415 374.96879939 204.22976196]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  746.51528912 -5328.03528723  -180.6          893.44578837]
------
Step:26, Action:West
State  138
Old Q Values:  [  746.51528912 -5328.03528723  -180.6          893.44578837]
New Q values:  [  746.51528912 -5328.03528723  -180.6          481.95248859]
Reward: -1  Episode Reward:  24
xxxxx
x a x
x  .x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         417.24724415 374.96879939 204.22976196]
------
Step:27, Action:South
State  126
Old Q Values:  [  0.         417.24724415 374.96879939 204.22976196]
New Q values:  [  0.         429.98760336 374.96879939 204.22976196]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   878.96235234   174.55451539     0.        ]
------
Step:28, Action:South
State  196
Old Q Values:  [-2469.90645144   878.96235234   174.55451539     0.        ]
New Q values:  [-2469.90645144   834.11025312   174.55451539     0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1610.4177073   539.523998  ]
------
Step:29, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4079.25388322   748.59921648]
New Q values:  [  37.74111519 -168.92307549 -342.0411532   748.59921648]
Reward: 9  Episode Reward:  31
xxxxx
x   x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  653.11930737 -9022.41491635 -7525.7277781   4280.86800028]
------
Step:30, Action:West
State  288
Old Q Values:  [  653.11930737 -9022.41491635 -7525.7277781   4280.86800028]
New Q values:  [  653.11930737 -9022.41491635 -7525.7277781   2194.8725123 ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1610.4177073   539.523998  ]
------
Step:31, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 1610.4177073   539.523998  ]
New Q values:  [  16.82637525  495.22830495 1302.02883661  539.523998  ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  653.11930737 -9022.41491635 -7525.7277781   2194.8725123 ]
------
Step:32, Action:West
State  288
Old Q Values:  [  653.11930737 -9022.41491635 -7525.7277781   2194.8725123 ]
New Q values:  [  653.11930737 -9022.41491635 -7525.7277781   1267.9576559 ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[  16.82637525  495.22830495 1302.02883661  539.523998  ]
------
Step:33, Action:East
State  276
Old Q Values:  [  16.82637525  495.22830495 1302.02883661  539.523998  ]
New Q values:  [ 16.82637525 495.22830495 900.59883141 539.523998  ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  653.11930737 -9022.41491635 -7525.7277781   1267.9576559 ]
------
Step:34, Action:North
State  288
Old Q Values:  [  653.11930737 -9022.41491635 -7525.7277781   1267.9576559 ]
New Q values:  [60923.54756185 -9022.41491635 -7525.7277781   1267.9576559 ]
Reward: 100009  Episode Reward:  100036
xxxxx
x   x
x  ax
x  gx
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   553.58850206 13640.10273694]
------
Step:1, Action:West
State  276
Old Q Values:  [ 16.82637525 495.22830495 900.59883141 539.523998  ]
New Q values:  [ 16.82637525 495.22830495 900.59883141 831.4107409 ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4823.73149097 -8695.4397473   2034.00380565 -2601.74710518]
------
Step:2, Action:East
State  260
Old Q Values:  [-4823.73149097 -8695.4397473   2034.00380565 -2601.74710518]
New Q values:  [-4823.73149097 -8695.4397473   1083.18117169 -2601.74710518]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 16.82637525 495.22830495 900.59883141 831.4107409 ]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   553.58850206 13640.10273694]
New Q values:  [-2527.46239811 -8521.23367799 18503.89966938 13640.10273694]
Reward: 9  Episode Reward:  17
xxxxx
xg .x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[60923.54756185 -9022.41491635 -7525.7277781   1267.9576559 ]
------
Step:4, Action:North
State  288
Old Q Values:  [60923.54756185 -9022.41491635 -7525.7277781   1267.9576559 ]
New Q values:  [24659.63861083 -9022.41491635 -7525.7277781   1267.9576559 ]
Reward: 9  Episode Reward:  26
xxxxx
x.g.x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  799.58804159   490.2349485    949.39862031 -3385.12952694]
------
Step:5, Action:East
State  208
Old Q Values:  [  799.58804159   490.2349485    949.39862031 -3385.12952694]
New Q values:  [  799.58804159   490.2349485    483.97903422 -3385.12952694]
Reward: -301  Episode Reward:  -275
xxxxx
xg .x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  799.58804159   490.2349485    483.97903422 -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [  799.58804159   490.2349485    483.97903422 -3385.12952694]
New Q values:  [20171.97775377   490.2349485    483.97903422 -3385.12952694]
Reward: 9  Episode Reward:  -266
xxxxx
x. ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  1214.36016734  -180.00807518 66155.80845713]
------
Step:7, Action:West
State  136
Old Q Values:  [-5281.21195651   989.08702228 -6245.61866138 -5767.83252987]
New Q values:  [-5281.21195651   989.08702228 -6245.61866138 -2052.68577002]
Reward: -1  Episode Reward:  -267
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   850.1574731   -841.67946252]
------
Step:8, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   850.1574731   -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225   636.18909593  -841.67946252]
Reward: -1  Episode Reward:  -268
xxxxx
x.gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   989.08702228 -6245.61866138 -2052.68577002]
------
Step:9, Action:South
State  136
Old Q Values:  [-5281.21195651   989.08702228 -6245.61866138 -2052.68577002]
New Q values:  [-5281.21195651  6446.62813505 -6245.61866138 -2052.68577002]
Reward: -1  Episode Reward:  -269
xxxxx
xg  x
x..ax
x   x
xxxxx
Step:10, Action:North
State  208
Old Q Values:  [20171.97775377   490.2349485    483.97903422 -3385.12952694]
New Q values:  [10002.17954202   490.2349485    483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  -270
xxxxx
x.gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  6446.62813505 -6245.61866138 -2052.68577002]
------
Step:11, Action:South
State  130
Old Q Values:  [36041.91667283  1214.36016734  -180.00807518 66155.80845713]
New Q values:  [36041.91667283  3485.79792954  -180.00807518 66155.80845713]
Reward: -1  Episode Reward:  -271
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10002.17954202   490.2349485    483.97903422 -3385.12952694]
------
Step:12, Action:North
State  208
Old Q Values:  [10002.17954202   490.2349485    483.97903422 -3385.12952694]
New Q values:  [ 5934.26025732   490.2349485    483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  -272
xxxxx
x.gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  6446.62813505 -6245.61866138 -2052.68577002]
------
Step:13, Action:South
State  136
Old Q Values:  [-5281.21195651  6446.62813505 -6245.61866138 -2052.68577002]
New Q values:  [-5281.21195651  4358.32933122 -6245.61866138 -2052.68577002]
Reward: -1  Episode Reward:  -273
xxxxx
x. gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5934.26025732   490.2349485    483.97903422 -3385.12952694]
------
Step:14, Action:South
State  208
Old Q Values:  [ 5934.26025732   490.2349485    483.97903422 -3385.12952694]
New Q values:  [ 5934.26025732  7593.38556265   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  -274
xxxxx
x.  x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24659.63861083 -9022.41491635 -7525.7277781   1267.9576559 ]
------
Step:15, Action:West
State  288
Old Q Values:  [24659.63861083 -9022.41491635 -7525.7277781   1267.9576559 ]
New Q values:  [24659.63861083 -9022.41491635 -7525.7277781   6057.75296318]
Reward: -1  Episode Reward:  -275
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 18503.89966938 13640.10273694]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 18503.89966938 13640.10273694]
New Q values:  [-2527.46239811 -8521.23367799 14798.851451   13640.10273694]
Reward: -1  Episode Reward:  -276
xxxxx
x.  x
x.. x
x gax
xxxxx
Step:17, Action:South
State  288
Old Q Values:  [24659.63861083 -9022.41491635 -7525.7277781   6057.75296318]
New Q values:  [24659.63861083  3608.32561671 -7525.7277781   6057.75296318]
Reward: -301  Episode Reward:  -577
xxxxx
x.  x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24659.63861083  3608.32561671 -7525.7277781   6057.75296318]
------
Step:18, Action:North
State  288
Old Q Values:  [24659.63861083  3608.32561671 -7525.7277781   6057.75296318]
New Q values:  [12141.27111313  3608.32561671 -7525.7277781   6057.75296318]
Reward: -1  Episode Reward:  -578
xxxxx
x.  x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5934.26025732  7593.38556265   483.97903422 -3385.12952694]
------
Step:19, Action:South
State  208
Old Q Values:  [ 5934.26025732  7593.38556265   483.97903422 -3385.12952694]
New Q values:  [ 5934.26025732  6679.135559     483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  -579
xxxxx
x.  x
x.g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12141.27111313  3608.32561671 -7525.7277781   6057.75296318]
------
Step:20, Action:North
State  288
Old Q Values:  [12141.27111313  3608.32561671 -7525.7277781   6057.75296318]
New Q values:  [  859.64911295  3608.32561671 -7525.7277781   6057.75296318]
Reward: -10001  Episode Reward:  -10580
xxxxx
x.  x
x..gx
x   x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[822.83807933 -40.34168621 625.3103684  -35.88578819]
------
Step:1, Action:North
State  260
Old Q Values:  [-4823.73149097 -8695.4397473   1083.18117169 -2601.74710518]
New Q values:  [-7263.91198935 -8695.4397473   1083.18117169 -2601.74710518]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
xg .x
x ..x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   393.42411387  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869 1424.02415544  158.31192399 -120.29354603]
New Q values:  [-177.44732869 1424.02415544 1310.40242444 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   4138.92551615    24.50779719]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   4138.92551615    24.50779719]
New Q values:  [ -281.736      -9545.4473624   1884.92479319    24.50779719]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  746.51528912 -5328.03528723  -180.6          481.95248859]
------
Step:3, Action:North
State  138
Old Q Values:  [  746.51528912 -5328.03528723  -180.6          481.95248859]
New Q values:  [  341.96070239 -5328.03528723  -180.6          481.95248859]
Reward: -301  Episode Reward:  -283
xxxxx
x  ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  341.96070239 -5328.03528723  -180.6          481.95248859]
------
Step:4, Action:West
State  138
Old Q Values:  [  341.96070239 -5328.03528723  -180.6          481.95248859]
New Q values:  [  341.96070239 -5328.03528723  -180.6          757.6584334 ]
Reward: -1  Episode Reward:  -284
xxxxx
x a x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1884.92479319    24.50779719]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1884.92479319    24.50779719]
New Q values:  [ -281.736      -9545.4473624    980.6674473     24.50779719]
Reward: -1  Episode Reward:  -285
xxxxx
x  ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  341.96070239 -5328.03528723  -180.6          757.6584334 ]
------
Step:6, Action:West
State  138
Old Q Values:  [  341.96070239 -5328.03528723  -180.6          757.6584334 ]
New Q values:  [  341.96070239 -5328.03528723  -180.6          596.66360755]
Reward: -1  Episode Reward:  -286
xxxxx
x a x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    980.6674473     24.50779719]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    980.6674473     24.50779719]
New Q values:  [ -281.736      -9545.4473624    570.66606118    24.50779719]
Reward: -1  Episode Reward:  -287
xxxxx
x  ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  341.96070239 -5328.03528723  -180.6          596.66360755]
------
Step:8, Action:West
State  138
Old Q Values:  [  341.96070239 -5328.03528723  -180.6          596.66360755]
New Q values:  [  341.96070239 -5328.03528723  -180.6          409.26526137]
Reward: -1  Episode Reward:  -288
xxxxx
x a x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    570.66606118    24.50779719]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    570.66606118    24.50779719]
New Q values:  [ -281.736      -9545.4473624    350.44600289    24.50779719]
Reward: -1  Episode Reward:  -289
xxxxx
x  ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  341.96070239 -5328.03528723  -180.6          409.26526137]
------
Step:10, Action:West
State  138
Old Q Values:  [  341.96070239 -5328.03528723  -180.6          409.26526137]
New Q values:  [  341.96070239 -5328.03528723  -180.6          268.23990542]
Reward: -1  Episode Reward:  -290
xxxxx
x a x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    350.44600289    24.50779719]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    350.44600289    24.50779719]
New Q values:  [ -281.736      -9545.4473624    242.16661187    24.50779719]
Reward: -1  Episode Reward:  -291
xxxxx
x  ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  341.96070239 -5328.03528723  -180.6          268.23990542]
------
Step:12, Action:North
State  138
Old Q Values:  [  341.96070239 -5328.03528723  -180.6          268.23990542]
New Q values:  [   58.77249167 -5328.03528723  -180.6          268.23990542]
Reward: -301  Episode Reward:  -592
xxxxx
x  ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[   58.77249167 -5328.03528723  -180.6          268.23990542]
------
Step:13, Action:West
State  138
Old Q Values:  [   58.77249167 -5328.03528723  -180.6          268.23990542]
New Q values:  [   58.77249167 -5328.03528723  -180.6          179.34594573]
Reward: -1  Episode Reward:  -593
xxxxx
x a x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    242.16661187    24.50779719]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    242.16661187    24.50779719]
New Q values:  [ -281.736      -9545.4473624    150.07042847    24.50779719]
Reward: -1  Episode Reward:  -594
xxxxx
x  ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[   58.77249167 -5328.03528723  -180.6          179.34594573]
------
Step:15, Action:West
State  138
Old Q Values:  [   58.77249167 -5328.03528723  -180.6          179.34594573]
New Q values:  [   58.77249167 -5328.03528723  -180.6          116.15950683]
Reward: -1  Episode Reward:  -595
xxxxx
x a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    150.07042847    24.50779719]
------
Step:16, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    150.07042847    24.50779719]
New Q values:  [ -281.736      -9545.4473624     94.27602344    24.50779719]
Reward: -1  Episode Reward:  -596
xxxxx
x  ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[   58.77249167 -5328.03528723  -180.6          116.15950683]
------
Step:17, Action:West
State  138
Old Q Values:  [   58.77249167 -5328.03528723  -180.6          116.15950683]
New Q values:  [   58.77249167 -5328.03528723  -180.6           74.14660976]
Reward: -1  Episode Reward:  -597
xxxxx
x a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     94.27602344    24.50779719]
------
Step:18, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   950.61904053    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811   401.89159914    49.79232781]
Reward: -1  Episode Reward:  -598
xxxxx
x  ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[   58.77249167 -5328.03528723  -180.6           74.14660976]
------
Step:19, Action:West
State  138
Old Q Values:  [   58.77249167 -5328.03528723  -180.6           74.14660976]
New Q values:  [   58.77249167 -5328.03528723  -180.6           57.34145094]
Reward: -1  Episode Reward:  -599
xxxxx
x a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     94.27602344    24.50779719]
------
Step:20, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     94.27602344    24.50779719]
New Q values:  [ -281.736      -9545.4473624     54.74215688    24.50779719]
Reward: -1  Episode Reward:  -600
xxxxx
x  ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[   58.77249167 -5328.03528723  -180.6           57.34145094]
------
Step:21, Action:North
State  138
Old Q Values:  [   58.77249167 -5328.03528723  -180.6           57.34145094]
New Q values:  [ -139.45925583 -5328.03528723  -180.6           57.34145094]
Reward: -301  Episode Reward:  -901
xxxxx
x  ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -5328.03528723  -180.6           57.34145094]
------
Step:22, Action:West
State  138
Old Q Values:  [ -139.45925583 -5328.03528723  -180.6           57.34145094]
New Q values:  [ -139.45925583 -5328.03528723  -180.6          142.90406012]
Reward: -1  Episode Reward:  -902
xxxxx
x a x
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   401.89159914    49.79232781]
------
Step:23, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   401.89159914    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811   203.02785769    49.79232781]
Reward: -1  Episode Reward:  -903
xxxxx
x  ax
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -5328.03528723  -180.6          142.90406012]
------
Step:24, Action:West
State  138
Old Q Values:  [ -139.45925583 -5328.03528723  -180.6          142.90406012]
New Q values:  [ -139.45925583 -5328.03528723  -180.6           72.98427111]
Reward: -1  Episode Reward:  -904
xxxxx
x a x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     54.74215688    24.50779719]
------
Step:25, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   203.02785769    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811   102.50642441    49.79232781]
Reward: -1  Episode Reward:  -905
xxxxx
x  ax
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -5328.03528723  -180.6           72.98427111]
------
Step:26, Action:West
State  138
Old Q Values:  [ -139.45925583 -5328.03528723  -180.6           72.98427111]
New Q values:  [ -139.45925583 -5328.03528723  -180.6           45.01635551]
Reward: -1  Episode Reward:  -906
xxxxx
x a x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     54.74215688    24.50779719]
------
Step:27, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   102.50642441    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811    53.90747642    49.79232781]
Reward: -1  Episode Reward:  -907
xxxxx
x  ax
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -5328.03528723  -180.6           45.01635551]
------
Step:28, Action:West
State  138
Old Q Values:  [ -139.45925583 -5328.03528723  -180.6           45.01635551]
New Q values:  [ -139.45925583 -5328.03528723  -180.6           33.57878513]
Reward: -1  Episode Reward:  -908
xxxxx
x a x
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    53.90747642    49.79232781]
------
Step:29, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    53.90747642    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811    31.0366261     49.79232781]
Reward: -1  Episode Reward:  -909
xxxxx
x  ax
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -5328.03528723  -180.6           33.57878513]
------
Step:30, Action:West
State  138
Old Q Values:  [ -139.45925583 -5328.03528723  -180.6           33.57878513]
New Q values:  [ -139.45925583 -5328.03528723  -180.6           29.25416111]
Reward: -1  Episode Reward:  -910
xxxxx
x a x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     54.74215688    24.50779719]
------
Step:31, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     54.74215688    24.50779719]
New Q values:  [ -281.736      -9545.4473624     30.07311108    24.50779719]
Reward: -1  Episode Reward:  -911
xxxxx
x  ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -5328.03528723  -180.6           29.25416111]
------
Step:32, Action:West
State  138
Old Q Values:  [ -139.45925583 -5328.03528723  -180.6           29.25416111]
New Q values:  [ -139.45925583 -5328.03528723  -180.6           20.12359777]
Reward: -1  Episode Reward:  -912
xxxxx
x a x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.07311108    24.50779719]
------
Step:33, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   636.18909593  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  1561.37443774  -841.67946252]
Reward: -1  Episode Reward:  -913
xxxxx
xg ax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4358.32933122 -6245.61866138 -2052.68577002]
------
Step:34, Action:South
State  138
Old Q Values:  [ -139.45925583 -5328.03528723  -180.6           20.12359777]
New Q values:  [ -139.45925583 -1682.45609572  -180.6           20.12359777]
Reward: 9  Episode Reward:  -904
xxxxx
x   x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.19365639e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
------
Step:35, Action:South
State  208
Old Q Values:  [ 5934.26025732  6679.135559     483.97903422 -3385.12952694]
New Q values:  [ 5934.26025732  4494.38011255   483.97903422 -3385.12952694]
Reward: 9  Episode Reward:  -895
xxxxx
x   x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  859.64911295  3608.32561671 -7525.7277781   6057.75296318]
------
Step:36, Action:West
State  288
Old Q Values:  [  859.64911295  3608.32561671 -7525.7277781   6057.75296318]
New Q values:  [  859.64911295  3608.32561671 -7525.7277781    868.15662057]
Reward: -9991  Episode Reward:  -10886
xxxxx
x   x
x.. x
x g x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     30.07311108    24.50779719]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     30.07311108    24.50779719]
New Q values:  [ -281.736      -9545.4473624     23.46632376    24.50779719]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -1682.45609572  -180.6           20.12359777]
------
Step:2, Action:West
State  136
Old Q Values:  [-5281.21195651  4358.32933122 -6245.61866138 -2052.68577002]
New Q values:  [-5281.21195651  4358.32933122 -6245.61866138  -353.26197669]
Reward: -1  Episode Reward:  8
xxxxx
xga x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1561.37443774  -841.67946252]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1561.37443774  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  1931.44857446  -841.67946252]
Reward: -1  Episode Reward:  7
xxxxx
x.gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4358.32933122 -6245.61866138  -353.26197669]
------
Step:4, Action:South
State  136
Old Q Values:  [-5281.21195651  4358.32933122 -6245.61866138  -353.26197669]
New Q values:  [-5281.21195651  3529.00980968 -6245.61866138  -353.26197669]
Reward: 9  Episode Reward:  16
xxxxx
xg  x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5934.26025732  4494.38011255   483.97903422 -3385.12952694]
------
Step:5, Action:North
State  208
Old Q Values:  [ 5934.26025732  4494.38011255   483.97903422 -3385.12952694]
New Q values:  [ 3431.80704583  4494.38011255   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  15
xxxxx
x.gax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  3529.00980968 -6245.61866138  -353.26197669]
------
Step:6, Action:South
State  136
Old Q Values:  [-5281.21195651  3529.00980968 -6245.61866138  -353.26197669]
New Q values:  [-5281.21195651  2759.31795764 -6245.61866138  -353.26197669]
Reward: -1  Episode Reward:  14
xxxxx
x. gx
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3431.80704583  4494.38011255   483.97903422 -3385.12952694]
------
Step:7, Action:South
State  208
Old Q Values:  [ 3431.80704583  4494.38011255   483.97903422 -3385.12952694]
New Q values:  [ 3431.80704583  2885.64973003   483.97903422 -3385.12952694]
Reward: 9  Episode Reward:  23
xxxxx
x.g x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  859.64911295  3608.32561671 -7525.7277781    868.15662057]
------
Step:8, Action:South
State  288
Old Q Values:  [  859.64911295  3608.32561671 -7525.7277781    868.15662057]
New Q values:  [  859.64911295  2345.2279317  -7525.7277781    868.15662057]
Reward: -301  Episode Reward:  -278
xxxxx
x. gx
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  859.64911295  2345.2279317  -7525.7277781    868.15662057]
------
Step:9, Action:South
State  288
Old Q Values:  [  859.64911295  2345.2279317  -7525.7277781    868.15662057]
New Q values:  [  859.64911295  1461.05955219 -7525.7277781    868.15662057]
Reward: -301  Episode Reward:  -579
xxxxx
x.  x
x..gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  859.64911295  1461.05955219 -7525.7277781    868.15662057]
------
Step:10, Action:South
State  288
Old Q Values:  [  859.64911295  1461.05955219 -7525.7277781    868.15662057]
New Q values:  [  859.64911295   842.14168653 -7525.7277781    868.15662057]
Reward: -301  Episode Reward:  -880
xxxxx
x.  x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  859.64911295   842.14168653 -7525.7277781    868.15662057]
------
Step:11, Action:West
State  288
Old Q Values:  [  859.64911295   842.14168653 -7525.7277781    868.15662057]
New Q values:  [  859.64911295   842.14168653 -7525.7277781   4792.31808353]
Reward: 9  Episode Reward:  -871
xxxxx
x.g x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 14798.851451   13640.10273694]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 14798.851451   13640.10273694]
New Q values:  [-2527.46239811 -8521.23367799  7356.63600546 13640.10273694]
Reward: -1  Episode Reward:  -872
xxxxx
x.  x
x.g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  859.64911295   842.14168653 -7525.7277781   4792.31808353]
------
Step:13, Action:West
State  288
Old Q Values:  [  859.64911295   842.14168653 -7525.7277781   4792.31808353]
New Q values:  [  859.64911295   842.14168653 -7525.7277781      8.35805449]
Reward: -10001  Episode Reward:  -10873
xxxxx
x.  x
x.. x
x g x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3431.80704583  2885.64973003   483.97903422 -3385.12952694]
------
Step:1, Action:North
State  216
Old Q Values:  [ 1.19365639e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.31065794e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2759.31795764 -6245.61866138  -353.26197669]
------
Step:2, Action:South
State  138
Old Q Values:  [ -139.45925583 -1682.45609572  -180.6           20.12359777]
New Q values:  [-139.45925583 -230.22441912 -180.6          20.12359777]
Reward: -1  Episode Reward:  8
xxxxx
x . x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.31065794e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
------
Step:3, Action:South
State  210
Old Q Values:  [1613.20872986 2189.66612966  712.69712126    3.52184257]
New Q values:  [1613.20872986 1139.16118575  712.69712126    3.52184257]
Reward: 9  Episode Reward:  17
xxxxx
x . x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  859.64911295   842.14168653 -7525.7277781      8.35805449]
------
Step:4, Action:North
State  288
Old Q Values:  [  859.64911295   842.14168653 -7525.7277781      8.35805449]
New Q values:  [  827.22226414   842.14168653 -7525.7277781      8.35805449]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1613.20872986 1139.16118575  712.69712126    3.52184257]
------
Step:5, Action:North
State  210
Old Q Values:  [1613.20872986 1139.16118575  712.69712126    3.52184257]
New Q values:  [ 650.72057128 1139.16118575  712.69712126    3.52184257]
Reward: -1  Episode Reward:  15
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -230.22441912 -180.6          20.12359777]
------
Step:6, Action:West
State  138
Old Q Values:  [-139.45925583 -230.22441912 -180.6          20.12359777]
New Q values:  [-139.45925583 -230.22441912 -180.6          20.80177827]
Reward: 9  Episode Reward:  24
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     23.46632376    24.50779719]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     23.46632376    24.50779719]
New Q values:  [ -281.736      -9545.4473624     23.46632376   170.29238705]
Reward: -1  Episode Reward:  23
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  536.96422723 -252.78192178]
------
Step:8, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   393.42411387  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   207.85736166  -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     23.46632376   170.29238705]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     23.46632376   170.29238705]
New Q values:  [ -281.736      -9545.4473624     23.46632376   129.87416332]
Reward: -1  Episode Reward:  21
xxxxx
xa  x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   207.85736166  -180.6       ]
------
Step:10, Action:East
State  104
Old Q Values:  [-8652.84         853.17632684  1248.75158638 -8652.84      ]
New Q values:  [-8652.84         853.17632684  1078.33520689 -8652.84      ]
Reward: -1  Episode Reward:  20
xxxxx
xga x
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1931.44857446  -841.67946252]
------
Step:11, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1931.44857446  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225  1599.77481708  -841.67946252]
Reward: -1  Episode Reward:  19
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2759.31795764 -6245.61866138  -353.26197669]
------
Step:12, Action:South
State  138
Old Q Values:  [-139.45925583 -230.22441912 -180.6          20.80177827]
New Q values:  [-139.45925583  350.66825152 -180.6          20.80177827]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.31065794e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
------
Step:13, Action:South
State  216
Old Q Values:  [ 1.31065794e+03  1.47786006e+03 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 1.31065794e+03  8.43186532e+02 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  827.22226414   842.14168653 -7525.7277781      8.35805449]
------
Step:14, Action:South
State  288
Old Q Values:  [  827.22226414   842.14168653 -7525.7277781      8.35805449]
New Q values:  [  827.22226414   408.89918057 -7525.7277781      8.35805449]
Reward: -301  Episode Reward:  -284
xxxxx
x   x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  827.22226414   408.89918057 -7525.7277781      8.35805449]
------
Step:15, Action:North
State  288
Old Q Values:  [  827.22226414   408.89918057 -7525.7277781      8.35805449]
New Q values:  [  723.48628865   408.89918057 -7525.7277781      8.35805449]
Reward: -1  Episode Reward:  -285
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.31065794e+03  8.43186532e+02 -6.17035694e+03  3.96578640e+00]
------
Step:16, Action:North
State  208
Old Q Values:  [ 3431.80704583  2885.64973003   483.97903422 -3385.12952694]
New Q values:  [ 2199.91820563  2885.64973003   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  -286
xxxxx
xg ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2759.31795764 -6245.61866138  -353.26197669]
------
Step:17, Action:South
State  138
Old Q Values:  [-139.45925583  350.66825152 -180.6          20.80177827]
New Q values:  [-139.45925583  532.8646836  -180.6          20.80177827]
Reward: -1  Episode Reward:  -287
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.31065794e+03  8.43186532e+02 -6.17035694e+03  3.96578640e+00]
------
Step:18, Action:North
State  208
Old Q Values:  [ 2199.91820563  2885.64973003   483.97903422 -3385.12952694]
New Q values:  [ 1707.16266954  2885.64973003   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  -288
xxxxx
xg ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2759.31795764 -6245.61866138  -353.26197669]
------
Step:19, Action:South
State  136
Old Q Values:  [-5281.21195651  2759.31795764 -6245.61866138  -353.26197669]
New Q values:  [-5281.21195651  1968.82210207 -6245.61866138  -353.26197669]
Reward: -1  Episode Reward:  -289
xxxxx
x g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1707.16266954  2885.64973003   483.97903422 -3385.12952694]
------
Step:20, Action:South
State  208
Old Q Values:  [ 1707.16266954  2885.64973003   483.97903422 -3385.12952694]
New Q values:  [ 1707.16266954  1370.70577861   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  -290
xxxxx
x  gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  723.48628865   408.89918057 -7525.7277781      8.35805449]
------
Step:21, Action:North
State  288
Old Q Values:  [  723.48628865   408.89918057 -7525.7277781      8.35805449]
New Q values:  [-5199.05668368   408.89918057 -7525.7277781      8.35805449]
Reward: -10001  Episode Reward:  -10291
xxxxx
x   x
x..gx
x.. x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.31485615e+03 3.68611294e+03 2.91043938e+03]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.31485615e+03 3.68611294e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.82337328e+03 3.68611294e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7356.63600546 13640.10273694]
------
Step:2, Action:West
State  276
Old Q Values:  [ 16.82637525 495.22830495 900.59883141 831.4107409 ]
New Q values:  [ 16.82637525 495.22830495 900.59883141 584.81572016]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x. .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[822.83807933 -40.34168621 625.3103684  -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [822.83807933 -40.34168621 625.3103684  -35.88578819]
New Q values:  [778.12266548 -40.34168621 625.3103684  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 612.83356675  515.12185725 1478.62477916 -180.6       ]
------
Step:4, Action:North
State  180
Old Q Values:  [ -841.2939844    266.69721195  2200.60202348 -4966.32149798]
New Q values:  [ -268.76038526   266.69721195  2200.60202348 -4966.32149798]
Reward: 9  Episode Reward:  36
xxxxx
xa .x
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   207.85736166  -180.6       ]
------
Step:5, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   207.85736166  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   211.53922567  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x a.x
x  .x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         429.98760336 374.96879939 204.22976196]
------
Step:6, Action:South
State  126
Old Q Values:  [  0.         429.98760336 374.96879939 204.22976196]
New Q values:  [  0.         421.62811728 374.96879939 204.22976196]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   834.11025312   174.55451539     0.        ]
------
Step:7, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -7.91106519e+03  1.26155064e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.89484643e+03  1.26155064e+04  0.00000000e+00]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 16.82637525 495.22830495 900.59883141 584.81572016]
------
Step:8, Action:East
State  276
Old Q Values:  [ 16.82637525 495.22830495 900.59883141 584.81572016]
New Q values:  [ 16.82637525 495.22830495 488.30928674 584.81572016]
Reward: 9  Episode Reward:  42
xxxxx
x  .x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368   408.89918057 -7525.7277781      8.35805449]
------
Step:9, Action:South
State  288
Old Q Values:  [-5199.05668368   408.89918057 -7525.7277781      8.35805449]
New Q values:  [-5199.05668368   105.6294264  -7525.7277781      8.35805449]
Reward: -301  Episode Reward:  -259
xxxxx
xg .x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368   105.6294264  -7525.7277781      8.35805449]
------
Step:10, Action:South
State  288
Old Q Values:  [-5199.05668368   105.6294264  -7525.7277781      8.35805449]
New Q values:  [-5199.05668368  -106.65940152 -7525.7277781      8.35805449]
Reward: -301  Episode Reward:  -560
xxxxx
x  .x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -106.65940152 -7525.7277781      8.35805449]
------
Step:11, Action:West
State  288
Old Q Values:  [-5199.05668368  -106.65940152 -7525.7277781      8.35805449]
New Q values:  [-5199.05668368  -106.65940152 -7525.7277781    178.18793784]
Reward: -1  Episode Reward:  -561
xxxxx
x  .x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 16.82637525 495.22830495 488.30928674 584.81572016]
------
Step:12, Action:South
State  276
Old Q Values:  [ 16.82637525 495.22830495 488.30928674 584.81572016]
New Q values:  [   16.82637525 -5807.06396197   488.30928674   584.81572016]
Reward: -10301  Episode Reward:  -10862
xxxxx
x  .x
x  .x
x g x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   211.53922567  -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134  1166.4852442    393.98912751     0.        ]
New Q values:  [-8463.16477134  1166.4852442    642.92809613     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1599.77481708  -841.67946252]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     23.46632376   129.87416332]
New Q values:  [ -281.736      -9545.4473624    174.64593459   129.87416332]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  532.8646836  -180.6          20.80177827]
------
Step:3, Action:South
State  136
Old Q Values:  [-5281.21195651  1968.82210207 -6245.61866138  -353.26197669]
New Q values:  [-5281.21195651  1186.12622382 -6245.61866138  -353.26197669]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1.31065794e+03  8.43186532e+02 -6.17035694e+03  3.96578640e+00]
------
Step:4, Action:North
State  216
Old Q Values:  [ 1.31065794e+03  8.43186532e+02 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 6.83522582e+02  8.43186532e+02 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  532.8646836  -180.6          20.80177827]
------
Step:5, Action:South
State  138
Old Q Values:  [-139.45925583  532.8646836  -180.6          20.80177827]
New Q values:  [-139.45925583 1640.60169983 -180.6          20.80177827]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1638.46584919 4760.18608796    0.          429.03841886]
------
Step:6, Action:South
State  216
Old Q Values:  [ 6.83522582e+02  8.43186532e+02 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 6.83522582e+02  3.96130994e+02 -6.17035694e+03  3.96578640e+00]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -106.65940152 -7525.7277781    178.18793784]
------
Step:7, Action:West
State  288
Old Q Values:  [-5199.05668368  -106.65940152 -7525.7277781    178.18793784]
New Q values:  [-5199.05668368  -106.65940152 -7525.7277781   4168.70599622]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7356.63600546 13640.10273694]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7356.63600546 13640.10273694]
New Q values:  [-2527.46239811 -8521.23367799  7356.63600546  5694.87789442]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[778.12266548 -40.34168621 625.3103684  -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [778.12266548 -40.34168621 625.3103684  -35.88578819]
New Q values:  [807.06397043 -40.34168621 625.3103684  -35.88578819]
Reward: -1  Episode Reward:  51
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1199.27464606 1654.71634746 1439.95477448    0.        ]
------
Step:10, Action:South
State  181
Old Q Values:  [ 612.83356675  515.12185725 1478.62477916 -180.6       ]
New Q values:  [ 612.83356675  447.56793403 1478.62477916 -180.6       ]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[807.06397043 -40.34168621 625.3103684  -35.88578819]
------
Step:11, Action:North
State  257
Old Q Values:  [43133.52560357  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [51000.17945053  2256.66526474 12394.68064405  1875.31501677]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[112491.23069702   1950.96757305  13170.60349328      0.        ]
------
Step:12, Action:North
State  183
Old Q Values:  [1199.27464606 1654.71634746 1439.95477448    0.        ]
New Q values:  [ 906.31710506 1654.71634746 1439.95477448    0.        ]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1424.02415544 1310.40242444 -120.29354603]
------
Step:13, Action:South
State  99
Old Q Values:  [    0.         43130.3662752  59306.25267112     0.        ]
New Q values:  [    0.         50998.91571919 59306.25267112     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[112491.23069702   1950.96757305  13170.60349328      0.        ]
------
Step:14, Action:North
State  181
Old Q Values:  [ 612.83356675  447.56793403 1478.62477916 -180.6       ]
New Q values:  [ 671.74067333  447.56793403 1478.62477916 -180.6       ]
Reward: -1  Episode Reward:  46
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1424.02415544 1310.40242444 -120.29354603]
------
Step:15, Action:South
State  99
Old Q Values:  [    0.         50998.91571919 59306.25267112     0.        ]
New Q values:  [    0.         54146.33549678 59306.25267112     0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[112491.23069702   1950.96757305  13170.60349328      0.        ]
------
Step:16, Action:North
State  181
Old Q Values:  [ 671.74067333  447.56793403 1478.62477916 -180.6       ]
New Q values:  [ 622.13605455  447.56793403 1478.62477916 -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1180.13261738   31.9495824  -180.6       ]
------
Step:17, Action:South
State  109
Old Q Values:  [-241.10880094 1180.13261738   31.9495824  -180.6       ]
New Q values:  [-241.10880094  915.0404807    31.9495824  -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 622.13605455  447.56793403 1478.62477916 -180.6       ]
------
Step:18, Action:East
State  177
Old Q Values:  [112491.23069702   1950.96757305  13170.60349328      0.        ]
New Q values:  [112491.23069702   1950.96757305  67020.65338209      0.        ]
Reward: 100009  Episode Reward:  100052
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  1186.12622382 -6245.61866138  -353.26197669]
------
Step:1, Action:South
State  136
Old Q Values:  [-5281.21195651  1186.12622382 -6245.61866138  -353.26197669]
New Q values:  [-5281.21195651   684.90726425 -6245.61866138  -353.26197669]
Reward: 9  Episode Reward:  9
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6.83522582e+02  3.96130994e+02 -6.17035694e+03  3.96578640e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [ 1707.16266954  1370.70577861   483.97903422 -3385.12952694]
New Q values:  [  887.73724709  1370.70577861   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  8
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   684.90726425 -6245.61866138  -353.26197669]
------
Step:3, Action:South
State  138
Old Q Values:  [-139.45925583 1640.60169983 -180.6          20.80177827]
New Q values:  [-139.45925583  860.69745465 -180.6          20.80177827]
Reward: -1  Episode Reward:  7
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6.83522582e+02  3.96130994e+02 -6.17035694e+03  3.96578640e+00]
------
Step:4, Action:North
State  216
Old Q Values:  [ 6.83522582e+02  3.96130994e+02 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 5.31018269e+02  3.96130994e+02 -6.17035694e+03  3.96578640e+00]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  860.69745465 -180.6          20.80177827]
------
Step:5, Action:South
State  138
Old Q Values:  [-139.45925583  860.69745465 -180.6          20.80177827]
New Q values:  [-139.45925583  685.42733759 -180.6          20.80177827]
Reward: -1  Episode Reward:  5
xxxxx
x.  x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 650.72057128 1139.16118575  712.69712126    3.52184257]
------
Step:6, Action:South
State  210
Old Q Values:  [ 650.72057128 1139.16118575  712.69712126    3.52184257]
New Q values:  [ 650.72057128 1711.67627317  712.69712126    3.52184257]
Reward: 9  Episode Reward:  14
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -106.65940152 -7525.7277781   4168.70599622]
------
Step:7, Action:South
State  288
Old Q Values:  [-5199.05668368  -106.65940152 -7525.7277781   4168.70599622]
New Q values:  [-5199.05668368  1027.34803826 -7525.7277781   4168.70599622]
Reward: -301  Episode Reward:  -287
xxxxx
x.  x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  1027.34803826 -7525.7277781   4168.70599622]
------
Step:8, Action:West
State  288
Old Q Values:  [-5199.05668368  1027.34803826 -7525.7277781   4168.70599622]
New Q values:  [-5199.05668368  1027.34803826 -7525.7277781   3879.87320013]
Reward: 9  Episode Reward:  -278
xxxxx
x.  x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7356.63600546  5694.87789442]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7356.63600546  5694.87789442]
New Q values:  [-2527.46239811 -8521.23367799  4106.01636222  5694.87789442]
Reward: -1  Episode Reward:  -279
xxxxx
xg  x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  1027.34803826 -7525.7277781   3879.87320013]
------
Step:10, Action:West
State  288
Old Q Values:  [-5199.05668368  1027.34803826 -7525.7277781   3879.87320013]
New Q values:  [-5199.05668368  1027.34803826 -7525.7277781   3259.81264838]
Reward: -1  Episode Reward:  -280
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4106.01636222  5694.87789442]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4106.01636222  5694.87789442]
New Q values:  [-2527.46239811 -8521.23367799  4106.01636222 17583.40499293]
Reward: 9  Episode Reward:  -271
xxxxx
x.  x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[51000.17945053  2256.66526474 12394.68064405  1875.31501677]
------
Step:12, Action:North
State  257
Old Q Values:  [51000.17945053  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [54152.84098932  2256.66526474 12394.68064405  1875.31501677]
Reward: 9  Episode Reward:  -262
xxxxx
x.  x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[112491.23069702   1950.96757305  67020.65338209      0.        ]
------
Step:13, Action:North
State  183
Old Q Values:  [ 906.31710506 1654.71634746 1439.95477448    0.        ]
New Q values:  [ 795.13408865 1654.71634746 1439.95477448    0.        ]
Reward: 9  Episode Reward:  -253
xxxxx
xa  x
x . x
x  gx
xxxxx
Step:14, Action:East
State  111
Old Q Values:  [-177.44732869 1424.02415544 1310.40242444 -120.29354603]
New Q values:  [-177.44732869 1424.02415544  575.95475015 -120.29354603]
Reward: -1  Episode Reward:  -254
xxxxx
x a x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    174.64593459   129.87416332]
------
Step:15, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    31.0366261     49.79232781]
New Q values:  [ -253.44886264 -1902.20915811   217.44285172    49.79232781]
Reward: -1  Episode Reward:  -255
xxxxx
x  ax
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  685.42733759 -180.6          20.80177827]
------
Step:16, Action:South
State  138
Old Q Values:  [-139.45925583  685.42733759 -180.6          20.80177827]
New Q values:  [-139.45925583  787.07381698 -180.6          20.80177827]
Reward: -1  Episode Reward:  -256
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 650.72057128 1711.67627317  712.69712126    3.52184257]
------
Step:17, Action:South
State  208
Old Q Values:  [  887.73724709  1370.70577861   483.97903422 -3385.12952694]
New Q values:  [  887.73724709  1525.62610596   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  -257
xxxxx
x   x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  1027.34803826 -7525.7277781   3259.81264838]
------
Step:18, Action:West
State  288
Old Q Values:  [-5199.05668368  1027.34803826 -7525.7277781   3259.81264838]
New Q values:  [-5199.05668368  1027.34803826 -7525.7277781    578.34655723]
Reward: -10001  Episode Reward:  -10258
xxxxx
x   x
x . x
x g x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -342.0411532   748.59921648]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4106.01636222 17583.40499293]
New Q values:  [-2527.46239811 -8521.23367799  4106.01636222  7280.8811883 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[807.06397043 -40.34168621 625.3103684  -35.88578819]
------
Step:2, Action:North
State  260
Old Q Values:  [-7263.91198935 -8695.4397473   1083.18117169 -2601.74710518]
New Q values:  [-1809.65945727 -8695.4397473   1083.18117169 -2601.74710518]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3635.01779491     0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 795.13408865 1654.71634746 1439.95477448    0.        ]
New Q values:  [ 795.13408865 1654.71634746 1346.95700506    0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  2.55191698e+03  1.20371620e+03]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 5.82337328e+03 3.68611294e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.82337328e+03 1.93753301e+03 2.91043938e+03]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  887.73724709  1525.62610596   483.97903422 -3385.12952694]
------
Step:5, Action:South
State  208
Old Q Values:  [  887.73724709  1525.62610596   483.97903422 -3385.12952694]
New Q values:  [  887.73724709   917.85485386   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  35
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  1027.34803826 -7525.7277781    578.34655723]
------
Step:6, Action:South
State  288
Old Q Values:  [-5199.05668368  1027.34803826 -7525.7277781    578.34655723]
New Q values:  [-5199.05668368   538.54362678 -7525.7277781    578.34655723]
Reward: -301  Episode Reward:  -266
xxxxx
x..gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368   538.54362678 -7525.7277781    578.34655723]
------
Step:7, Action:West
State  288
Old Q Values:  [-5199.05668368   538.54362678 -7525.7277781    578.34655723]
New Q values:  [-5199.05668368   538.54362678 -7525.7277781   2415.00297938]
Reward: -1  Episode Reward:  -267
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4106.01636222  7280.8811883 ]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4106.01636222  7280.8811883 ]
New Q values:  [-2527.46239811 -8521.23367799  4106.01636222  3153.87166645]
Reward: -1  Episode Reward:  -268
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[807.06397043 -40.34168621 625.3103684  -35.88578819]
------
Step:9, Action:North
State  257
Old Q Values:  [54152.84098932  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [55407.90560484  2256.66526474 12394.68064405  1875.31501677]
Reward: -1  Episode Reward:  -269
xxxxx
x.g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[112491.23069702   1950.96757305  67020.65338209      0.        ]
------
Step:10, Action:North
State  180
Old Q Values:  [ -268.76038526   266.69721195  2200.60202348 -4966.32149798]
New Q values:  [-5634.92612794   266.69721195  2200.60202348 -4966.32149798]
Reward: -9991  Episode Reward:  -10260
xxxxx
xg..x
x   x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1126.08525554   450.62327432  1909.41710691]
------
Step:1, Action:West
State  200
Old Q Values:  [  62.8218634  1206.93052108 1340.18546859  181.20343395]
New Q values:  [  62.8218634  1206.93052108 1340.18546859  408.67479662]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[    9.84673294  1102.64474348 -1189.2802612    940.95197235]
------
Step:2, Action:South
State  180
Old Q Values:  [-5634.92612794   266.69721195  2200.60202348 -4966.32149798]
New Q values:  [-5634.92612794   437.03323629  2200.60202348 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1809.65945727 -8695.4397473   1083.18117169 -2601.74710518]
------
Step:3, Action:East
State  260
Old Q Values:  [-1809.65945727 -8695.4397473   1083.18117169 -2601.74710518]
New Q values:  [-1809.65945727 -8695.4397473   1670.47737734 -2601.74710518]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4106.01636222  3153.87166645]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4106.01636222  3153.87166645]
New Q values:  [-2527.46239811 -8521.23367799  2372.3074387   3153.87166645]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368   538.54362678 -7525.7277781   2415.00297938]
------
Step:5, Action:West
State  288
Old Q Values:  [-5199.05668368   538.54362678 -7525.7277781   2415.00297938]
New Q values:  [-5199.05668368   538.54362678 -7525.7277781  -4088.43730831]
Reward: -10001  Episode Reward:  -9965
xxxxx
x.. x
x  .x
x g x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  887.73724709   917.85485386   483.97903422 -3385.12952694]
------
Step:1, Action:South
State  208
Old Q Values:  [  887.73724709   917.85485386   483.97903422 -3385.12952694]
New Q values:  [  887.73724709   534.10502958   483.97903422 -3385.12952694]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368   538.54362678 -7525.7277781  -4088.43730831]
------
Step:2, Action:South
State  288
Old Q Values:  [-5199.05668368   538.54362678 -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368   196.38053875 -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -292
xxxxx
x...x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368   196.38053875 -7525.7277781  -4088.43730831]
------
Step:3, Action:South
State  288
Old Q Values:  [-5199.05668368   196.38053875 -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368   -43.13362288 -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -593
xxxxx
x..gx
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368   -43.13362288 -7525.7277781  -4088.43730831]
------
Step:4, Action:South
State  288
Old Q Values:  [-5199.05668368   -43.13362288 -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368  -210.79353601 -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -894
xxxxx
x.g.x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -210.79353601 -7525.7277781  -4088.43730831]
------
Step:5, Action:South
State  288
Old Q Values:  [-5199.05668368  -210.79353601 -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368  -328.15547521 -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -1195
xxxxx
x...x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -328.15547521 -7525.7277781  -4088.43730831]
------
Step:6, Action:South
State  288
Old Q Values:  [-5199.05668368  -328.15547521 -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368  -410.30883265 -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -1496
xxxxx
x...x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -410.30883265 -7525.7277781  -4088.43730831]
------
Step:7, Action:South
State  288
Old Q Values:  [-5199.05668368  -410.30883265 -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368  -467.81618285 -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -1797
xxxxx
x..gx
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -467.81618285 -7525.7277781  -4088.43730831]
------
Step:8, Action:South
State  288
Old Q Values:  [-5199.05668368  -467.81618285 -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368  -508.071328   -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -2098
xxxxx
x.g.x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -508.071328   -7525.7277781  -4088.43730831]
------
Step:9, Action:South
State  288
Old Q Values:  [-5199.05668368  -508.071328   -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368  -536.2499296  -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -2399
xxxxx
x..gx
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -536.2499296  -7525.7277781  -4088.43730831]
------
Step:10, Action:South
State  288
Old Q Values:  [-5199.05668368  -536.2499296  -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368  -555.97495072 -7525.7277781  -4088.43730831]
Reward: -301  Episode Reward:  -2700
xxxxx
x...x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368  -555.97495072 -7525.7277781  -4088.43730831]
------
Step:11, Action:South
State  288
Old Q Values:  [-5199.05668368  -555.97495072 -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368 -6569.7824655  -7525.7277781  -4088.43730831]
Reward: -10301  Episode Reward:  -13001
xxxxx
x...x
x . x
x..gx
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    174.64593459   129.87416332]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    174.64593459   129.87416332]
New Q values:  [ -281.736      -9545.4473624    311.38051893   129.87416332]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  787.07381698 -180.6          20.80177827]
------
Step:2, Action:South
State  138
Old Q Values:  [-139.45925583  787.07381698 -180.6          20.80177827]
New Q values:  [-139.45925583  833.73240874 -180.6          20.80177827]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 650.72057128 1711.67627317  712.69712126    3.52184257]
------
Step:3, Action:East
State  216
Old Q Values:  [ 5.31018269e+02  3.96130994e+02 -6.17035694e+03  3.96578640e+00]
New Q values:  [ 5.31018269e+02  3.96130994e+02 -8.48943729e+03  3.96578640e+00]
Reward: -10301  Episode Reward:  -10283
xxxxx
x.  x
x. gx
x...x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  833.73240874 -180.6          20.80177827]
------
Step:1, Action:South
State  138
Old Q Values:  [-139.45925583  833.73240874 -180.6          20.80177827]
New Q values:  [-139.45925583  852.39584545 -180.6          20.80177827]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 650.72057128 1711.67627317  712.69712126    3.52184257]
------
Step:2, Action:South
State  210
Old Q Values:  [ 650.72057128 1711.67627317  712.69712126    3.52184257]
New Q values:  [ 6.50720571e+02 -6.54246068e+03  7.12697121e+02  3.52184257e+00]
Reward: -10001  Episode Reward:  -9992
xxxxx
x.. x
x.. x
x..gx
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1599.77481708  -841.67946252]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1599.77481708  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225   850.7821061   -841.67946252]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   684.90726425 -6245.61866138  -353.26197669]
------
Step:2, Action:South
State  136
Old Q Values:  [-5281.21195651   684.90726425 -6245.61866138  -353.26197669]
New Q values:  [-5281.21195651   438.66838651 -6245.61866138  -353.26197669]
Reward: 9  Episode Reward:  18
xxxxx
x  gx
x..ax
x...x
xxxxx
Step:3, Action:West
State  216
Old Q Values:  [ 5.31018269e+02  3.96130994e+02 -8.48943729e+03  3.96578640e+00]
New Q values:  [  531.01826936   396.13099396 -8489.43729461   409.04195514]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1206.93052108 1340.18546859  408.67479662]
------
Step:4, Action:East
State  200
Old Q Values:  [  62.8218634  1206.93052108 1340.18546859  408.67479662]
New Q values:  [  62.8218634  1206.93052108  694.77966824  408.67479662]
Reward: -1  Episode Reward:  26
xxxxx
xg  x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  531.01826936   396.13099396 -8489.43729461   409.04195514]
------
Step:5, Action:North
State  216
Old Q Values:  [  531.01826936   396.13099396 -8489.43729461   409.04195514]
New Q values:  [  343.40782369   396.13099396 -8489.43729461   409.04195514]
Reward: -1  Episode Reward:  25
xxxxx
x gax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   438.66838651 -6245.61866138  -353.26197669]
------
Step:6, Action:South
State  138
Old Q Values:  [-139.45925583  852.39584545 -180.6          20.80177827]
New Q values:  [-139.45925583  463.07092472 -180.6          20.80177827]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  343.40782369   396.13099396 -8489.43729461   409.04195514]
------
Step:7, Action:South
State  216
Old Q Values:  [  343.40782369   396.13099396 -8489.43729461   409.04195514]
New Q values:  [  343.40782369 -1062.67879491 -8489.43729461   409.04195514]
Reward: 9  Episode Reward:  33
xxxxx
x g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368 -6569.7824655  -7525.7277781  -4088.43730831]
------
Step:8, Action:West
State  288
Old Q Values:  [-5199.05668368 -6569.7824655  -7525.7277781  -4088.43730831]
New Q values:  [-5199.05668368 -6569.7824655  -7525.7277781   -683.81342339]
Reward: 9  Episode Reward:  42
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2372.3074387   3153.87166645]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2372.3074387   3153.87166645]
New Q values:  [-2527.46239811 -8521.23367799  2372.3074387  13563.77838824]
Reward: 9  Episode Reward:  51
xxxxx
x   x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[40989.43240555  7580.15833597  5576.40109469   644.94785455]
------
Step:10, Action:South
State  256
Old Q Values:  [40989.43240555  7580.15833597  5576.40109469   644.94785455]
New Q values:  [40989.43240555 15148.29305605  5576.40109469   644.94785455]
Reward: -301  Episode Reward:  -250
xxxxx
xg  x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[40989.43240555 15148.29305605  5576.40109469   644.94785455]
------
Step:11, Action:North
State  257
Old Q Values:  [55407.90560484  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [115915.93145104   2256.66526474  12394.68064405   1875.31501677]
Reward: 100009  Episode Reward:  99759
xxxxx
x g x
xa  x
x   x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  2.55191698e+03  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 5.82337328e+03 1.93753301e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.82337328e+03 1.04673438e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  887.73724709   534.10502958   483.97903422 -3385.12952694]
------
Step:2, Action:North
State  208
Old Q Values:  [  887.73724709   534.10502958   483.97903422 -3385.12952694]
New Q values:  [  492.09541479   534.10502958   483.97903422 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   438.66838651 -6245.61866138  -353.26197669]
------
Step:3, Action:South
State  130
Old Q Values:  [36041.91667283  3485.79792954  -180.00807518 66155.80845713]
New Q values:  [36041.91667283  1553.95068069  -180.00807518 66155.80845713]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479   534.10502958   483.97903422 -3385.12952694]
------
Step:4, Action:South
State  210
Old Q Values:  [ 6.50720571e+02 -6.54246068e+03  7.12697121e+02  3.52184257e+00]
New Q values:  [  650.72057128 -2816.72830031   712.69712126     3.52184257]
Reward: 9  Episode Reward:  26
xxxxx
x.. x
x.  x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368 -6569.7824655  -7525.7277781   -683.81342339]
------
Step:5, Action:West
State  288
Old Q Values:  [-5199.05668368 -6569.7824655  -7525.7277781   -683.81342339]
New Q values:  [-5199.05668368 -6569.7824655  -7525.7277781  -2198.99185288]
Reward: -9991  Episode Reward:  -9965
xxxxx
x.. x
x.  x
x g x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2372.3074387  13563.77838824]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2372.3074387  13563.77838824]
New Q values:  [-2527.46239811 -8521.23367799   294.62541962 13563.77838824]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368 -6569.7824655  -7525.7277781  -2198.99185288]
------
Step:2, Action:West
State  288
Old Q Values:  [-5199.05668368 -6569.7824655  -7525.7277781  -2198.99185288]
New Q values:  [-5199.05668368 -6569.7824655  -7525.7277781   3188.93677532]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   294.62541962 13563.77838824]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   294.62541962 13563.77838824]
New Q values:  [-2527.46239811 -8521.23367799   294.62541962  5667.03054643]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[807.06397043 -40.34168621 625.3103684  -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [807.06397043 -40.34168621 625.3103684  -35.88578819]
New Q values:  [771.81302192 -40.34168621 625.3103684  -35.88578819]
Reward: 9  Episode Reward:  16
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 622.13605455  447.56793403 1478.62477916 -180.6       ]
------
Step:5, Action:North
State  180
Old Q Values:  [-5634.92612794   437.03323629  2200.60202348 -4966.32149798]
New Q values:  [-2247.02245117   437.03323629  2200.60202348 -4966.32149798]
Reward: 9  Episode Reward:  25
xxxxx
xa..x
xg..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[ -180.6       -2257.0253383     5.16       -180.6      ]
------
Step:6, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   211.53922567  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684    90.01569027  -180.6       ]
Reward: 9  Episode Reward:  34
xxxxx
x a.x
x ..x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ -281.736 -6000.6       0.        0.   ]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    311.38051893   129.87416332]
New Q values:  [ -281.736      -9545.4473624    268.87348499   129.87416332]
Reward: 9  Episode Reward:  43
xxxxx
x  ax
xg..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  463.07092472 -180.6          20.80177827]
------
Step:8, Action:South
State  138
Old Q Values:  [-139.45925583  463.07092472 -180.6          20.80177827]
New Q values:  [-139.45925583  404.43750627 -180.6          20.80177827]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  650.72057128 -2816.72830031   712.69712126     3.52184257]
------
Step:9, Action:East
State  210
Old Q Values:  [  650.72057128 -2816.72830031   712.69712126     3.52184257]
New Q values:  [  650.72057128 -2816.72830031   318.28798488     3.52184257]
Reward: -301  Episode Reward:  -249
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  650.72057128 -2816.72830031   318.28798488     3.52184257]
------
Step:10, Action:North
State  210
Old Q Values:  [  650.72057128 -2816.72830031   318.28798488     3.52184257]
New Q values:  [  381.01948039 -2816.72830031   318.28798488     3.52184257]
Reward: -1  Episode Reward:  -250
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  404.43750627 -180.6          20.80177827]
------
Step:11, Action:South
State  138
Old Q Values:  [-139.45925583  404.43750627 -180.6          20.80177827]
New Q values:  [-139.45925583  321.40651138 -180.6          20.80177827]
Reward: -1  Episode Reward:  -251
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479   534.10502958   483.97903422 -3385.12952694]
------
Step:12, Action:South
State  210
Old Q Values:  [  381.01948039 -2816.72830031   318.28798488     3.52184257]
New Q values:  [ 381.01948039 -170.61028753  318.28798488    3.52184257]
Reward: -1  Episode Reward:  -252
xxxxx
x   x
x . x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368 -6569.7824655  -7525.7277781   3188.93677532]
------
Step:13, Action:West
State  288
Old Q Values:  [-5199.05668368 -6569.7824655  -7525.7277781   3188.93677532]
New Q values:  [-5199.05668368 -6569.7824655  -7525.7277781   2975.08387406]
Reward: -1  Episode Reward:  -253
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   294.62541962  5667.03054643]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   294.62541962  5667.03054643]
New Q values:  [-2527.46239811 -8521.23367799   294.62541962 -3232.64456823]
Reward: -10001  Episode Reward:  -10254
xxxxx
x   x
x . x
xg  x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 622.13605455  447.56793403 1478.62477916 -180.6       ]
------
Step:1, Action:East
State  181
Old Q Values:  [ 622.13605455  447.56793403 1478.62477916 -180.6       ]
New Q values:  [  622.13605455   447.56793403 -5152.9170124   -180.6       ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. .x
x g.x
x...x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -342.0411532   748.59921648]
------
Step:1, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -342.0411532   748.59921648]
New Q values:  [  37.74111519 -168.92307549 -342.0411532   536.38359317]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[771.81302192 -40.34168621 625.3103684  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [771.81302192 -40.34168621 625.3103684  -35.88578819]
New Q values:  [500.76602513 -40.34168621 625.3103684  -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  622.13605455   447.56793403 -5152.9170124   -180.6       ]
------
Step:3, Action:North
State  181
Old Q Values:  [  622.13605455   447.56793403 -5152.9170124   -180.6       ]
New Q values:  [  528.76656603   447.56793403 -5152.9170124   -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
xag x
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  915.0404807    31.9495824  -180.6       ]
------
Step:4, Action:South
State  109
Old Q Values:  [-241.10880094  915.0404807    31.9495824  -180.6       ]
New Q values:  [-241.10880094  524.04616209   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  528.76656603   447.56793403 -5152.9170124   -180.6       ]
------
Step:5, Action:North
State  181
Old Q Values:  [  528.76656603   447.56793403 -5152.9170124   -180.6       ]
New Q values:  [  638.11387304   447.56793403 -5152.9170124   -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
xa. x
x .gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1424.02415544  575.95475015 -120.29354603]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 1424.02415544  575.95475015 -120.29354603]
New Q values:  [-177.44732869  760.44382409  575.95475015 -120.29354603]
Reward: -1  Episode Reward:  24
xxxxx
x . x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  638.11387304   447.56793403 -5152.9170124   -180.6       ]
------
Step:7, Action:North
State  183
Old Q Values:  [ 795.13408865 1654.71634746 1346.95700506    0.        ]
New Q values:  [ 545.58678269 1654.71634746 1346.95700506    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
xa. x
x ..x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  760.44382409  575.95475015 -120.29354603]
------
Step:8, Action:South
State  111
Old Q Values:  [-177.44732869  760.44382409  575.95475015 -120.29354603]
New Q values:  [-177.44732869  799.99243387  575.95475015 -120.29354603]
Reward: -1  Episode Reward:  22
xxxxx
x . x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 545.58678269 1654.71634746 1346.95700506    0.        ]
------
Step:9, Action:South
State  183
Old Q Values:  [ 545.58678269 1654.71634746 1346.95700506    0.        ]
New Q values:  [ 545.58678269  848.8796495  1346.95700506    0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x . x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[500.76602513 -40.34168621 625.3103684  -35.88578819]
------
Step:10, Action:North
State  260
Old Q Values:  [-1809.65945727 -8695.4397473   1670.47737734 -2601.74710518]
New Q values:  [  366.04155557 -8695.4397473   1670.47737734 -2601.74710518]
Reward: -1  Episode Reward:  20
xxxxx
x . x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3635.01779491     0.        ]
------
Step:11, Action:East
State  180
Old Q Values:  [-2247.02245117   437.03323629  2200.60202348 -4966.32149798]
New Q values:  [-2247.02245117   437.03323629  2632.65279418 -4966.32149798]
Reward: 9  Episode Reward:  29
xxxxx
x . x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.82337328e+03 1.04673438e+03 2.91043938e+03]
------
Step:12, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.82337328e+03 1.04673438e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.50419403e+03 1.04673438e+03 2.91043938e+03]
Reward: -1  Episode Reward:  28
xxxxx
xg. x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   488.30928674   584.81572016]
------
Step:13, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   488.30928674   584.81572016]
New Q values:  [   16.82637525 -5807.06396197   488.30928674   420.91939858]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[500.76602513 -40.34168621 625.3103684  -35.88578819]
------
Step:14, Action:East
State  261
Old Q Values:  [500.76602513 -40.34168621 625.3103684  -35.88578819]
New Q values:  [500.76602513 -40.34168621 396.01693338 -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x . x
x g.x
x a.x
xxxxx
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   294.62541962 -3232.64456823]
New Q values:  [-2527.46239811 -8521.23367799  1015.77533006 -3232.64456823]
Reward: 9  Episode Reward:  35
xxxxx
x . x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5199.05668368 -6569.7824655  -7525.7277781   2975.08387406]
------
Step:16, Action:North
State  288
Old Q Values:  [-5199.05668368 -6569.7824655  -7525.7277781   2975.08387406]
New Q values:  [-1959.91682935 -6569.7824655  -7525.7277781   2975.08387406]
Reward: 9  Episode Reward:  44
xxxxx
x . x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 381.01948039 -170.61028753  318.28798488    3.52184257]
------
Step:17, Action:North
State  210
Old Q Values:  [ 381.01948039 -170.61028753  318.28798488    3.52184257]
New Q values:  [ 1.99985503e+04 -1.70610288e+02  3.18287985e+02  3.52184257e+00]
Reward: -1  Episode Reward:  43
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  1553.95068069  -180.00807518 66155.80845713]
------
Step:18, Action:West
State  130
Old Q Values:  [36041.91667283  1553.95068069  -180.00807518 66155.80845713]
New Q values:  [ 36041.91667283   1553.95068069   -180.00807518 124054.2508704 ]
Reward: 100009  Episode Reward:  100052
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479   534.10502958   483.97903422 -3385.12952694]
------
Step:1, Action:South
State  210
Old Q Values:  [ 1.99985503e+04 -1.70610288e+02  3.18287985e+02  3.52184257e+00]
New Q values:  [1.99985503e+04 8.29681047e+02 3.18287985e+02 3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1959.91682935 -6569.7824655  -7525.7277781   2975.08387406]
------
Step:2, Action:North
State  288
Old Q Values:  [-1959.91682935 -6569.7824655  -7525.7277781   2975.08387406]
New Q values:  [ 5214.99836705 -6569.7824655  -7525.7277781   2975.08387406]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.99985503e+04 8.29681047e+02 3.18287985e+02 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [1.99985503e+04 8.29681047e+02 3.18287985e+02 3.52184257e+00]
New Q values:  [4.52210954e+04 8.29681047e+02 3.18287985e+02 3.52184257e+00]
Reward: 9  Episode Reward:  17
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   1553.95068069   -180.00807518 124054.2508704 ]
------
Step:4, Action:West
State  138
Old Q Values:  [-139.45925583  321.40651138 -180.6          20.80177827]
New Q values:  [-139.45925583  321.40651138 -180.6          94.3827568 ]
Reward: 9  Episode Reward:  26
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    268.87348499   129.87416332]
------
Step:5, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   850.7821061   -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225   471.31335839  -841.67946252]
Reward: -1  Episode Reward:  25
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   438.66838651 -6245.61866138  -353.26197669]
------
Step:6, Action:South
State  136
Old Q Values:  [-5281.21195651   438.66838651 -6245.61866138  -353.26197669]
New Q values:  [-5281.21195651   335.09886348 -6245.61866138  -353.26197669]
Reward: -1  Episode Reward:  24
xxxxx
x. gx
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479   534.10502958   483.97903422 -3385.12952694]
------
Step:7, Action:South
State  208
Old Q Values:  [  492.09541479   534.10502958   483.97903422 -3385.12952694]
New Q values:  [  492.09541479  1777.54152195   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5214.99836705 -6569.7824655  -7525.7277781   2975.08387406]
------
Step:8, Action:West
State  288
Old Q Values:  [ 5214.99836705 -6569.7824655  -7525.7277781   2975.08387406]
New Q values:  [ 5214.99836705 -6569.7824655  -7525.7277781   1356.34862757]
Reward: 9  Episode Reward:  32
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -342.0411532   536.38359317]
------
Step:9, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -342.0411532   536.38359317]
New Q values:  [   37.74111519  -168.92307549  -342.0411532  34994.73287258]
Reward: 9  Episode Reward:  41
xxxxx
x.  x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[115915.93145104   2256.66526474  12394.68064405   1875.31501677]
------
Step:10, Action:North
State  257
Old Q Values:  [115915.93145104   2256.66526474  12394.68064405   1875.31501677]
New Q values:  [80119.14178952  2256.66526474 12394.68064405  1875.31501677]
Reward: 9  Episode Reward:  50
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[112491.23069702   1950.96757305  67020.65338209      0.        ]
------
Step:11, Action:North
State  177
Old Q Values:  [112491.23069702   1950.96757305  67020.65338209      0.        ]
New Q values:  [105001.89227881   1950.96757305  67020.65338209      0.        ]
Reward: 100009  Episode Reward:  100059
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  524.04616209   31.9495824  -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869  799.99243387  575.95475015 -120.29354603]
New Q values:  [-177.44732869  516.83113546  575.95475015 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  638.11387304   447.56793403 -5152.9170124   -180.6       ]
------
Step:2, Action:North
State  183
Old Q Values:  [ 545.58678269  848.8796495  1346.95700506    0.        ]
New Q values:  [ 390.42113812  848.8796495  1346.95700506    0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xa. x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  516.83113546  575.95475015 -120.29354603]
------
Step:3, Action:East
State  111
Old Q Values:  [-177.44732869  516.83113546  575.95475015 -120.29354603]
New Q values:  [-177.44732869  516.83113546  316.44394556 -120.29354603]
Reward: 9  Episode Reward:  17
xxxxx
x a x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    268.87348499   129.87416332]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    268.87348499   129.87416332]
New Q values:  [ -281.736      -9545.4473624    203.37134741   129.87416332]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  321.40651138 -180.6          94.3827568 ]
------
Step:5, Action:South
State  138
Old Q Values:  [-139.45925583  321.40651138 -180.6          94.3827568 ]
New Q values:  [-139.45925583 1562.01843094 -180.6          94.3827568 ]
Reward: 9  Episode Reward:  25
xxxxx
x   x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1638.46584919 4760.18608796    0.          429.03841886]
------
Step:6, Action:South
State  210
Old Q Values:  [4.52210954e+04 8.29681047e+02 3.18287985e+02 3.52184257e+00]
New Q values:  [4.52210954e+04 1.90177193e+03 3.18287985e+02 3.52184257e+00]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x . x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5214.99836705 -6569.7824655  -7525.7277781   1356.34862757]
------
Step:7, Action:North
State  288
Old Q Values:  [ 5214.99836705 -6569.7824655  -7525.7277781   1356.34862757]
New Q values:  [15651.72796467 -6569.7824655  -7525.7277781   1356.34862757]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x .ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.52210954e+04 1.90177193e+03 3.18287985e+02 3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [4.52210954e+04 1.90177193e+03 3.18287985e+02 3.52184257e+00]
New Q values:  [1.85564437e+04 1.90177193e+03 3.18287985e+02 3.52184257e+00]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x . x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1562.01843094 -180.6          94.3827568 ]
------
Step:9, Action:South
State  138
Old Q Values:  [-139.45925583 1562.01843094 -180.6          94.3827568 ]
New Q values:  [-139.45925583  746.91995892 -180.6          94.3827568 ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  343.40782369 -1062.67879491 -8489.43729461   409.04195514]
------
Step:10, Action:West
State  216
Old Q Values:  [  343.40782369 -1062.67879491 -8489.43729461   409.04195514]
New Q values:  [  343.40782369 -1062.67879491 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  40
xxxxx
xg  x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1206.93052108  694.77966824  408.67479662]
------
Step:11, Action:South
State  200
Old Q Values:  [  62.8218634  1206.93052108  694.77966824  408.67479662]
New Q values:  [ 62.8218634  792.90480745 694.77966824 408.67479662]
Reward: 9  Episode Reward:  49
xxxxx
x   x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1015.77533006 -3232.64456823]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1015.77533006 -3232.64456823]
New Q values:  [-2527.46239811 -8521.23367799  5101.22852143 -3232.64456823]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15651.72796467 -6569.7824655  -7525.7277781   1356.34862757]
------
Step:13, Action:North
State  288
Old Q Values:  [15651.72796467 -6569.7824655  -7525.7277781   1356.34862757]
New Q values:  [ 7688.14701225 -6569.7824655  -7525.7277781   1356.34862757]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1638.46584919 4760.18608796    0.          429.03841886]
------
Step:14, Action:South
State  218
Old Q Values:  [1638.46584919 4760.18608796    0.          429.03841886]
New Q values:  [ 1638.46584919 -1790.08146114     0.           429.03841886]
Reward: -10001  Episode Reward:  -9954
xxxxx
x   x
x   x
x. gx
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  638.11387304   447.56793403 -5152.9170124   -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [  638.11387304   447.56793403 -5152.9170124   -180.6       ]
New Q values:  [  417.85939784   447.56793403 -5152.9170124   -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  524.04616209   31.9495824  -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-8463.16477134  1166.4852442    642.92809613     0.        ]
New Q values:  [-8463.16477134  1255.78993593   642.92809613     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2247.02245117   437.03323629  2632.65279418 -4966.32149798]
------
Step:3, Action:East
State  189
Old Q Values:  [    9.84673294  1102.64474348 -1189.2802612    940.95197235]
New Q values:  [   9.84673294 1102.64474348 -232.44066224  940.95197235]
Reward: 9  Episode Reward:  17
xxxxx
x g x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 62.8218634  792.90480745 694.77966824 408.67479662]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.50419403e+03 1.04673438e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.53744617e+03 1.04673438e+03 2.91043938e+03]
Reward: 9  Episode Reward:  26
xxxxx
xg. x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5101.22852143 -3232.64456823]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5101.22852143 -3232.64456823]
New Q values:  [-2527.46239811 -8521.23367799  4352.33551225 -3232.64456823]
Reward: 9  Episode Reward:  35
xxxxx
x g x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7688.14701225 -6569.7824655  -7525.7277781   1356.34862757]
------
Step:6, Action:North
State  288
Old Q Values:  [ 7688.14701225 -6569.7824655  -7525.7277781   1356.34862757]
New Q values:  [ 3613.92126149 -6569.7824655  -7525.7277781   1356.34862757]
Reward: 9  Episode Reward:  44
xxxxx
xg. x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479  1777.54152195   483.97903422 -3385.12952694]
------
Step:7, Action:South
State  216
Old Q Values:  [  343.40782369 -1062.67879491 -8489.43729461   531.09593838]
New Q values:  [  343.40782369   658.50486048 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3613.92126149 -6569.7824655  -7525.7277781   1356.34862757]
------
Step:8, Action:North
State  288
Old Q Values:  [ 3613.92126149 -6569.7824655  -7525.7277781   1356.34862757]
New Q values:  [ 1978.23096118 -6569.7824655  -7525.7277781   1356.34862757]
Reward: -1  Episode Reward:  42
xxxxx
x .gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479  1777.54152195   483.97903422 -3385.12952694]
------
Step:9, Action:South
State  208
Old Q Values:  [  492.09541479  1777.54152195   483.97903422 -3385.12952694]
New Q values:  [  492.09541479  1303.88589713   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  41
xxxxx
x . x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1978.23096118 -6569.7824655  -7525.7277781   1356.34862757]
------
Step:10, Action:West
State  288
Old Q Values:  [ 1978.23096118 -6569.7824655  -7525.7277781   1356.34862757]
New Q values:  [ 1978.23096118 -6569.7824655  -7525.7277781  11040.3593128 ]
Reward: -1  Episode Reward:  40
xxxxx
x . x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  -342.0411532  34994.73287258]
------
Step:11, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -342.0411532  34994.73287258]
New Q values:  [   37.74111519  -168.92307549  -342.0411532  14153.52295657]
Reward: 9  Episode Reward:  49
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[500.76602513 -40.34168621 396.01693338 -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [500.76602513 -40.34168621 396.01693338 -35.88578819]
New Q values:  [603.79351157 -40.34168621 396.01693338 -35.88578819]
Reward: -1  Episode Reward:  48
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  848.8796495  1346.95700506    0.        ]
------
Step:13, Action:East
State  183
Old Q Values:  [ 390.42113812  848.8796495  1346.95700506    0.        ]
New Q values:  [ 390.42113812  848.8796495  1303.75789729    0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x . x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  2.55191698e+03  1.20371620e+03]
------
Step:14, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.53744617e+03 1.04673438e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.53744617e+03 8.09259520e+02 2.91043938e+03]
Reward: -1  Episode Reward:  46
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479  1303.88589713   483.97903422 -3385.12952694]
------
Step:15, Action:South
State  208
Old Q Values:  [  492.09541479  1303.88589713   483.97903422 -3385.12952694]
New Q values:  [  492.09541479  3833.06215269   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  45
xxxxx
x . x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1978.23096118 -6569.7824655  -7525.7277781  11040.3593128 ]
------
Step:16, Action:West
State  288
Old Q Values:  [ 1978.23096118 -6569.7824655  -7525.7277781  11040.3593128 ]
New Q values:  [ 1978.23096118 -6569.7824655  -7525.7277781   8661.60061209]
Reward: -1  Episode Reward:  44
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  -342.0411532  14153.52295657]
------
Step:17, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  -342.0411532  14153.52295657]
New Q values:  [  37.74111519 -168.92307549 -342.0411532  5841.9472361 ]
Reward: -1  Episode Reward:  43
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[603.79351157 -40.34168621 396.01693338 -35.88578819]
------
Step:18, Action:North
State  261
Old Q Values:  [603.79351157 -40.34168621 396.01693338 -35.88578819]
New Q values:  [632.04477381 -40.34168621 396.01693338 -35.88578819]
Reward: -1  Episode Reward:  42
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  848.8796495  1303.75789729    0.        ]
------
Step:19, Action:East
State  183
Old Q Values:  [ 390.42113812  848.8796495  1303.75789729    0.        ]
New Q values:  [ 390.42113812  848.8796495  1286.47825418    0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x . x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  2.55191698e+03  1.20371620e+03]
------
Step:20, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.53744617e+03 8.09259520e+02 2.91043938e+03]
New Q values:  [3.89777037e-01 2.53744617e+03 1.47302245e+03 2.91043938e+03]
Reward: -1  Episode Reward:  40
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479  3833.06215269   483.97903422 -3385.12952694]
------
Step:21, Action:South
State  210
Old Q Values:  [1.85564437e+04 1.90177193e+03 3.18287985e+02 3.52184257e+00]
New Q values:  [1.85564437e+04 3.35858896e+03 3.18287985e+02 3.52184257e+00]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1978.23096118 -6569.7824655  -7525.7277781   8661.60061209]
------
Step:22, Action:North
State  288
Old Q Values:  [ 1978.23096118 -6569.7824655  -7525.7277781   8661.60061209]
New Q values:  [ 1940.61103028 -6569.7824655  -7525.7277781   8661.60061209]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  492.09541479  3833.06215269   483.97903422 -3385.12952694]
------
Step:23, Action:South
State  208
Old Q Values:  [  492.09541479  3833.06215269   483.97903422 -3385.12952694]
New Q values:  [  492.09541479  4131.10504471   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1940.61103028 -6569.7824655  -7525.7277781   8661.60061209]
------
Step:24, Action:West
State  288
Old Q Values:  [ 1940.61103028 -6569.7824655  -7525.7277781   8661.60061209]
New Q values:  [ 1940.61103028 -6569.7824655  -7525.7277781   5216.62441567]
Reward: -1  Episode Reward:  36
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -342.0411532  5841.9472361 ]
------
Step:25, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4352.33551225 -3232.64456823]
New Q values:  [-2527.46239811 -8521.23367799  4352.33551225 22742.08470957]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474 12394.68064405  1875.31501677]
------
Step:26, Action:North
State  261
Old Q Values:  [632.04477381 -40.34168621 396.01693338 -35.88578819]
New Q values:  [386.48828973 -40.34168621 396.01693338 -35.88578819]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  417.85939784   447.56793403 -5152.9170124   -180.6       ]
------
Step:27, Action:South
State  177
Old Q Values:  [105001.89227881   1950.96757305  67020.65338209      0.        ]
New Q values:  [105001.89227881  24815.52956608  67020.65338209      0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474 12394.68064405  1875.31501677]
------
Step:28, Action:North
State  261
Old Q Values:  [386.48828973 -40.34168621 396.01693338 -35.88578819]
New Q values:  [288.2656961  -40.34168621 396.01693338 -35.88578819]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  417.85939784   447.56793403 -5152.9170124   -180.6       ]
------
Step:29, Action:South
State  181
Old Q Values:  [  417.85939784   447.56793403 -5152.9170124   -180.6       ]
New Q values:  [  417.85939784   297.23225363 -5152.9170124   -180.6       ]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[288.2656961  -40.34168621 396.01693338 -35.88578819]
------
Step:30, Action:East
State  261
Old Q Values:  [288.2656961  -40.34168621 396.01693338 -35.88578819]
New Q values:  [ 288.2656961   -40.34168621 6980.43218622  -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4352.33551225 22742.08470957]
------
Step:31, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4352.33551225 22742.08470957]
New Q values:  [-2527.46239811 -8521.23367799  4352.33551225 33131.97642068]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474 12394.68064405  1875.31501677]
------
Step:32, Action:North
State  261
Old Q Values:  [ 288.2656961   -40.34168621 6980.43218622  -35.88578819]
New Q values:  [ 240.06409779  -40.34168621 6980.43218622  -35.88578819]
Reward: -1  Episode Reward:  28
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  417.85939784   297.23225363 -5152.9170124   -180.6       ]
------
Step:33, Action:North
State  181
Old Q Values:  [  417.85939784   297.23225363 -5152.9170124   -180.6       ]
New Q values:  [  522.76225735   297.23225363 -5152.9170124   -180.6       ]
Reward: -1  Episode Reward:  27
xxxxx
xa. x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        1187.39499405    5.4           0.        ]
------
Step:34, Action:South
State  103
Old Q Values:  [-180.6        1187.39499405    5.4           0.        ]
New Q values:  [-180.6         631.18667482    5.4           0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  522.76225735   297.23225363 -5152.9170124   -180.6       ]
------
Step:35, Action:North
State  181
Old Q Values:  [  522.76225735   297.23225363 -5152.9170124   -180.6       ]
New Q values:  [  397.86090539   297.23225363 -5152.9170124   -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
xa. x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         631.18667482    5.4           0.        ]
------
Step:36, Action:South
State  103
Old Q Values:  [-180.6         631.18667482    5.4           0.        ]
New Q values:  [-180.6         371.23294155    5.4           0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  397.86090539   297.23225363 -5152.9170124   -180.6       ]
------
Step:37, Action:North
State  180
Old Q Values:  [-2247.02245117   437.03323629  2632.65279418 -4966.32149798]
New Q values:  [ -897.86098047   437.03323629  2632.65279418 -4966.32149798]
Reward: -1  Episode Reward:  23
xxxxx
xa. x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[ -180.6       -2257.0253383     5.16       -180.6      ]
------
Step:38, Action:East
State  99
Old Q Values:  [    0.         54146.33549678 59306.25267112     0.        ]
New Q values:  [     0.          54146.33549678 121314.428556        0.        ]
Reward: 100009  Episode Reward:  100032
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  397.86090539   297.23225363 -5152.9170124   -180.6       ]
------
Step:1, Action:North
State  181
Old Q Values:  [  397.86090539   297.23225363 -5152.9170124   -180.6       ]
New Q values:  [  275.91424462   297.23225363 -5152.9170124   -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         371.23294155    5.4           0.        ]
------
Step:2, Action:South
State  103
Old Q Values:  [-180.6         371.23294155    5.4           0.        ]
New Q values:  [-180.6         533.83665287    5.4           0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  848.8796495  1286.47825418    0.        ]
------
Step:3, Action:East
State  181
Old Q Values:  [  275.91424462   297.23225363 -5152.9170124   -180.6       ]
New Q values:  [ 275.91424462  297.23225363 -970.99155015 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:4, Action:South
State  199
Old Q Values:  [  22.48535485 1141.9741627   549.89931413  753.62201984]
New Q values:  [  22.48535485 2214.77383591  549.89931413  753.62201984]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -342.0411532  5841.9472361 ]
------
Step:5, Action:West
State  277
Old Q Values:  [   1.64433       0.         1346.23826999  970.65412719]
New Q values:  [1.64433000e+00 0.00000000e+00 1.34623827e+03 2.48779131e+03]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 240.06409779  -40.34168621 6980.43218622  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [ 240.06409779  -40.34168621 6980.43218622  -35.88578819]
New Q values:  [ 240.06409779  -40.34168621 2938.06566051  -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   488.30928674   420.91939858]
------
Step:7, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197   488.30928674   420.91939858]
New Q values:  [   16.82637525 -5807.06396197  1765.7110394    420.91939858]
Reward: 9  Episode Reward:  33
xxxxx
x ..x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1940.61103028 -6569.7824655  -7525.7277781   5216.62441567]
------
Step:8, Action:West
State  288
Old Q Values:  [ 1940.61103028 -6569.7824655  -7525.7277781   5216.62441567]
New Q values:  [ 1940.61103028 -6569.7824655  -7525.7277781   2615.76307809]
Reward: -1  Episode Reward:  32
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1765.7110394    420.91939858]
------
Step:9, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1765.7110394    420.91939858]
New Q values:  [   16.82637525 -5807.06396197  1490.41333918   420.91939858]
Reward: -1  Episode Reward:  31
xxxxx
x g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1940.61103028 -6569.7824655  -7525.7277781   2615.76307809]
------
Step:10, Action:West
State  288
Old Q Values:  [ 1940.61103028 -6569.7824655  -7525.7277781   2615.76307809]
New Q values:  [ 1940.61103028 -6569.7824655  -7525.7277781   2798.28940206]
Reward: -1  Episode Reward:  30
xxxxx
x .gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 -342.0411532  5841.9472361 ]
------
Step:11, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 -342.0411532  5841.9472361 ]
New Q values:  [  37.74111519 -168.92307549 -342.0411532  3217.59859259]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 240.06409779  -40.34168621 2938.06566051  -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [ 240.06409779  -40.34168621 2938.06566051  -35.88578819]
New Q values:  [ 240.06409779  -40.34168621 1621.75026596  -35.88578819]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1490.41333918   420.91939858]
------
Step:13, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 -342.0411532  3217.59859259]
New Q values:  [  37.74111519 -168.92307549  702.07035934 3217.59859259]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1940.61103028 -6569.7824655  -7525.7277781   2798.28940206]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1940.61103028 -6569.7824655  -7525.7277781   2798.28940206]
New Q values:  [ 1940.61103028 -6569.7824655  -7525.7277781   1565.83976258]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1490.41333918   420.91939858]
------
Step:15, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1490.41333918   420.91939858]
New Q values:  [   16.82637525 -5807.06396197  1177.74864476   420.91939858]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1940.61103028 -6569.7824655  -7525.7277781   1565.83976258]
------
Step:16, Action:North
State  288
Old Q Values:  [ 1940.61103028 -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [ 6348.57751804 -6569.7824655  -7525.7277781   1565.83976258]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.85564437e+04 3.35858896e+03 3.18287985e+02 3.52184257e+00]
------
Step:17, Action:North
State  208
Old Q Values:  [  492.09541479  4131.10504471   483.97903422 -3385.12952694]
New Q values:  [37418.51342704  4131.10504471   483.97903422 -3385.12952694]
Reward: 9  Episode Reward:  43
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   1553.95068069   -180.00807518 124054.2508704 ]
------
Step:18, Action:West
State  128
Old Q Values:  [ 8775.70846068 24033.32294218 -8652.84       42952.38547284]
New Q values:  [  8775.70846068  24033.32294218  -8652.84       106420.98506913]
Reward: 100009  Episode Reward:  100052
xxxxx
xga x
x   x
x   x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.85564437e+04 3.35858896e+03 3.18287985e+02 3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [1.85564437e+04 3.35858896e+03 3.18287985e+02 3.52184257e+00]
New Q values:  [7.65205346e+03 3.35858896e+03 3.18287985e+02 3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  746.91995892 -180.6          94.3827568 ]
------
Step:2, Action:South
State  138
Old Q Values:  [-139.45925583  746.91995892 -180.6          94.3827568 ]
New Q values:  [-139.45925583 2593.78402224 -180.6          94.3827568 ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65205346e+03 3.35858896e+03 3.18287985e+02 3.52184257e+00]
------
Step:3, Action:North
State  208
Old Q Values:  [37418.51342704  4131.10504471   483.97903422 -3385.12952694]
New Q values:  [15744.94057749  4131.10504471   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 2593.78402224 -180.6          94.3827568 ]
------
Step:4, Action:South
State  138
Old Q Values:  [-139.45925583 2593.78402224 -180.6          94.3827568 ]
New Q values:  [-139.45925583 3332.52964757 -180.6          94.3827568 ]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65205346e+03 3.35858896e+03 3.18287985e+02 3.52184257e+00]
------
Step:5, Action:North
State  208
Old Q Values:  [15744.94057749  4131.10504471   483.97903422 -3385.12952694]
New Q values:  [ 7297.13512526  4131.10504471   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 3332.52964757 -180.6          94.3827568 ]
------
Step:6, Action:South
State  138
Old Q Values:  [-139.45925583 3332.52964757 -180.6          94.3827568 ]
New Q values:  [ -139.45925583 -2478.44760339  -180.6           94.3827568 ]
Reward: -10001  Episode Reward:  -9996
xxxxx
x.. x
x..gx
x. .x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1126.08525554   450.62327432  1909.41710691]
------
Step:1, Action:West
State  193
Old Q Values:  [-5922.26708831  1126.08525554   450.62327432  1909.41710691]
New Q values:  [-5922.26708831  1126.08525554   450.62327432  1099.96026581]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 1102.64474348 -232.44066224  940.95197235]
------
Step:2, Action:South
State  181
Old Q Values:  [ 275.91424462  297.23225363 -970.99155015 -180.6       ]
New Q values:  [ 275.91424462  610.81798124 -970.99155015 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 240.06409779  -40.34168621 1621.75026596  -35.88578819]
------
Step:3, Action:East
State  261
Old Q Values:  [ 240.06409779  -40.34168621 1621.75026596  -35.88578819]
New Q values:  [ 240.06409779  -40.34168621 1619.37968416  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x..gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  702.07035934 3217.59859259]
------
Step:4, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1177.74864476   420.91939858]
New Q values:  [   16.82637525 -5807.06396197  1177.74864476   653.58166468]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 240.06409779  -40.34168621 1619.37968416  -35.88578819]
------
Step:5, Action:East
State  261
Old Q Values:  [ 240.06409779  -40.34168621 1619.37968416  -35.88578819]
New Q values:  [  240.06409779   -40.34168621 10586.74479987   -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4352.33551225 33131.97642068]
------
Step:6, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934 3217.59859259]
New Q values:  [  37.74111519 -168.92307549  702.07035934 4462.462877  ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  240.06409779   -40.34168621 10586.74479987   -35.88578819]
------
Step:7, Action:East
State  261
Old Q Values:  [  240.06409779   -40.34168621 10586.74479987   -35.88578819]
New Q values:  [ 240.06409779  -40.34168621 5572.83678305  -35.88578819]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  702.07035934 4462.462877  ]
------
Step:8, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934 4462.462877  ]
New Q values:  [  37.74111519 -168.92307549  702.07035934 3456.23618571]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 240.06409779  -40.34168621 5572.83678305  -35.88578819]
------
Step:9, Action:East
State  261
Old Q Values:  [ 240.06409779  -40.34168621 5572.83678305  -35.88578819]
New Q values:  [ 240.06409779  -40.34168621 3265.40556893  -35.88578819]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  702.07035934 3456.23618571]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4352.33551225 33131.97642068]
New Q values:  [-2527.46239811 -8521.23367799  4352.33551225 14231.81223895]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 240.06409779  -40.34168621 3265.40556893  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [ 240.06409779  -40.34168621 3265.40556893  -35.88578819]
New Q values:  [ 481.36911537  -40.34168621 3265.40556893  -35.88578819]
Reward: -1  Episode Reward:  19
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  848.8796495  1286.47825418    0.        ]
------
Step:12, Action:East
State  183
Old Q Values:  [ 390.42113812  848.8796495  1286.47825418    0.        ]
New Q values:  [ 390.42113812  848.8796495  1279.56639694    0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -8.12857824e+03  2.55191698e+03  1.20371620e+03]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -8.12857824e+03  2.55191698e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -8.12857824e+03  3.31578283e+03  1.20371620e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  ax
xg .x
xxxxx
Step:14, Action:East
State  210
Old Q Values:  [7.65205346e+03 3.35858896e+03 3.18287985e+02 3.52184257e+00]
New Q values:  [7.65205346e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: -301  Episode Reward:  -284
xxxxx
x...x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.65205346e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:15, Action:North
State  210
Old Q Values:  [7.65205346e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [4.02824966e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  -275
xxxxx
x..ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   1553.95068069   -180.00807518 124054.2508704 ]
------
Step:16, Action:West
State  130
Old Q Values:  [ 36041.91667283   1553.95068069   -180.00807518 124054.2508704 ]
New Q values:  [36041.91667283  1553.95068069  -180.00807518 87213.62783571]
Reward: 9  Episode Reward:  -266
xxxxx
x.a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  36205.85611814 125288.42495849]
------
Step:17, Action:West
State  126
Old Q Values:  [  0.         421.62811728 374.96879939 204.22976196]
New Q values:  [  0.         421.62811728 374.96879939 114.09661187]
Reward: 9  Episode Reward:  -257
xxxxx
xa  x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684    90.01569027  -180.6       ]
------
Step:18, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684    90.01569027  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   161.89471129  -180.6       ]
Reward: -1  Episode Reward:  -258
xxxxx
x a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         421.62811728 374.96879939 114.09661187]
------
Step:19, Action:South
State  126
Old Q Values:  [  0.         421.62811728 374.96879939 114.09661187]
New Q values:  [  0.         437.4470373  374.96879939 114.09661187]
Reward: -1  Episode Reward:  -259
xxxxx
x   x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638 897.98596794   0.        ]
------
Step:20, Action:East
State  204
Old Q Values:  [   0.         3883.17139005 1542.69571291  399.75525955]
New Q values:  [   0.         3883.17139005  814.02974331  399.75525955]
Reward: -1  Episode Reward:  -260
xxxxx
x   x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  343.40782369   658.50486048 -8489.43729461   531.09593838]
------
Step:21, Action:South
State  208
Old Q Values:  [ 7297.13512526  4131.10504471   483.97903422 -3385.12952694]
New Q values:  [ 7297.13512526 63562.41527329   483.97903422 -3385.12952694]
Reward: 100009  Episode Reward:  99749
xxxxx
x   x
x g x
x  ax
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 5874.35865608 3535.62314734 1101.59744825]
------
Step:1, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -8.12857824e+03  3.31578283e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -4.97648762e+03  3.31578283e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x.g x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4352.33551225 14231.81223895]
------
Step:1, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934 3456.23618571]
New Q values:  [  37.74111519 -168.92307549  702.07035934 2367.51614497]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 481.36911537  -40.34168621 3265.40556893  -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [ 481.36911537  -40.34168621 3265.40556893  -35.88578819]
New Q values:  [ 481.36911537  -40.34168621 2015.81707106  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  702.07035934 2367.51614497]
------
Step:3, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934 2367.51614497]
New Q values:  [  37.74111519 -168.92307549  702.07035934 1551.1515793 ]
Reward: -1  Episode Reward:  7
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 481.36911537  -40.34168621 2015.81707106  -35.88578819]
------
Step:4, Action:East
State  261
Old Q Values:  [ 481.36911537  -40.34168621 2015.81707106  -35.88578819]
New Q values:  [ 481.36911537  -40.34168621 1271.07230222  -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  702.07035934 1551.1515793 ]
------
Step:5, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1177.74864476   653.58166468]
New Q values:  [   16.82637525 -5807.06396197  1177.74864476   642.15435654]
Reward: -1  Episode Reward:  5
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 481.36911537  -40.34168621 1271.07230222  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [ 481.36911537  -40.34168621 1271.07230222  -35.88578819]
New Q values:  [ 481.36911537  -40.34168621 4777.37259257  -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x.g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4352.33551225 14231.81223895]
------
Step:7, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934 1551.1515793 ]
New Q values:  [  37.74111519 -168.92307549  702.07035934 2053.07240949]
Reward: -1  Episode Reward:  3
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 481.36911537  -40.34168621 4777.37259257  -35.88578819]
------
Step:8, Action:East
State  261
Old Q Values:  [ 481.36911537  -40.34168621 4777.37259257  -35.88578819]
New Q values:  [ 481.36911537  -40.34168621 2526.27075988  -35.88578819]
Reward: -1  Episode Reward:  2
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  702.07035934 2053.07240949]
------
Step:9, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934 2053.07240949]
New Q values:  [  37.74111519 -168.92307549  702.07035934 1578.51019176]
Reward: -1  Episode Reward:  1
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 481.36911537  -40.34168621 2526.27075988  -35.88578819]
------
Step:10, Action:East
State  261
Old Q Values:  [ 481.36911537  -40.34168621 2526.27075988  -35.88578819]
New Q values:  [ 481.36911537  -40.34168621 1483.46136148  -35.88578819]
Reward: -1  Episode Reward:  0
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  702.07035934 1578.51019176]
------
Step:11, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934 1578.51019176]
New Q values:  [  37.74111519 -168.92307549  702.07035934 1075.84248515]
Reward: -1  Episode Reward:  -1
xxxxx
x. .x
x...x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 481.36911537  -40.34168621 1483.46136148  -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [ 481.36911537  -40.34168621 1483.46136148  -35.88578819]
New Q values:  [  481.36911537   -40.34168621 -1137.67178372   -35.88578819]
Reward: -10001  Episode Reward:  -10002
xxxxx
x. .x
x...x
x g.x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 5874.35865608 3535.62314734 1101.59744825]
------
Step:1, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -4.97648762e+03  3.31578283e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  3.31578283e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x.g x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 275.91424462  610.81798124 -970.99155015 -180.6       ]
------
Step:1, Action:South
State  183
Old Q Values:  [ 390.42113812  848.8796495  1279.56639694    0.        ]
New Q values:  [ 390.42113812  489.36259441 1279.56639694    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  481.36911537   -40.34168621 -1137.67178372   -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [  481.36911537   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [  375.19304052   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 275.91424462  610.81798124 -970.99155015 -180.6       ]
------
Step:3, Action:South
State  183
Old Q Values:  [ 390.42113812  489.36259441 1279.56639694    0.        ]
New Q values:  [ 390.42113812  307.70294992 1279.56639694    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  375.19304052   -40.34168621 -1137.67178372   -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [  375.19304052   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [  533.34713529   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xa. x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  307.70294992 1279.56639694    0.        ]
------
Step:5, Action:East
State  181
Old Q Values:  [ 275.91424462  610.81798124 -970.99155015 -180.6       ]
New Q values:  [  275.91424462   610.81798124 -5509.864806    -180.6       ]
Reward: -9991  Episode Reward:  -9985
xxxxx
x...x
x g x
x ..x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.02824966e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [4.02824966e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.61467135e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -2478.44760339  -180.6           94.3827568 ]
------
Step:2, Action:West
State  138
Old Q Values:  [ -139.45925583 -2478.44760339  -180.6           94.3827568 ]
New Q values:  [ -139.45925583 -2478.44760339  -180.6          108.38595824]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   217.44285172    49.79232781]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    203.37134741   129.87416332]
New Q values:  [ -281.736      -9545.4473624    113.26432643   129.87416332]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -2478.44760339  -180.6          108.38595824]
------
Step:4, Action:West
State  138
Old Q Values:  [ -139.45925583 -2478.44760339  -180.6          108.38595824]
New Q values:  [ -139.45925583 -2478.44760339  -180.6           81.71663229]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   129.87416332]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624    113.26432643   129.87416332]
New Q values:  [ -281.736      -9545.4473624    113.26432643   218.4389335 ]
Reward: 9  Episode Reward:  25
xxxxx
xa  x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  536.96422723 -252.78192178]
------
Step:6, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   161.89471129  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   129.68956457  -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   218.4389335 ]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624    113.26432643   218.4389335 ]
New Q values:  [ -281.736      -9545.4473624    113.26432643   247.86484157]
Reward: -1  Episode Reward:  23
xxxxx
xa  x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  536.96422723 -252.78192178]
------
Step:8, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516  536.96422723 -252.78192178]
New Q values:  [-252.35169558    7.11267516  279.41854641 -252.78192178]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   217.44285172    49.79232781]
------
Step:9, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   217.44285172    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811   110.89213037    49.79232781]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 -2478.44760339  -180.6           81.71663229]
------
Step:10, Action:West
State  138
Old Q Values:  [ -139.45925583 -2478.44760339  -180.6           81.71663229]
New Q values:  [ -139.45925583 -2478.44760339  -180.6          106.44610539]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   247.86484157]
------
Step:11, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   110.89213037    49.79232781]
New Q values:  [ -253.44886264 -1902.20915811   110.89213037   103.14249505]
Reward: -1  Episode Reward:  19
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  279.41854641 -252.78192178]
------
Step:12, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516  279.41854641 -252.78192178]
New Q values:  [-252.35169558    7.11267516  185.52687103 -252.78192178]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   247.86484157]
------
Step:13, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   110.89213037   103.14249505]
New Q values:  [ -253.44886264 -1902.20915811   110.89213037    96.31505933]
Reward: -1  Episode Reward:  17
xxxxx
xa  x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  185.52687103 -252.78192178]
------
Step:14, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516  185.52687103 -252.78192178]
New Q values:  [-252.35169558    7.11267516  147.97020088 -252.78192178]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   247.86484157]
------
Step:15, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624    113.26432643   247.86484157]
New Q values:  [ -281.736      -9545.4473624    113.26432643   142.93699689]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  147.97020088 -252.78192178]
------
Step:16, Action:East
State  105
Old Q Values:  [ -180.6            6.72320144 -5974.07473851     0.        ]
New Q values:  [-1.80600000e+02  6.72320144e+00 -8.24883589e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -9986
xxxxx
x g x
x.. x
x. .x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1126.08525554   450.62327432  1099.96026581]
------
Step:1, Action:South
State  195
Old Q Values:  [  38.85388605 5874.35865608 3535.62314734 1101.59744825]
New Q values:  [  38.85388605 2677.89620798 3535.62314734 1101.59744825]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  702.07035934 1075.84248515]
------
Step:2, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934 1075.84248515]
New Q values:  [  37.74111519 -168.92307549  702.07035934  595.74113465]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  533.34713529   -40.34168621 -1137.67178372   -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [  533.34713529   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [31719.30653776   -40.34168621 -1137.67178372   -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[105001.89227881  24815.52956608  67020.65338209      0.        ]
------
Step:4, Action:North
State  181
Old Q Values:  [  275.91424462   610.81798124 -5509.864806    -180.6       ]
New Q values:  [  120.21502113   610.81798124 -5509.864806    -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
xag.x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         14.83107761  0.          0.        ]
------
Step:5, Action:South
State  103
Old Q Values:  [-180.6         533.83665287    5.4           0.        ]
New Q values:  [-180.6         396.18005552    5.4           0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   610.81798124 -5509.864806    -180.6       ]
------
Step:6, Action:South
State  183
Old Q Values:  [ 390.42113812  307.70294992 1279.56639694    0.        ]
New Q values:  [ 390.42113812 9638.2731413  1279.56639694    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[31719.30653776   -40.34168621 -1137.67178372   -35.88578819]
------
Step:7, Action:North
State  260
Old Q Values:  [  366.04155557 -8695.4397473   1670.47737734 -2601.74710518]
New Q values:  [ 1236.3219607  -8695.4397473   1670.47737734 -2601.74710518]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3635.01779491     0.        ]
------
Step:8, Action:East
State  183
Old Q Values:  [ 390.42113812 9638.2731413  1279.56639694    0.        ]
New Q values:  [ 390.42113812 9638.2731413  1505.96140848    0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  3.31578283e+03  1.20371620e+03]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  3.31578283e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  6.16972718e+03  1.20371620e+03]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.61467135e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:10, Action:North
State  210
Old Q Values:  [1.61467135e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [3.26281737e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  40
xxxxx
x .ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  1553.95068069  -180.00807518 87213.62783571]
------
Step:11, Action:West
State  138
Old Q Values:  [ -139.45925583 -2478.44760339  -180.6          106.44610539]
New Q values:  [ -139.45925583 -2478.44760339  -180.6          179.21255334]
Reward: 9  Episode Reward:  49
xxxxx
x a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         437.4470373  374.96879939 114.09661187]
------
Step:12, Action:South
State  124
Old Q Values:  [   0.            5.4        1684.46356666  341.12160345]
New Q values:  [   0.         1166.51141701 1684.46356666  341.12160345]
Reward: -1  Episode Reward:  48
xxxxx
xg  x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         3883.17139005  814.02974331  399.75525955]
------
Step:13, Action:South
State  204
Old Q Values:  [   0.         3883.17139005  814.02974331  399.75525955]
New Q values:  [   0.         1905.99314945  814.02974331  399.75525955]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1177.74864476   642.15435654]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4352.33551225 14231.81223895]
New Q values:  [-2527.46239811 -8521.23367799 63650.90746031 14231.81223895]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
x g x
x  ax
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  6.16972718e+03  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.53744617e+03 1.47302245e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.53744617e+03 1.96633336e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7297.13512526 63562.41527329   483.97903422 -3385.12952694]
------
Step:2, Action:South
State  208
Old Q Values:  [ 7297.13512526 63562.41527329   483.97903422 -3385.12952694]
New Q values:  [ 7297.13512526 27334.93936473   483.97903422 -3385.12952694]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6348.57751804 -6569.7824655  -7525.7277781   1565.83976258]
------
Step:3, Action:North
State  288
Old Q Values:  [ 6348.57751804 -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [12327.28313068 -6569.7824655  -7525.7277781   1565.83976258]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.26281737e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  208
Old Q Values:  [ 7297.13512526 27334.93936473   483.97903422 -3385.12952694]
New Q values:  [29088.34240082 27334.93936473   483.97903422 -3385.12952694]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x.g x
x . x
xxxxx
Step:5, Action:South
State  130
Old Q Values:  [36041.91667283  1553.95068069  -180.00807518 87213.62783571]
New Q values:  [36041.91667283  9347.48299252  -180.00807518 87213.62783571]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xg ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29088.34240082 27334.93936473   483.97903422 -3385.12952694]
------
Step:6, Action:North
State  208
Old Q Values:  [29088.34240082 27334.93936473   483.97903422 -3385.12952694]
New Q values:  [11735.26661937 27334.93936473   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  24
xxxxx
xg.ax
x.  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   335.09886348 -6245.61866138  -353.26197669]
------
Step:7, Action:South
State  130
Old Q Values:  [36041.91667283  9347.48299252  -180.00807518 87213.62783571]
New Q values:  [36041.91667283 11938.87500643  -180.00807518 87213.62783571]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
xg ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11735.26661937 27334.93936473   483.97903422 -3385.12952694]
------
Step:8, Action:South
State  208
Old Q Values:  [11735.26661937 27334.93936473   483.97903422 -3385.12952694]
New Q values:  [11735.26661937 14631.5606851    483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  22
xxxxx
xg. x
x.  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12327.28313068 -6569.7824655  -7525.7277781   1565.83976258]
------
Step:9, Action:North
State  288
Old Q Values:  [12327.28313068 -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [ 9319.7814578  -6569.7824655  -7525.7277781   1565.83976258]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
xg ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11735.26661937 14631.5606851    483.97903422 -3385.12952694]
------
Step:10, Action:South
State  208
Old Q Values:  [11735.26661937 14631.5606851    483.97903422 -3385.12952694]
New Q values:  [11735.26661937  8647.95871138   483.97903422 -3385.12952694]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9319.7814578  -6569.7824655  -7525.7277781   1565.83976258]
------
Step:11, Action:North
State  288
Old Q Values:  [ 9319.7814578  -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [13515.76470659 -6569.7824655  -7525.7277781   1565.83976258]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
x. ax
x g x
xxxxx
Step:12, Action:West
State  208
Old Q Values:  [11735.26661937  8647.95871138   483.97903422 -3385.12952694]
New Q values:  [11735.26661937  8647.95871138   483.97903422 -1455.65174173]
Reward: -10001  Episode Reward:  -9982
xxxxx
x.. x
x.g x
x . x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1126.08525554   450.62327432  1099.96026581]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.53744617e+03 1.96633336e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.01156507e+04 1.96633336e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 63650.90746031 14231.81223895]
------
Step:2, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  702.07035934  595.74113465]
New Q values:  [  37.74111519 -168.92307549 4340.95755571  595.74113465]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13515.76470659 -6569.7824655  -7525.7277781   1565.83976258]
------
Step:3, Action:North
State  288
Old Q Values:  [13515.76470659 -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [ 8932.28586845 -6569.7824655  -7525.7277781   1565.83976258]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11735.26661937  8647.95871138   483.97903422 -1455.65174173]
------
Step:4, Action:North
State  208
Old Q Values:  [11735.26661937  8647.95871138   483.97903422 -1455.65174173]
New Q values:  [30857.59499846  8647.95871138   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  26
xxxxx
x..ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 11938.87500643  -180.00807518 87213.62783571]
------
Step:5, Action:West
State  138
Old Q Values:  [ -139.45925583 -2478.44760339  -180.6          179.21255334]
New Q values:  [ -139.45925583 -2478.44760339  -180.6          110.35266045]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   110.89213037    96.31505933]
------
Step:6, Action:East
State  121
Old Q Values:  [    0.             0.         -8430.28109083    76.59116932]
New Q values:  [    0.             0.         -9272.18277729    76.59116932]
Reward: -10001  Episode Reward:  -9966
xxxxx
x. gx
x.  x
x.  x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   142.93699689]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624    113.26432643   142.93699689]
New Q values:  [ -281.736      -9545.4473624    113.26432643   101.48166813]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   129.68956457  -180.6       ]
------
Step:2, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   129.68956457  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684    85.25512376  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   101.48166813]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   471.31335839  -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225   294.4550024   -841.67946252]
Reward: 9  Episode Reward:  17
xxxxx
xg ax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   335.09886348 -6245.61866138  -353.26197669]
------
Step:4, Action:South
State  138
Old Q Values:  [ -139.45925583 -2478.44760339  -180.6          110.35266045]
New Q values:  [-139.45925583 -788.42758321 -180.6         110.35266045]
Reward: 9  Episode Reward:  26
xxxxx
x   x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  343.40782369   658.50486048 -8489.43729461   531.09593838]
------
Step:5, Action:South
State  216
Old Q Values:  [  343.40782369   658.50486048 -8489.43729461   531.09593838]
New Q values:  [  343.40782369  2948.48770473 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8932.28586845 -6569.7824655  -7525.7277781   1565.83976258]
------
Step:6, Action:North
State  288
Old Q Values:  [ 8932.28586845 -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [ 4456.8606588  -6569.7824655  -7525.7277781   1565.83976258]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  343.40782369  2948.48770473 -8489.43729461   531.09593838]
------
Step:7, Action:South
State  216
Old Q Values:  [  343.40782369  2948.48770473 -8489.43729461   531.09593838]
New Q values:  [  343.40782369  2515.85327953 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4456.8606588  -6569.7824655  -7525.7277781   1565.83976258]
------
Step:8, Action:North
State  288
Old Q Values:  [ 4456.8606588  -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [ 2536.90024738 -6569.7824655  -7525.7277781   1565.83976258]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  343.40782369  2515.85327953 -8489.43729461   531.09593838]
------
Step:9, Action:South
State  216
Old Q Values:  [  343.40782369  2515.85327953 -8489.43729461   531.09593838]
New Q values:  [  343.40782369  1766.81138603 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  31
xxxxx
xg  x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2536.90024738 -6569.7824655  -7525.7277781   1565.83976258]
------
Step:10, Action:North
State  288
Old Q Values:  [ 2536.90024738 -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781   1565.83976258]
Reward: -1  Episode Reward:  30
xxxxx
x g x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  343.40782369  1766.81138603 -8489.43729461   531.09593838]
------
Step:11, Action:South
State  216
Old Q Values:  [  343.40782369  1766.81138603 -8489.43729461   531.09593838]
New Q values:  [  343.40782369  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781   1565.83976258]
------
Step:12, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781   1565.83976258]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781  19727.00814313]
Reward: 9  Episode Reward:  38
xxxxx
x   x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 63650.90746031 14231.81223895]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 63650.90746031 14231.81223895]
New Q values:  [-2527.46239811 -8521.23367799 31377.86542706 14231.81223895]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781  19727.00814313]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781  19727.00814313]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781  17303.56288537]
Reward: -1  Episode Reward:  36
xxxxx
x g x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 31377.86542706 14231.81223895]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 31377.86542706 14231.81223895]
New Q values:  [-2527.46239811 -8521.23367799 17741.61503644 14231.81223895]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781  17303.56288537]
------
Step:16, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781  17303.56288537]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781  12243.30966508]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 17741.61503644 14231.81223895]
------
Step:17, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 17741.61503644 14231.81223895]
New Q values:  [-2527.46239811 -8521.23367799 10769.0389141  14231.81223895]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781  12243.30966508]
------
Step:18, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781  12243.30966508]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781   6199.01113274]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x .gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4340.95755571  595.74113465]
------
Step:19, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4340.95755571  595.74113465]
New Q values:  [  37.74111519 -168.92307549 3595.48636211  595.74113465]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781   6199.01113274]
------
Step:20, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781   6199.01113274]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781   6748.54812478]
Reward: -1  Episode Reward:  30
xxxxx
x g x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10769.0389141  14231.81223895]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10769.0389141  14231.81223895]
New Q values:  [-2527.46239811 -8521.23367799 10769.0389141  29733.86743244]
Reward: 9  Episode Reward:  39
xxxxx
x   x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474 12394.68064405  1875.31501677]
------
Step:22, Action:North
State  261
Old Q Values:  [31719.30653776   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [12870.36800947   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   610.81798124 -5509.864806    -180.6       ]
------
Step:23, Action:South
State  177
Old Q Values:  [105001.89227881  24815.52956608  67020.65338209      0.        ]
New Q values:  [105001.89227881  33961.35436329  67020.65338209      0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474 12394.68064405  1875.31501677]
------
Step:24, Action:North
State  261
Old Q Values:  [12870.36800947   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [ 5330.79259816   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  36
xxxxx
x g x
xa. x
x   x
xxxxx
Step:25, Action:North
State  177
Old Q Values:  [105001.89227881  33961.35436329  67020.65338209      0.        ]
New Q values:  [78394.48547832 33961.35436329 67020.65338209     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[     0.          54146.33549678 121314.428556        0.        ]
------
Step:26, Action:East
State  111
Old Q Values:  [-177.44732869  516.83113546  316.44394556 -120.29354603]
New Q values:  [-177.44732869  516.83113546  159.95687615 -120.29354603]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   101.48166813]
------
Step:27, Action:East
State  114
Old Q Values:  [  -180.6          4272.38349051  36205.85611814 125288.42495849]
New Q values:  [  -180.6          4272.38349051  40645.83079797 125288.42495849]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 11938.87500643  -180.00807518 87213.62783571]
------
Step:28, Action:West
State  138
Old Q Values:  [-139.45925583 -788.42758321 -180.6         110.35266045]
New Q values:  [-139.45925583 -788.42758321 -180.6          77.52036211]
Reward: -1  Episode Reward:  32
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    113.26432643   101.48166813]
------
Step:29, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    113.26432643   101.48166813]
New Q values:  [ -281.736      -9545.4473624     67.96183921   101.48166813]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -788.42758321 -180.6          77.52036211]
------
Step:30, Action:West
State  138
Old Q Values:  [-139.45925583 -788.42758321 -180.6          77.52036211]
New Q values:  [-139.45925583 -788.42758321 -180.6          60.85264528]
Reward: -1  Episode Reward:  30
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     67.96183921   101.48166813]
------
Step:31, Action:West
State  114
Old Q Values:  [  -180.6          4272.38349051  40645.83079797 125288.42495849]
New Q values:  [ -180.6         4272.38349051 40645.83079797 86509.0985502 ]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[     0.          54146.33549678 121314.428556        0.        ]
------
Step:32, Action:East
State  111
Old Q Values:  [-177.44732869  516.83113546  159.95687615 -120.29354603]
New Q values:  [-177.44732869  516.83113546   96.65038957 -120.29354603]
Reward: -1  Episode Reward:  28
xxxxx
x a x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   110.89213037    96.31505933]
------
Step:33, Action:East
State  114
Old Q Values:  [ -180.6         4272.38349051 40645.83079797 86509.0985502 ]
New Q values:  [ -180.6         4272.38349051 42421.8206699  86509.0985502 ]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 11938.87500643  -180.00807518 87213.62783571]
------
Step:34, Action:West
State  138
Old Q Values:  [-139.45925583 -788.42758321 -180.6          60.85264528]
New Q values:  [-139.45925583 -788.42758321 -180.6          57.00869722]
Reward: -1  Episode Reward:  26
xxxxx
x a x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   110.89213037    96.31505933]
------
Step:35, Action:East
State  114
Old Q Values:  [ -180.6         4272.38349051 42421.8206699  86509.0985502 ]
New Q values:  [ -180.6         4272.38349051 43132.21661867 86509.0985502 ]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 11938.87500643  -180.00807518 87213.62783571]
------
Step:36, Action:West
State  136
Old Q Values:  [-5281.21195651   335.09886348 -6245.61866138  -353.26197669]
New Q values:  [-5281.21195651   335.09886348 -6245.61866138 -6053.56828995]
Reward: -10001  Episode Reward:  -9976
xxxxx
x g x
x . x
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   110.89213037    96.31505933]
------
Step:1, Action:East
State  121
Old Q Values:  [    0.             0.         -9272.18277729    76.59116932]
New Q values:  [    0.             0.         -9602.94345187    76.59116932]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10769.0389141  29733.86743244]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10769.0389141  29733.86743244]
New Q values:  [-2527.46239811 -8521.23367799 10769.0389141   6400.09018618]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x ..x
xg .x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1126.08525554   450.62327432  1099.96026581]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.01156507e+04 1.96633336e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.12823720e+04 1.96633336e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10769.0389141   6400.09018618]
------
Step:2, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 3595.48636211  595.74113465]
New Q values:  [  37.74111519 -168.92307549 3468.15898228  595.74113465]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781   6748.54812478]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781   6748.54812478]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781   3739.2669446 ]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 3468.15898228  595.74113465]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10769.0389141   6400.09018618]
New Q values:  [-2527.46239811 -8521.23367799  5428.79564902  6400.09018618]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781   3739.2669446 ]
------
Step:5, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781   3739.2669446 ]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781   3415.13383369]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5428.79564902  6400.09018618]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5428.79564902  6400.09018618]
New Q values:  [-2527.46239811 -8521.23367799  5428.79564902  3066.57928767]
Reward: 9  Episode Reward:  24
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1236.3219607  -8695.4397473   1670.47737734 -2601.74710518]
------
Step:7, Action:East
State  257
Old Q Values:  [80119.14178952  2256.66526474 12394.68064405  1875.31501677]
New Q values:  [80119.14178952  2256.66526474  6585.91095232  1875.31501677]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5428.79564902  3066.57928767]
------
Step:8, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 3468.15898228  595.74113465]
New Q values:  [  37.74111519 -168.92307549 2411.20374302  595.74113465]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781   3415.13383369]
------
Step:9, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781   3415.13383369]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781   2088.81465638]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x.  x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2411.20374302  595.74113465]
------
Step:10, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2411.20374302  595.74113465]
New Q values:  [   37.74111519  -168.92307549  2411.20374302 24273.43899072]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474  6585.91095232  1875.31501677]
------
Step:11, Action:North
State  261
Old Q Values:  [ 5330.79259816   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [ 2320.96243364   -40.34168621 -1137.67178372   -35.88578819]
Reward: 9  Episode Reward:  29
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   610.81798124 -5509.864806    -180.6       ]
------
Step:12, Action:South
State  177
Old Q Values:  [78394.48547832 33961.35436329 67020.65338209     0.        ]
New Q values:  [78394.48547832 37619.68428217 67020.65338209     0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474  6585.91095232  1875.31501677]
------
Step:13, Action:North
State  261
Old Q Values:  [ 2320.96243364   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [ 1111.03036783   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  27
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   610.81798124 -5509.864806    -180.6       ]
------
Step:14, Action:South
State  183
Old Q Values:  [ 390.42113812 9638.2731413  1505.96140848    0.        ]
New Q values:  [ 390.42113812 4188.01836687 1505.96140848    0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1111.03036783   -40.34168621 -1137.67178372   -35.88578819]
------
Step:15, Action:North
State  260
Old Q Values:  [ 1236.3219607  -8695.4397473   1670.47737734 -2601.74710518]
New Q values:  [ 1584.43412275 -8695.4397473   1670.47737734 -2601.74710518]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3635.01779491     0.        ]
------
Step:16, Action:East
State  183
Old Q Values:  [ 390.42113812 4188.01836687 1505.96140848    0.        ]
New Q values:  [ 390.42113812 4188.01836687 2452.70271697    0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  6.16972718e+03  1.20371620e+03]
------
Step:17, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.12823720e+04 1.96633336e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.12823720e+04 1.71220119e+04 2.91043938e+03]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30857.59499846  8647.95871138   483.97903422 -1455.65174173]
------
Step:18, Action:North
State  208
Old Q Values:  [30857.59499846  8647.95871138   483.97903422 -1455.65174173]
New Q values:  [38512.5263501   8647.95871138   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  32
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 11938.87500643  -180.00807518 87213.62783571]
------
Step:19, Action:West
State  130
Old Q Values:  [36041.91667283 11938.87500643  -180.00807518 87213.62783571]
New Q values:  [36041.91667283 11938.87500643  -180.00807518 60843.58069934]
Reward: 9  Episode Reward:  41
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         4272.38349051 43132.21661867 86509.0985502 ]
------
Step:20, Action:West
State  112
Old Q Values:  [    0.         11059.61439394  6789.02994987 97448.7696    ]
New Q values:  [    0.         11059.61439394  6789.02994987 98984.90784   ]
Reward: 100009  Episode Reward:  100050
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     67.96183921   101.48166813]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   110.89213037    96.31505933]
New Q values:  [ -253.44886264 -1902.20915811   110.89213037    88.317084  ]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  147.97020088 -252.78192178]
------
Step:2, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516  147.97020088 -252.78192178]
New Q values:  [-252.35169558    7.11267516   91.85571947 -252.78192178]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   110.89213037    88.317084  ]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   110.89213037    88.317084  ]
New Q values:  [ -253.44886264 -1902.20915811    66.85946132    88.317084  ]
Reward: 9  Episode Reward:  17
xxxxx
x  ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -788.42758321 -180.6          57.00869722]
------
Step:4, Action:West
State  136
Old Q Values:  [-5281.21195651   335.09886348 -6245.61866138 -6053.56828995]
New Q values:  [-5281.21195651   335.09886348 -6245.61866138 -2399.04996519]
Reward: -1  Episode Reward:  16
xxxxx
x agx
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9602.94345187    76.59116932]
------
Step:5, Action:West
State  121
Old Q Values:  [    0.             0.         -9602.94345187    76.59116932]
New Q values:  [    0.             0.         -9602.94345187   187.25031635]
Reward: -1  Episode Reward:  15
xxxxx
xa gx
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  524.04616209   31.9495824  -180.6       ]
------
Step:6, Action:South
State  109
Old Q Values:  [-241.10880094  524.04616209   31.9495824  -180.6       ]
New Q values:  [-241.10880094  398.26385921   31.9495824  -180.6       ]
Reward: 9  Episode Reward:  24
xxxxx
x g x
xa..x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   610.81798124 -5509.864806    -180.6       ]
------
Step:7, Action:South
State  181
Old Q Values:  [  120.21502113   610.81798124 -5509.864806    -180.6       ]
New Q values:  [  120.21502113   583.03630284 -5509.864806    -180.6       ]
Reward: 9  Episode Reward:  33
xxxxx
x   x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1111.03036783   -40.34168621 -1137.67178372   -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [ 1111.03036783   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [ 1700.21765719   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812 4188.01836687 2452.70271697    0.        ]
------
Step:9, Action:South
State  183
Old Q Values:  [ 390.42113812 4188.01836687 2452.70271697    0.        ]
New Q values:  [ 390.42113812 2184.6726439  2452.70271697    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1700.21765719   -40.34168621 -1137.67178372   -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [ 1700.21765719   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [  854.39795373   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   583.03630284 -5509.864806    -180.6       ]
------
Step:11, Action:South
State  181
Old Q Values:  [  120.21502113   583.03630284 -5509.864806    -180.6       ]
New Q values:  [  120.21502113   488.93390726 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  854.39795373   -40.34168621 -1137.67178372   -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [  854.39795373   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [  487.83935367   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   488.93390726 -5509.864806    -180.6       ]
------
Step:13, Action:South
State  181
Old Q Values:  [  120.21502113   488.93390726 -5509.864806    -180.6       ]
New Q values:  [  120.21502113   341.325369   -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  487.83935367   -40.34168621 -1137.67178372   -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [  487.83935367   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [  296.93335217   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x g x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   341.325369   -5509.864806    -180.6       ]
------
Step:15, Action:South
State  181
Old Q Values:  [  120.21502113   341.325369   -5509.864806    -180.6       ]
New Q values:  [  120.21502113   225.01015325 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  296.93335217   -40.34168621 -1137.67178372   -35.88578819]
------
Step:16, Action:North
State  261
Old Q Values:  [  296.93335217   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [  185.67638684   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x g x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   225.01015325 -5509.864806    -180.6       ]
------
Step:17, Action:South
State  181
Old Q Values:  [  120.21502113   225.01015325 -5509.864806    -180.6       ]
New Q values:  [  120.21502113   145.10697735 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  185.67638684   -40.34168621 -1137.67178372   -35.88578819]
------
Step:18, Action:North
State  261
Old Q Values:  [  185.67638684   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [  117.20264794   -40.34168621 -1137.67178372   -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x g x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   145.10697735 -5509.864806    -180.6       ]
------
Step:19, Action:South
State  180
Old Q Values:  [ -897.86098047   437.03323629  2632.65279418 -4966.32149798]
New Q values:  [ -897.86098047   675.35650772  2632.65279418 -4966.32149798]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1584.43412275 -8695.4397473   1670.47737734 -2601.74710518]
------
Step:20, Action:East
State  261
Old Q Values:  [  117.20264794   -40.34168621 -1137.67178372   -35.88578819]
New Q values:  [ 117.20264794  -40.34168621 -102.34412006  -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x g x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1177.74864476   642.15435654]
------
Step:21, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1177.74864476   642.15435654]
New Q values:  [   16.82637525 -5807.06396197  1103.14385482   642.15435654]
Reward: 9  Episode Reward:  29
xxxxx
x   x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781   2088.81465638]
------
Step:22, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781   2088.81465638]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781   2463.56455726]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5428.79564902  3066.57928767]
------
Step:23, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1103.14385482   642.15435654]
New Q values:  [   16.82637525 -5807.06396197  1179.7269091    642.15435654]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781   2463.56455726]
------
Step:24, Action:West
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781   2463.56455726]
New Q values:  [ 1544.20351476 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -10001  Episode Reward:  -9974
xxxxx
x   x
x ..x
x g x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1544.20351476 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:1, Action:North
State  288
Old Q Values:  [ 1544.20351476 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [12176.83931093 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[38512.5263501   8647.95871138   483.97903422 -1455.65174173]
------
Step:2, Action:North
State  208
Old Q Values:  [38512.5263501   8647.95871138   483.97903422 -1455.65174173]
New Q values:  [15510.94019908  8647.95871138   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   335.09886348 -6245.61866138 -2399.04996519]
------
Step:3, Action:South
State  138
Old Q Values:  [-139.45925583 -788.42758321 -180.6          57.00869722]
New Q values:  [-139.45925583 4337.31102644 -180.6          57.00869722]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15510.94019908  8647.95871138   483.97903422 -1455.65174173]
------
Step:4, Action:North
State  210
Old Q Values:  [3.26281737e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.43518628e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4337.31102644 -180.6          57.00869722]
------
Step:5, Action:South
State  138
Old Q Values:  [-139.45925583 4337.31102644 -180.6          57.00869722]
New Q values:  [-139.45925583 6039.88325234 -180.6          57.00869722]
Reward: -1  Episode Reward:  15
xxxxx
x . x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.43518628e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:6, Action:North
State  208
Old Q Values:  [15510.94019908  8647.95871138   483.97903422 -1455.65174173]
New Q values:  [ 8015.74105534  8647.95871138   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  14
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 6039.88325234 -180.6          57.00869722]
------
Step:7, Action:South
State  136
Old Q Values:  [-5281.21195651   335.09886348 -6245.61866138 -2399.04996519]
New Q values:  [-5281.21195651  2727.8271588  -6245.61866138 -2399.04996519]
Reward: -1  Episode Reward:  13
xxxxx
x g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8015.74105534  8647.95871138   483.97903422 -1455.65174173]
------
Step:8, Action:South
State  208
Old Q Values:  [ 8015.74105534  8647.95871138   483.97903422 -1455.65174173]
New Q values:  [ 8015.74105534  7111.63527783   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  12
xxxxx
x .gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12176.83931093 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:9, Action:North
State  288
Old Q Values:  [12176.83931093 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [ 1274.85804097 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -10001  Episode Reward:  -9989
xxxxx
x . x
x..gx
x.. x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5428.79564902  3066.57928767]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5428.79564902  3066.57928767]
New Q values:  [-2527.46239811 -8521.23367799  2559.3756719   3066.57928767]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1274.85804097 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:2, Action:North
State  288
Old Q Values:  [ 1274.85804097 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [-3079.93446701 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -9991  Episode Reward:  -9982
xxxxx
x. .x
x..gx
x.  x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113   145.10697735 -5509.864806    -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [  120.21502113   145.10697735 -5509.864806    -180.6       ]
New Q values:  [  120.21502113    98.60358532 -5509.864806    -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 117.20264794  -40.34168621 -102.34412006  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [ 117.20264794  -40.34168621 -102.34412006  -35.88578819]
New Q values:  [ 782.09187427  -40.34168621 -102.34412006  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa. x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812 2184.6726439  2452.70271697    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 390.42113812 2184.6726439  2452.70271697    0.        ]
New Q values:  [ 390.42113812 2184.6726439  2047.16803099    0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x a x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 2677.89620798 3535.62314734 1101.59744825]
------
Step:4, Action:East
State  195
Old Q Values:  [  38.85388605 2677.89620798 3535.62314734 1101.59744825]
New Q values:  [  38.85388605 2677.89620798 5719.2081007  1101.59744825]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.43518628e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:5, Action:North
State  208
Old Q Values:  [ 8015.74105534  7111.63527783   483.97903422 -1455.65174173]
New Q values:  [21464.77063194  7111.63527783   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 11938.87500643  -180.00807518 60843.58069934]
------
Step:6, Action:West
State  130
Old Q Values:  [36041.91667283 11938.87500643  -180.00807518 60843.58069934]
New Q values:  [36041.91667283 11938.87500643  -180.00807518 63193.31045404]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
------
Step:7, Action:West
State  126
Old Q Values:  [  0.         437.4470373  374.96879939 114.09661187]
New Q values:  [  0.         437.4470373  374.96879939 206.08798538]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  516.83113546   96.65038957 -120.29354603]
------
Step:8, Action:South
State  111
Old Q Values:  [-177.44732869  516.83113546   96.65038957 -120.29354603]
New Q values:  [-177.44732869  358.74212015   96.65038957 -120.29354603]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 508.69888654  64.46351788   0.        ]
------
Step:9, Action:South
State  191
Old Q Values:  [  3.06655861 508.69888654  64.46351788   0.        ]
New Q values:  [  3.06655861 437.5071169   64.46351788   0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 782.09187427  -40.34168621 -102.34412006  -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [ 782.09187427  -40.34168621 -102.34412006  -35.88578819]
New Q values:  [ 643.03017275  -40.34168621 -102.34412006  -35.88578819]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 1102.64474348 -232.44066224  940.95197235]
------
Step:11, Action:South
State  188
Old Q Values:  [-6523.78898263   530.37056288  1972.05460653     0.        ]
New Q values:  [-6523.78898263   712.69143835  1972.05460653     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1584.43412275 -8695.4397473   1670.47737734 -2601.74710518]
------
Step:12, Action:East
State  261
Old Q Values:  [ 643.03017275  -40.34168621 -102.34412006  -35.88578819]
New Q values:  [643.03017275 -40.34168621 318.38042471 -35.88578819]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091    642.15435654]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2559.3756719   3066.57928767]
New Q values:  [-2527.46239811 -8521.23367799 60105.16992866  3066.57928767]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 60105.16992866  3066.57928767]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 60105.16992866  3066.57928767]
New Q values:  [-2527.46239811 -8521.23367799 23123.48763136  3066.57928767]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3079.93446701 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:2, Action:North
State  288
Old Q Values:  [-3079.93446701 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [ 5212.85740278 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21464.77063194  7111.63527783   483.97903422 -1455.65174173]
------
Step:3, Action:North
State  208
Old Q Values:  [21464.77063194  7111.63527783   483.97903422 -1455.65174173]
New Q values:  [ 9409.65640042  7111.63527783   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  27
xxxxx
xg ax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2727.8271588  -6245.61866138 -2399.04996519]
------
Step:4, Action:South
State  138
Old Q Values:  [-139.45925583 6039.88325234 -180.6          57.00869722]
New Q values:  [-139.45925583 5238.25022106 -180.6          57.00869722]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9409.65640042  7111.63527783   483.97903422 -1455.65174173]
------
Step:5, Action:North
State  208
Old Q Values:  [ 9409.65640042  7111.63527783   483.97903422 -1455.65174173]
New Q values:  [ 5334.73762648  7111.63527783   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5238.25022106 -180.6          57.00869722]
------
Step:6, Action:South
State  138
Old Q Values:  [-139.45925583 5238.25022106 -180.6          57.00869722]
New Q values:  [ -139.45925583 -1771.80932823  -180.6           57.00869722]
Reward: -10001  Episode Reward:  -9976
xxxxx
x.  x
x..gx
x.  x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 23123.48763136  3066.57928767]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 23123.48763136  3066.57928767]
New Q values:  [-2527.46239811 -8521.23367799 10818.65227338  3066.57928767]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x ..x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5212.85740278 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:2, Action:North
State  288
Old Q Values:  [ 5212.85740278 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [ 4224.03354446 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5334.73762648  7111.63527783   483.97903422 -1455.65174173]
------
Step:3, Action:South
State  208
Old Q Values:  [ 5334.73762648  7111.63527783   483.97903422 -1455.65174173]
New Q values:  [ 5334.73762648  4111.26417447   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4224.03354446 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:4, Action:North
State  288
Old Q Values:  [ 4224.03354446 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [ 3289.43470573 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5334.73762648  4111.26417447   483.97903422 -1455.65174173]
------
Step:5, Action:North
State  208
Old Q Values:  [ 5334.73762648  4111.26417447   483.97903422 -1455.65174173]
New Q values:  [ 2957.64319824  4111.26417447   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  25
xxxxx
xg.ax
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2727.8271588  -6245.61866138 -2399.04996519]
------
Step:6, Action:South
State  136
Old Q Values:  [-5281.21195651  2727.8271588  -6245.61866138 -2399.04996519]
New Q values:  [-5281.21195651  2323.91011586 -6245.61866138 -2399.04996519]
Reward: -1  Episode Reward:  24
xxxxx
x.g x
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2957.64319824  4111.26417447   483.97903422 -1455.65174173]
------
Step:7, Action:South
State  208
Old Q Values:  [ 2957.64319824  4111.26417447   483.97903422 -1455.65174173]
New Q values:  [ 2957.64319824  2630.73608151   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3289.43470573 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:8, Action:North
State  288
Old Q Values:  [ 3289.43470573 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [ 2202.46684176 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -1  Episode Reward:  22
xxxxx
x.g x
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2957.64319824  2630.73608151   483.97903422 -1455.65174173]
------
Step:9, Action:North
State  208
Old Q Values:  [ 2957.64319824  2630.73608151   483.97903422 -1455.65174173]
New Q values:  [20140.45041551  2630.73608151   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  21
xxxxx
x..ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 11938.87500643  -180.00807518 63193.31045404]
------
Step:10, Action:West
State  136
Old Q Values:  [-5281.21195651  2323.91011586 -6245.61866138 -2399.04996519]
New Q values:  [-5281.21195651  2323.91011586 -6245.61866138 -6865.88348535]
Reward: -9991  Episode Reward:  -9970
xxxxx
x.g x
x . x
x.  x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20140.45041551  2630.73608151   483.97903422 -1455.65174173]
------
Step:1, Action:North
State  216
Old Q Values:  [  343.40782369  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [  839.93616424  1175.87648318 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  9
xxxxx
x.gax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2323.91011586 -6245.61866138 -6865.88348535]
------
Step:2, Action:South
State  138
Old Q Values:  [ -139.45925583 -1771.80932823  -180.6           57.00869722]
New Q values:  [-139.45925583 5332.81139336 -180.6          57.00869722]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20140.45041551  2630.73608151   483.97903422 -1455.65174173]
------
Step:3, Action:North
State  216
Old Q Values:  [  839.93616424  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [ 1935.2178837   1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5332.81139336 -180.6          57.00869722]
------
Step:4, Action:South
State  138
Old Q Values:  [-139.45925583 5332.81139336 -180.6          57.00869722]
New Q values:  [-139.45925583 6438.08339911 -180.6          57.00869722]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.43518628e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:5, Action:North
State  210
Old Q Values:  [1.43518628e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [7.67157014e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 6438.08339911 -180.6          57.00869722]
------
Step:6, Action:South
State  138
Old Q Values:  [-139.45925583 6438.08339911 -180.6          57.00869722]
New Q values:  [-139.45925583 4876.10440227 -180.6          57.00869722]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.67157014e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:7, Action:North
State  208
Old Q Values:  [20140.45041551  2630.73608151   483.97903422 -1455.65174173]
New Q values:  [ 9518.41148688  2630.73608151   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  3
xxxxx
x..ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4876.10440227 -180.6          57.00869722]
------
Step:8, Action:West
State  138
Old Q Values:  [-139.45925583 4876.10440227 -180.6          57.00869722]
New Q values:  [-139.45925583 4876.10440227 -180.6          54.69860409]
Reward: 9  Episode Reward:  12
xxxxx
x.a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    66.85946132    88.317084  ]
------
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    66.85946132    88.317084  ]
New Q values:  [ -253.44886264 -1902.20915811    66.85946132    68.28354944]
Reward: 9  Episode Reward:  21
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   91.85571947 -252.78192178]
------
Step:10, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   91.85571947 -252.78192178]
New Q values:  [-252.35169558    7.11267516   66.58678822 -252.78192178]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     67.96183921   101.48166813]
------
Step:11, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    66.85946132    68.28354944]
New Q values:  [ -253.44886264 -1902.20915811    66.85946132    46.68945624]
Reward: -1  Episode Reward:  19
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   66.58678822 -252.78192178]
------
Step:12, Action:East
State  105
Old Q Values:  [-1.80600000e+02  6.72320144e+00 -8.24883589e+03  0.00000000e+00]
New Q values:  [ -180.6            6.72320144 -3243.95926025     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9602.94345187   187.25031635]
------
Step:13, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    66.85946132    46.68945624]
New Q values:  [ -253.44886264 -1902.20915811    66.85946132    38.05181896]
Reward: -1  Episode Reward:  17
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   66.58678822 -252.78192178]
------
Step:14, Action:East
State  105
Old Q Values:  [ -180.6            6.72320144 -3243.95926025     0.        ]
New Q values:  [ -180.6            6.72320144 -1242.00860919     0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9602.94345187   187.25031635]
------
Step:15, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    66.85946132    38.05181896]
New Q values:  [ -253.44886264 -1902.20915811    66.85946132    34.59676405]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   66.58678822 -252.78192178]
------
Step:16, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   66.58678822 -252.78192178]
New Q values:  [-252.35169558    7.11267516   46.09255368 -252.78192178]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811    66.85946132    34.59676405]
------
Step:17, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811    66.85946132    34.59676405]
New Q values:  [ -253.44886264 -1902.20915811  1488.97510521    34.59676405]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4876.10440227 -180.6          54.69860409]
------
Step:18, Action:West
State  138
Old Q Values:  [-139.45925583 4876.10440227 -180.6          54.69860409]
New Q values:  [-139.45925583 4876.10440227 -180.6          51.72394207]
Reward: -1  Episode Reward:  12
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     67.96183921   101.48166813]
------
Step:19, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   294.4550024   -841.67946252]
New Q values:  [-9594.56523706 -8069.05606225   294.4550024   -335.25482457]
Reward: -1  Episode Reward:  11
xxxxx
xag x
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            6.72320144 -1242.00860919     0.        ]
------
Step:20, Action:South
State  108
Old Q Values:  [-8463.16477134  1255.78993593   642.92809613     0.        ]
New Q values:  [-8463.16477134  1099.33235633   642.92809613     0.        ]
Reward: 9  Episode Reward:  20
xxxxx
xg  x
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263   712.69143835  1972.05460653     0.        ]
------
Step:21, Action:East
State  188
Old Q Values:  [-6523.78898263   712.69143835  1972.05460653     0.        ]
New Q values:  [-6523.78898263   712.69143835  1026.09328485     0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x   x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 62.8218634  792.90480745 694.77966824 408.67479662]
------
Step:22, Action:South
State  200
Old Q Values:  [ 62.8218634  792.90480745 694.77966824 408.67479662]
New Q values:  [  62.8218634  3568.15760499  694.77966824  408.67479662]
Reward: 9  Episode Reward:  28
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10818.65227338  3066.57928767]
------
Step:23, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10818.65227338  3066.57928767]
New Q values:  [-2527.46239811 -8521.23367799  4993.60096188  3066.57928767]
Reward: 9  Episode Reward:  37
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2202.46684176 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:24, Action:North
State  288
Old Q Values:  [ 2202.46684176 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [ 3181.85777933 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.67157014e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:25, Action:North
State  216
Old Q Values:  [ 1935.2178837   1175.87648318 -8489.43729461   531.09593838]
New Q values:  [ 2236.31847416  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4876.10440227 -180.6          51.72394207]
------
Step:26, Action:South
State  130
Old Q Values:  [36041.91667283 11938.87500643  -180.00807518 63193.31045404]
New Q values:  [36041.91667283  7076.4210452   -180.00807518 63193.31045404]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.67157014e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:27, Action:North
State  218
Old Q Values:  [ 1638.46584919 -1790.08146114     0.           429.03841886]
New Q values:  [ 2117.61766036 -1790.08146114     0.           429.03841886]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4876.10440227 -180.6          51.72394207]
------
Step:28, Action:South
State  138
Old Q Values:  [-139.45925583 4876.10440227 -180.6          51.72394207]
New Q values:  [-139.45925583 2620.73730316 -180.6          51.72394207]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2236.31847416  1175.87648318 -8489.43729461   531.09593838]
------
Step:29, Action:North
State  218
Old Q Values:  [ 2117.61766036 -1790.08146114     0.           429.03841886]
New Q values:  [ 1632.66825509 -1790.08146114     0.           429.03841886]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 2620.73730316 -180.6          51.72394207]
------
Step:30, Action:South
State  130
Old Q Values:  [36041.91667283  7076.4210452   -180.00807518 63193.31045404]
New Q values:  [36041.91667283  5131.43946071  -180.00807518 63193.31045404]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.67157014e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:31, Action:North
State  218
Old Q Values:  [ 1632.66825509 -1790.08146114     0.           429.03841886]
New Q values:  [ 1438.68849298 -1790.08146114     0.           429.03841886]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 2620.73730316 -180.6          51.72394207]
------
Step:32, Action:South
State  130
Old Q Values:  [36041.91667283  5131.43946071  -180.00807518 63193.31045404]
New Q values:  [36041.91667283  4353.44682691  -180.00807518 63193.31045404]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.67157014e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:33, Action:North
State  218
Old Q Values:  [ 1438.68849298 -1790.08146114     0.           429.03841886]
New Q values:  [ 1361.09658814 -1790.08146114     0.           429.03841886]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 2620.73730316 -180.6          51.72394207]
------
Step:34, Action:South
State  138
Old Q Values:  [-139.45925583 2620.73730316 -180.6          51.72394207]
New Q values:  [-139.45925583 1456.02389771 -180.6          51.72394207]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 1361.09658814 -1790.08146114     0.           429.03841886]
------
Step:35, Action:North
State  218
Old Q Values:  [ 1361.09658814 -1790.08146114     0.           429.03841886]
New Q values:  [  980.64580457 -1790.08146114     0.           429.03841886]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1456.02389771 -180.6          51.72394207]
------
Step:36, Action:South
State  138
Old Q Values:  [-139.45925583 1456.02389771 -180.6          51.72394207]
New Q values:  [-139.45925583  876.00330045 -180.6          51.72394207]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  980.64580457 -1790.08146114     0.           429.03841886]
------
Step:37, Action:North
State  218
Old Q Values:  [  980.64580457 -1790.08146114     0.           429.03841886]
New Q values:  [  654.45931196 -1790.08146114     0.           429.03841886]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  876.00330045 -180.6          51.72394207]
------
Step:38, Action:South
State  130
Old Q Values:  [36041.91667283  4353.44682691  -180.00807518 63193.31045404]
New Q values:  [36041.91667283  4042.24977339  -180.00807518 63193.31045404]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.67157014e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:39, Action:North
State  218
Old Q Values:  [  654.45931196 -1790.08146114     0.           429.03841886]
New Q values:  [  523.98471492 -1790.08146114     0.           429.03841886]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  876.00330045 -180.6          51.72394207]
------
Step:40, Action:South
State  138
Old Q Values:  [-139.45925583  876.00330045 -180.6          51.72394207]
New Q values:  [-139.45925583 1020.69686243 -180.6          51.72394207]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2236.31847416  1175.87648318 -8489.43729461   531.09593838]
------
Step:41, Action:North
State  216
Old Q Values:  [ 2236.31847416  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [ 1200.13644839  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1020.69686243 -180.6          51.72394207]
------
Step:42, Action:South
State  136
Old Q Values:  [-5281.21195651  2323.91011586 -6245.61866138 -6865.88348535]
New Q values:  [-5281.21195651  1289.00498086 -6245.61866138 -6865.88348535]
Reward: -1  Episode Reward:  18
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1200.13644839  1175.87648318 -8489.43729461   531.09593838]
------
Step:43, Action:North
State  216
Old Q Values:  [ 1200.13644839  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [  785.66363809  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1020.69686243 -180.6          51.72394207]
------
Step:44, Action:South
State  138
Old Q Values:  [-139.45925583 1020.69686243 -180.6          51.72394207]
New Q values:  [-139.45925583  760.44168993 -180.6          51.72394207]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  785.66363809  1175.87648318 -8489.43729461   531.09593838]
------
Step:45, Action:South
State  218
Old Q Values:  [  523.98471492 -1790.08146114     0.           429.03841886]
New Q values:  [523.98471492 237.92474934   0.         429.03841886]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3181.85777933 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:46, Action:North
State  288
Old Q Values:  [ 3181.85777933 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [ 1429.33852621 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[523.98471492 237.92474934   0.         429.03841886]
------
Step:47, Action:North
State  216
Old Q Values:  [  785.66363809  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [  541.79796221  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  760.44168993 -180.6          51.72394207]
------
Step:48, Action:South
State  138
Old Q Values:  [-139.45925583  760.44168993 -180.6          51.72394207]
New Q values:  [-139.45925583  460.77209045 -180.6          51.72394207]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[523.98471492 237.92474934   0.         429.03841886]
------
Step:49, Action:North
State  210
Old Q Values:  [7.67157014e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.20260212e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  4042.24977339  -180.00807518 63193.31045404]
------
Step:50, Action:West
State  138
Old Q Values:  [-139.45925583  460.77209045 -180.6          51.72394207]
New Q values:  [-139.45925583  460.77209045 -180.6          50.53407727]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     67.96183921   101.48166813]
------
Step:51, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1488.97510521    34.59676405]
New Q values:  [ -253.44886264 -1902.20915811  1488.97510521    27.06647173]
Reward: -1  Episode Reward:  9
xxxxx
xa  x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   46.09255368 -252.78192178]
------
Step:52, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   46.09255368 -252.78192178]
New Q values:  [-252.35169558    7.11267516   48.28152191 -252.78192178]
Reward: -1  Episode Reward:  8
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     67.96183921   101.48166813]
------
Step:53, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1488.97510521    27.06647173]
New Q values:  [ -253.44886264 -1902.20915811  1488.97510521    24.71104526]
Reward: -1  Episode Reward:  7
xxxxx
xa  x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516   48.28152191 -252.78192178]
------
Step:54, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516   48.28152191 -252.78192178]
New Q values:  [-252.35169558    7.11267516  465.40514033 -252.78192178]
Reward: -1  Episode Reward:  6
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1488.97510521    24.71104526]
------
Step:55, Action:East
State  121
Old Q Values:  [    0.             0.         -9602.94345187   187.25031635]
New Q values:  [    0.             0.         -9455.07588649   187.25031635]
Reward: -10001  Episode Reward:  -9995
xxxxx
x  gx
x   x
x.  x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.12823720e+04 1.71220119e+04 2.91043938e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  3568.15760499  694.77966824  408.67479662]
New Q values:  [  62.8218634  3568.15760499  636.07481225  408.67479662]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  541.79796221  1175.87648318 -8489.43729461   531.09593838]
------
Step:2, Action:South
State  208
Old Q Values:  [ 9518.41148688  2630.73608151   483.97903422 -1455.65174173]
New Q values:  [ 9518.41148688  1486.49599047   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1429.33852621 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:3, Action:North
State  288
Old Q Values:  [ 1429.33852621 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [ 3426.65885655 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9518.41148688  1486.49599047   483.97903422 -1455.65174173]
------
Step:4, Action:North
State  208
Old Q Values:  [ 9518.41148688  1486.49599047   483.97903422 -1455.65174173]
New Q values:  [ 3950.99622189  1486.49599047   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  26
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  460.77209045 -180.6          50.53407727]
------
Step:5, Action:South
State  136
Old Q Values:  [-5281.21195651  1289.00498086 -6245.61866138 -6865.88348535]
New Q values:  [-5281.21195651   867.7649373  -6245.61866138 -6865.88348535]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  541.79796221  1175.87648318 -8489.43729461   531.09593838]
------
Step:6, Action:South
State  208
Old Q Values:  [ 3950.99622189  1486.49599047   483.97903422 -1455.65174173]
New Q values:  [ 3950.99622189  1621.99605315   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  24
xxxxx
x . x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3426.65885655 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:7, Action:North
State  288
Old Q Values:  [ 3426.65885655 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [-3444.63759081 -6569.7824655  -7525.7277781  -3386.53548239]
Reward: -10001  Episode Reward:  -9977
xxxxx
x . x
x. gx
x.. x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3950.99622189  1621.99605315   483.97903422 -1455.65174173]
------
Step:1, Action:North
State  216
Old Q Values:  [  541.79796221  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [  360.35081202  1175.87648318 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  9
xxxxx
x .ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  460.77209045 -180.6          50.53407727]
------
Step:2, Action:South
State  138
Old Q Values:  [-139.45925583  460.77209045 -180.6          50.53407727]
New Q values:  [-139.45925583  536.47178113 -180.6          50.53407727]
Reward: -1  Episode Reward:  8
xxxxx
x . x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  360.35081202  1175.87648318 -8489.43729461   531.09593838]
------
Step:3, Action:South
State  208
Old Q Values:  [ 3950.99622189  1621.99605315   483.97903422 -1455.65174173]
New Q values:  [ 3950.99622189  -361.76222346   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  17
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3444.63759081 -6569.7824655  -7525.7277781  -3386.53548239]
------
Step:4, Action:West
State  288
Old Q Values:  [-3444.63759081 -6569.7824655  -7525.7277781  -3386.53548239]
New Q values:  [-3444.63759081 -6569.7824655  -7525.7277781    148.86609561]
Reward: 9  Episode Reward:  26
xxxxx
xg. x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4993.60096188  3066.57928767]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4993.60096188  3066.57928767]
New Q values:  [-2527.46239811 -8521.23367799  2041.50021343  3066.57928767]
Reward: -1  Episode Reward:  25
xxxxx
x . x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3444.63759081 -6569.7824655  -7525.7277781    148.86609561]
------
Step:6, Action:West
State  288
Old Q Values:  [-3444.63759081 -6569.7824655  -7525.7277781    148.86609561]
New Q values:  [-3444.63759081 -6569.7824655  -7525.7277781    978.92022454]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2041.50021343  3066.57928767]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2041.50021343  3066.57928767]
New Q values:  [-2527.46239811 -8521.23367799  2041.50021343 25267.77425193]
Reward: 9  Episode Reward:  33
xxxxx
x g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474  6585.91095232  1875.31501677]
------
Step:8, Action:North
State  260
Old Q Values:  [ 1584.43412275 -8695.4397473   1670.47737734 -2601.74710518]
New Q values:  [ 1428.96948735 -8695.4397473   1670.47737734 -2601.74710518]
Reward: 9  Episode Reward:  42
xxxxx
xg. x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -897.86098047   675.35650772  2632.65279418 -4966.32149798]
------
Step:9, Action:East
State  180
Old Q Values:  [ -897.86098047   675.35650772  2632.65279418 -4966.32149798]
New Q values:  [ -897.86098047   675.35650772  6195.06469515 -4966.32149798]
Reward: 9  Episode Reward:  51
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.12823720e+04 1.71220119e+04 2.91043938e+03]
------
Step:10, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.12823720e+04 1.71220119e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.12823720e+04 8.03350364e+03 2.91043938e+03]
Reward: -1  Episode Reward:  50
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3950.99622189  -361.76222346   483.97903422 -1455.65174173]
------
Step:11, Action:North
State  208
Old Q Values:  [ 3950.99622189  -361.76222346   483.97903422 -1455.65174173]
New Q values:  [20537.79162497  -361.76222346   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  49
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  4042.24977339  -180.00807518 63193.31045404]
------
Step:12, Action:West
State  130
Old Q Values:  [36041.91667283  4042.24977339  -180.00807518 63193.31045404]
New Q values:  [ 36041.91667283   4042.24977339   -180.00807518 124133.20235592]
Reward: 100009  Episode Reward:  100058
xxxxx
x a x
x   x
x  gx
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  536.47178113 -180.6          50.53407727]
------
Step:1, Action:South
State  138
Old Q Values:  [-139.45925583  536.47178113 -180.6          50.53407727]
New Q values:  [-139.45925583 6827.79507037 -180.6          50.53407727]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.20260212e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:2, Action:North
State  210
Old Q Values:  [2.20260212e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.08581470e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 6827.79507037 -180.6          50.53407727]
------
Step:3, Action:South
State  138
Old Q Values:  [-139.45925583 6827.79507037 -180.6          50.53407727]
New Q values:  [-139.45925583 5987.96212765 -180.6          50.53407727]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.08581470e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  208
Old Q Values:  [20537.79162497  -361.76222346   483.97903422 -1455.65174173]
New Q values:  [10010.90528828  -361.76222346   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5987.96212765 -180.6          50.53407727]
------
Step:5, Action:West
State  138
Old Q Values:  [-139.45925583 5987.96212765 -180.6          50.53407727]
New Q values:  [-139.45925583 5987.96212765 -180.6         472.30616247]
Reward: 9  Episode Reward:  15
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1488.97510521    24.71104526]
------
Step:6, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1488.97510521    24.71104526]
New Q values:  [ -253.44886264 -1902.20915811  2391.37868038    24.71104526]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5987.96212765 -180.6         472.30616247]
------
Step:7, Action:West
State  138
Old Q Values:  [-139.45925583 5987.96212765 -180.6         472.30616247]
New Q values:  [-139.45925583 5987.96212765 -180.6         218.76696543]
Reward: -1  Episode Reward:  13
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     67.96183921   101.48166813]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     67.96183921   101.48166813]
New Q values:  [ -281.736      -9545.4473624     67.96183921    71.56920438]
Reward: 9  Episode Reward:  22
xxxxx
xa  x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684    85.25512376  -180.6       ]
------
Step:9, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684    85.25512376  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684    54.97281082  -180.6       ]
Reward: -1  Episode Reward:  21
xxxxx
x a x
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624     67.96183921    71.56920438]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624     67.96183921    71.56920438]
New Q values:  [ -281.736      -9545.4473624     67.96183921   167.64922385]
Reward: -1  Episode Reward:  20
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558    7.11267516  465.40514033 -252.78192178]
------
Step:11, Action:East
State  107
Old Q Values:  [-252.35169558    7.11267516  465.40514033 -252.78192178]
New Q values:  [-252.35169558    7.11267516  902.97566024 -252.78192178]
Reward: -1  Episode Reward:  19
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2391.37868038    24.71104526]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624     67.96183921   167.64922385]
New Q values:  [ -281.736      -9545.4473624   1822.97337398   167.64922385]
Reward: -1  Episode Reward:  18
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5987.96212765 -180.6         218.76696543]
------
Step:13, Action:South
State  138
Old Q Values:  [-139.45925583 5987.96212765 -180.6         218.76696543]
New Q values:  [-139.45925583 5652.02895056 -180.6         218.76696543]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.08581470e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:14, Action:North
State  208
Old Q Values:  [10010.90528828  -361.76222346   483.97903422 -1455.65174173]
New Q values:  [ 5699.37080048  -361.76222346   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5652.02895056 -180.6         218.76696543]
------
Step:15, Action:West
State  138
Old Q Values:  [-139.45925583 5652.02895056 -180.6         218.76696543]
New Q values:  [-139.45925583 5652.02895056 -180.6         633.79879836]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1822.97337398   167.64922385]
------
Step:16, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2391.37868038    24.71104526]
New Q values:  [ -253.44886264 -1902.20915811  2651.56015732    24.71104526]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5652.02895056 -180.6         633.79879836]
------
Step:17, Action:West
State  138
Old Q Values:  [-139.45925583 5652.02895056 -180.6         633.79879836]
New Q values:  [-139.45925583 5652.02895056 -180.6        1048.38756654]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2651.56015732    24.71104526]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1822.97337398   167.64922385]
New Q values:  [ -281.736      -9545.4473624   2424.19803476   167.64922385]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5652.02895056 -180.6        1048.38756654]
------
Step:19, Action:South
State  138
Old Q Values:  [-139.45925583 5652.02895056 -180.6        1048.38756654]
New Q values:  [-139.45925583 5517.65567972 -180.6        1048.38756654]
Reward: -1  Episode Reward:  11
xxxxx
x   x
x..ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.08581470e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:20, Action:North
State  210
Old Q Values:  [1.08581470e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [5.99795550e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5517.65567972 -180.6        1048.38756654]
------
Step:21, Action:South
State  138
Old Q Values:  [-139.45925583 5517.65567972 -180.6        1048.38756654]
New Q values:  [-139.45925583 2559.22521684 -180.6        1048.38756654]
Reward: -1  Episode Reward:  9
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  360.35081202  1175.87648318 -8489.43729461   531.09593838]
------
Step:22, Action:South
State  208
Old Q Values:  [ 5699.37080048  -361.76222346   483.97903422 -1455.65174173]
New Q values:  [ 5699.37080048   154.37117798   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3444.63759081 -6569.7824655  -7525.7277781    978.92022454]
------
Step:23, Action:West
State  288
Old Q Values:  [-3444.63759081 -6569.7824655  -7525.7277781    978.92022454]
New Q values:  [-3444.63759081 -6569.7824655  -7525.7277781   7971.3003654 ]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2041.50021343 25267.77425193]
------
Step:24, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2041.50021343 25267.77425193]
New Q values:  [-2527.46239811 -8521.23367799  2041.50021343 34148.25223763]
Reward: 9  Episode Reward:  26
xxxxx
x   x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474  6585.91095232  1875.31501677]
------
Step:25, Action:North
State  261
Old Q Values:  [643.03017275 -40.34168621 318.38042471 -35.88578819]
New Q values:  [918.01386227 -40.34168621 318.38042471 -35.88578819]
Reward: 9  Episode Reward:  35
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812 2184.6726439  2047.16803099    0.        ]
------
Step:26, Action:South
State  183
Old Q Values:  [ 390.42113812 2184.6726439  2047.16803099    0.        ]
New Q values:  [ 390.42113812 1148.67321624 2047.16803099    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[918.01386227 -40.34168621 318.38042471 -35.88578819]
------
Step:27, Action:North
State  260
Old Q Values:  [ 1428.96948735 -8695.4397473   1670.47737734 -2601.74710518]
New Q values:  [ 1661.49313341 -8695.4397473   1670.47737734 -2601.74710518]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3635.01779491     0.        ]
------
Step:28, Action:East
State  179
Old Q Values:  [   0.         4614.46100011 9830.49615301    0.        ]
New Q values:  [    0.          4614.46100011 65788.51661478     0.        ]
Reward: 100009  Episode Reward:  100042
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 2559.22521684 -180.6        1048.38756654]
------
Step:1, Action:South
State  138
Old Q Values:  [-139.45925583 2559.22521684 -180.6        1048.38756654]
New Q values:  [-139.45925583 1381.85303169 -180.6        1048.38756654]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  360.35081202  1175.87648318 -8489.43729461   531.09593838]
------
Step:2, Action:South
State  208
Old Q Values:  [ 5699.37080048   154.37117798   483.97903422 -1455.65174173]
New Q values:  [ 5699.37080048  2458.53858081   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3444.63759081 -6569.7824655  -7525.7277781   7971.3003654 ]
------
Step:3, Action:West
State  288
Old Q Values:  [-3444.63759081 -6569.7824655  -7525.7277781   7971.3003654 ]
New Q values:  [-3444.63759081 -6569.7824655  -7525.7277781  13438.39581745]
Reward: 9  Episode Reward:  27
xxxxx
xg. x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2041.50021343 34148.25223763]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2041.50021343 34148.25223763]
New Q values:  [-2527.46239811 -8521.23367799  2041.50021343 14165.84410825]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1661.49313341 -8695.4397473   1670.47737734 -2601.74710518]
------
Step:5, Action:East
State  260
Old Q Values:  [ 1661.49313341 -8695.4397473   1670.47737734 -2601.74710518]
New Q values:  [ 1661.49313341 -8695.4397473   4917.34418341 -2601.74710518]
Reward: -1  Episode Reward:  35
xxxxx
x.. x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2041.50021343 14165.84410825]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2041.50021343 14165.84410825]
New Q values:  [-2527.46239811 -8521.23367799  4847.51883061 14165.84410825]
Reward: -1  Episode Reward:  34
xxxxx
x.. x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3444.63759081 -6569.7824655  -7525.7277781  13438.39581745]
------
Step:7, Action:North
State  288
Old Q Values:  [-3444.63759081 -6569.7824655  -7525.7277781  13438.39581745]
New Q values:  [  420.93161465 -6569.7824655  -7525.7277781  13438.39581745]
Reward: -1  Episode Reward:  33
xxxxx
x.. x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.99795550e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [5.99795550e+03 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [3.96385429e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  32
xxxxx
x..ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   4042.24977339   -180.00807518 124133.20235592]
------
Step:9, Action:West
State  130
Old Q Values:  [ 36041.91667283   4042.24977339   -180.00807518 124133.20235592]
New Q values:  [36041.91667283  4042.24977339  -180.00807518 75611.41050743]
Reward: 9  Episode Reward:  41
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         4272.38349051 43132.21661867 86509.0985502 ]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624   2424.19803476   167.64922385]
New Q values:  [ -281.736      -9545.4473624   2424.19803476    88.95153278]
Reward: 9  Episode Reward:  50
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684    54.97281082  -180.6       ]
------
Step:11, Action:East
State  108
Old Q Values:  [-8463.16477134  1099.33235633   642.92809613     0.        ]
New Q values:  [-8463.16477134  1099.33235633   344.90773917     0.        ]
Reward: -1  Episode Reward:  49
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   294.4550024   -335.25482457]
------
Step:12, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   294.4550024   -335.25482457]
New Q values:  [-9594.56523706 -8069.05606225   377.51148215  -335.25482457]
Reward: -1  Episode Reward:  48
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   867.7649373  -6245.61866138 -6865.88348535]
------
Step:13, Action:South
State  136
Old Q Values:  [-5281.21195651   867.7649373  -6245.61866138 -6865.88348535]
New Q values:  [-5281.21195651  2056.31721506 -6245.61866138 -6865.88348535]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5699.37080048  2458.53858081   483.97903422 -1455.65174173]
------
Step:14, Action:South
State  208
Old Q Values:  [ 5699.37080048  2458.53858081   483.97903422 -1455.65174173]
New Q values:  [ 5699.37080048  5014.33417756   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  420.93161465 -6569.7824655  -7525.7277781  13438.39581745]
------
Step:15, Action:West
State  288
Old Q Values:  [  420.93161465 -6569.7824655  -7525.7277781  13438.39581745]
New Q values:  [  420.93161465 -6569.7824655  -7525.7277781   9624.51155945]
Reward: -1  Episode Reward:  45
xxxxx
x g x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4847.51883061 14165.84410825]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4847.51883061 14165.84410825]
New Q values:  [-2527.46239811 -8521.23367799  4847.51883061  7140.94089832]
Reward: -1  Episode Reward:  44
xxxxx
xg  x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1661.49313341 -8695.4397473   4917.34418341 -2601.74710518]
------
Step:17, Action:East
State  260
Old Q Values:  [ 1661.49313341 -8695.4397473   4917.34418341 -2601.74710518]
New Q values:  [ 1661.49313341 -8695.4397473   4108.61994286 -2601.74710518]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4847.51883061  7140.94089832]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4847.51883061  7140.94089832]
New Q values:  [-2527.46239811 -8521.23367799  4847.51883061 26891.51889619]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474  6585.91095232  1875.31501677]
------
Step:19, Action:North
State  261
Old Q Values:  [918.01386227 -40.34168621 318.38042471 -35.88578819]
New Q values:  [402.67005125 -40.34168621 318.38042471 -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  120.21502113    98.60358532 -5509.864806    -180.6       ]
------
Step:20, Action:North
State  181
Old Q Values:  [  120.21502113    98.60358532 -5509.864806    -180.6       ]
New Q values:  [  166.96516621    98.60358532 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  398.26385921   31.9495824  -180.6       ]
------
Step:21, Action:South
State  111
Old Q Values:  [-177.44732869  358.74212015   96.65038957 -120.29354603]
New Q values:  [-177.44732869  192.98639792   96.65038957 -120.29354603]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  166.96516621    98.60358532 -5509.864806    -180.6       ]
------
Step:22, Action:North
State  181
Old Q Values:  [  166.96516621    98.60358532 -5509.864806    -180.6       ]
New Q values:  [  185.66522425    98.60358532 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  38
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  398.26385921   31.9495824  -180.6       ]
------
Step:23, Action:South
State  111
Old Q Values:  [-177.44732869  192.98639792   96.65038957 -120.29354603]
New Q values:  [-177.44732869  132.29412644   96.65038957 -120.29354603]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  185.66522425    98.60358532 -5509.864806    -180.6       ]
------
Step:24, Action:North
State  181
Old Q Values:  [  185.66522425    98.60358532 -5509.864806    -180.6       ]
New Q values:  [  193.14524746    98.60358532 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  36
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  398.26385921   31.9495824  -180.6       ]
------
Step:25, Action:South
State  109
Old Q Values:  [-241.10880094  398.26385921   31.9495824  -180.6       ]
New Q values:  [-241.10880094  216.64911792   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  193.14524746    98.60358532 -5509.864806    -180.6       ]
------
Step:26, Action:North
State  181
Old Q Values:  [  193.14524746    98.60358532 -5509.864806    -180.6       ]
New Q values:  [  141.65283436    98.60358532 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  216.64911792   31.9495824  -180.6       ]
------
Step:27, Action:South
State  109
Old Q Values:  [-241.10880094  216.64911792   31.9495824  -180.6       ]
New Q values:  [-241.10880094  128.55549748   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  141.65283436    98.60358532 -5509.864806    -180.6       ]
------
Step:28, Action:North
State  180
Old Q Values:  [ -897.86098047   675.35650772  6195.06469515 -4966.32149798]
New Q values:  [-6029.94468529   675.35650772  6195.06469515 -4966.32149798]
Reward: -10001  Episode Reward:  -9968
xxxxx
xg  x
x . x
x   x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1381.85303169 -180.6        1048.38756654]
------
Step:1, Action:West
State  136
Old Q Values:  [-5281.21195651  2056.31721506 -6245.61866138 -6865.88348535]
New Q values:  [-5281.21195651  2056.31721506 -6245.61866138 -2684.77829924]
Reward: 9  Episode Reward:  9
xxxxx
x.agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9455.07588649   187.25031635]
------
Step:2, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   377.51148215  -335.25482457]
New Q values:  [-9594.56523706 -8069.05606225   377.51148215   -90.13528059]
Reward: 9  Episode Reward:  18
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  128.55549748   31.9495824  -180.6       ]
------
Step:3, Action:South
State  108
Old Q Values:  [-8463.16477134  1099.33235633   344.90773917     0.        ]
New Q values:  [-8463.16477134   752.96092799   344.90773917     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263   712.69143835  1026.09328485     0.        ]
------
Step:4, Action:East
State  189
Old Q Values:  [   9.84673294 1102.64474348 -232.44066224  940.95197235]
New Q values:  [   9.84673294 1102.64474348  982.8710166   940.95197235]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3568.15760499  636.07481225  408.67479662]
------
Step:5, Action:South
State  200
Old Q Values:  [  62.8218634  3568.15760499  636.07481225  408.67479662]
New Q values:  [  62.8218634  9500.11871085  636.07481225  408.67479662]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4847.51883061 26891.51889619]
------
Step:6, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1179.7269091    642.15435654]
New Q values:  [   16.82637525 -5807.06396197  1179.7269091    383.06275799]
Reward: 9  Episode Reward:  54
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[402.67005125 -40.34168621 318.38042471 -35.88578819]
------
Step:7, Action:North
State  260
Old Q Values:  [ 1661.49313341 -8695.4397473   4108.61994286 -2601.74710518]
New Q values:  [  971.82523882 -8695.4397473   4108.61994286 -2601.74710518]
Reward: -1  Episode Reward:  53
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263   712.69143835  1026.09328485     0.        ]
------
Step:8, Action:East
State  189
Old Q Values:  [   9.84673294 1102.64474348  982.8710166   940.95197235]
New Q values:  [   9.84673294 1102.64474348  964.34635147  940.95197235]
Reward: -1  Episode Reward:  52
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         1905.99314945  814.02974331  399.75525955]
------
Step:9, Action:South
State  204
Old Q Values:  [   0.         1905.99314945  814.02974331  399.75525955]
New Q values:  [   0.         1115.71533251  814.02974331  399.75525955]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x g x
x a.x
xxxxx
Step:10, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549  2411.20374302 24273.43899072]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  6.38572350e+04  2.42734390e+04]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  6.16972718e+03  1.20371620e+03]
------
Step:1, Action:East
State  195
Old Q Values:  [  38.85388605 2677.89620798 5719.2081007  1101.59744825]
New Q values:  [   38.85388605  2677.89620798 14184.6461127   1101.59744825]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.96385429e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [ 5699.37080048  5014.33417756   483.97903422 -1455.65174173]
New Q values:  [ 2699.7042297   5014.33417756   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1381.85303169 -180.6        1048.38756654]
------
Step:3, Action:West
State  138
Old Q Values:  [-139.45925583 1381.85303169 -180.6        1048.38756654]
New Q values:  [-139.45925583 1381.85303169 -180.6        1220.22307381]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2651.56015732    24.71104526]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2651.56015732    24.71104526]
New Q values:  [ -253.44886264 -1902.20915811  1474.57997244    24.71104526]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1381.85303169 -180.6        1220.22307381]
------
Step:5, Action:West
State  138
Old Q Values:  [-139.45925583 1381.85303169 -180.6        1220.22307381]
New Q values:  [-139.45925583 1381.85303169 -180.6         929.86322126]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1474.57997244    24.71104526]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2424.19803476    88.95153278]
New Q values:  [ -281.736      -9545.4473624   1383.63512341    88.95153278]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1381.85303169 -180.6         929.86322126]
------
Step:7, Action:South
State  138
Old Q Values:  [-139.45925583 1381.85303169 -180.6         929.86322126]
New Q values:  [ -139.45925583 12443.7040851   -180.6          929.86322126]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.96385429e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [3.96385429e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.95879284e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 12443.7040851   -180.6          929.86322126]
------
Step:9, Action:South
State  138
Old Q Values:  [ -139.45925583 12443.7040851   -180.6          929.86322126]
New Q values:  [ -139.45925583 10853.26015067  -180.6          929.86322126]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.95879284e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:10, Action:North
State  216
Old Q Values:  [  360.35081202  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [ 3399.51837001  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 10853.26015067  -180.6          929.86322126]
------
Step:11, Action:South
State  138
Old Q Values:  [ -139.45925583 10853.26015067  -180.6          929.86322126]
New Q values:  [ -139.45925583 10217.0825769   -180.6          929.86322126]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.95879284e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:12, Action:North
State  216
Old Q Values:  [ 3399.51837001  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [ 4424.33212107  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 10217.0825769   -180.6          929.86322126]
------
Step:13, Action:South
State  138
Old Q Values:  [ -139.45925583 10217.0825769   -180.6          929.86322126]
New Q values:  [-139.45925583 9962.61154739 -180.6         929.86322126]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.95879284e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
------
Step:14, Action:North
State  216
Old Q Values:  [ 4424.33212107  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [ 4757.91631264  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 9962.61154739 -180.6         929.86322126]
------
Step:15, Action:South
State  138
Old Q Values:  [-139.45925583 9962.61154739 -180.6         929.86322126]
New Q values:  [-139.45925583 5488.74487222 -180.6         929.86322126]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2699.7042297   5014.33417756   483.97903422 -1455.65174173]
------
Step:16, Action:South
State  210
Old Q Values:  [1.95879284e+04 3.35858896e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.95879284e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  24
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  420.93161465 -6569.7824655  -7525.7277781   9624.51155945]
------
Step:17, Action:North
State  288
Old Q Values:  [  420.93161465 -6569.7824655  -7525.7277781   9624.51155945]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781   9624.51155945]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2699.7042297   5014.33417756   483.97903422 -1455.65174173]
------
Step:18, Action:South
State  208
Old Q Values:  [ 2699.7042297   5014.33417756   483.97903422 -1455.65174173]
New Q values:  [ 2699.7042297   4892.48713886   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  22
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781   9624.51155945]
------
Step:19, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781   9624.51155945]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781  23006.3751133 ]
Reward: -1  Episode Reward:  21
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  6.38572350e+04  2.42734390e+04]
------
Step:20, Action:East
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  6.38572350e+04  2.42734390e+04]
New Q values:  [   37.74111519  -168.92307549 32444.20652001 24273.43899072]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781  23006.3751133 ]
------
Step:21, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781  23006.3751133 ]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781  17269.40571417]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4847.51883061 26891.51889619]
------
Step:22, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 32444.20652001 24273.43899072]
New Q values:  [   37.74111519  -168.92307549 32444.20652001 33750.51813314]
Reward: 9  Episode Reward:  28
xxxxx
x.  x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[80119.14178952  2256.66526474  6585.91095232  1875.31501677]
------
Step:23, Action:North
State  257
Old Q Values:  [80119.14178952  2256.66526474  6585.91095232  1875.31501677]
New Q values:  [51789.61170024  2256.66526474  6585.91095232  1875.31501677]
Reward: 9  Episode Reward:  37
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 65788.51661478     0.        ]
------
Step:24, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 65788.51661478     0.        ]
New Q values:  [    0.          4614.46100011 28165.72479948     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  6.16972718e+03  1.20371620e+03]
------
Step:25, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 14184.6461127   1101.59744825]
New Q values:  [   38.85388605  2677.89620798 11549.63696171  1101.59744825]
Reward: -1  Episode Reward:  35
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.95879284e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:26, Action:North
State  208
Old Q Values:  [ 2699.7042297   4892.48713886   483.97903422 -1455.65174173]
New Q values:  [23762.70484411  4892.48713886   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  34
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  4042.24977339  -180.00807518 75611.41050743]
------
Step:27, Action:West
State  130
Old Q Values:  [36041.91667283  4042.24977339  -180.00807518 75611.41050743]
New Q values:  [36041.91667283  4042.24977339  -180.00807518 69094.44237728]
Reward: -1  Episode Reward:  33
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
------
Step:28, Action:West
State  114
Old Q Values:  [ -180.6         4272.38349051 43132.21661867 86509.0985502 ]
New Q values:  [  -180.6          4272.38349051  43132.21661867 131003.36798688]
Reward: 100009  Episode Reward:  100042
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  6.16972718e+03  1.20371620e+03]
------
Step:1, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 11549.63696171  1101.59744825]
New Q values:  [   38.85388605  2677.89620798 10501.63330131  1101.59744825]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.95879284e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:2, Action:North
State  210
Old Q Values:  [1.95879284e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [9.48719482e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5488.74487222 -180.6         929.86322126]
------
Step:3, Action:South
State  130
Old Q Values:  [36041.91667283  4042.24977339  -180.00807518 69094.44237728]
New Q values:  [36041.91667283  4462.45835451  -180.00807518 69094.44237728]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9.48719482e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  208
Old Q Values:  [23762.70484411  4892.48713886   483.97903422 -1455.65174173]
New Q values:  [11151.10539931  4892.48713886   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5488.74487222 -180.6         929.86322126]
------
Step:5, Action:West
State  138
Old Q Values:  [-139.45925583 5488.74487222 -180.6         929.86322126]
New Q values:  [-139.45925583 5488.74487222 -180.6         792.43582553]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1383.63512341    88.95153278]
------
Step:6, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1474.57997244    24.71104526]
New Q values:  [ -253.44886264 -1902.20915811  2235.85545064    24.71104526]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5488.74487222 -180.6         792.43582553]
------
Step:7, Action:West
State  136
Old Q Values:  [-5281.21195651  2056.31721506 -6245.61866138 -2684.77829924]
New Q values:  [-5281.21195651  2056.31721506 -6245.61866138 -1018.33622479]
Reward: -1  Episode Reward:  23
xxxxx
x.agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9455.07588649   187.25031635]
------
Step:8, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   377.51148215   -90.13528059]
New Q values:  [-9594.56523706 -8069.05606225   377.51148215   -28.6371518 ]
Reward: 9  Episode Reward:  32
xxxxx
xag x
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            6.72320144 -1242.00860919     0.        ]
------
Step:9, Action:South
State  107
Old Q Values:  [-252.35169558    7.11267516  902.97566024 -252.78192178]
New Q values:  [-252.35169558   14.23378158  902.97566024 -252.78192178]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xag x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   19.96237173     0.         -4434.34340468  -178.98      ]
------
Step:10, Action:North
State  185
Old Q Values:  [   19.96237173     0.         -4434.34340468  -178.98      ]
New Q values:  [    9.40190913     0.         -4434.34340468  -178.98      ]
Reward: -1  Episode Reward:  40
xxxxx
xag x
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            6.72320144 -1242.00860919     0.        ]
------
Step:11, Action:South
State  104
Old Q Values:  [-8652.84         853.17632684  1078.33520689 -8652.84      ]
New Q values:  [-8652.84         745.80419858  1078.33520689 -8652.84      ]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[-2081.09028721     0.          1350.44555947     0.        ]
------
Step:12, Action:East
State  185
Old Q Values:  [    9.40190913     0.         -4434.34340468  -178.98      ]
New Q values:  [   9.40190913    0.         1075.69825138 -178.98      ]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  9500.11871085  636.07481225  408.67479662]
------
Step:13, Action:South
State  200
Old Q Values:  [  62.8218634  9500.11871085  636.07481225  408.67479662]
New Q values:  [   62.8218634  11866.9031532    636.07481225   408.67479662]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4847.51883061 26891.51889619]
------
Step:14, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1179.7269091    383.06275799]
New Q values:  [   16.82637525 -5807.06396197  1179.7269091   1391.21108605]
Reward: 9  Episode Reward:  46
xxxxx
xg  x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  971.82523882 -8695.4397473   4108.61994286 -2601.74710518]
------
Step:15, Action:East
State  260
Old Q Values:  [  971.82523882 -8695.4397473   4108.61994286 -2601.74710518]
New Q values:  [  971.82523882 -8695.4397473   2060.21130296 -2601.74710518]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091   1391.21108605]
------
Step:16, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1179.7269091   1391.21108605]
New Q values:  [   16.82637525 -5807.06396197  1179.7269091  -4826.05217469]
Reward: -10001  Episode Reward:  -9956
xxxxx
x   x
x   x
xg .x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  971.82523882 -8695.4397473   2060.21130296 -2601.74710518]
------
Step:1, Action:East
State  260
Old Q Values:  [  971.82523882 -8695.4397473   2060.21130296 -2601.74710518]
New Q values:  [  971.82523882 -8695.4397473   1183.40259392 -2601.74710518]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091  -4826.05217469]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4847.51883061 26891.51889619]
New Q values:  [-2527.46239811 -8521.23367799  7125.2292465  26891.51889619]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x...x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781  17269.40571417]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781  17269.40571417]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781  14974.61795453]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7125.2292465  26891.51889619]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7125.2292465  26891.51889619]
New Q values:  [-2527.46239811 -8521.23367799  7125.2292465  10876.80857385]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[402.67005125 -40.34168621 318.38042471 -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [402.67005125 -40.34168621 318.38042471 -35.88578819]
New Q values:  [780.6184298  -40.34168621 318.38042471 -35.88578819]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812 1148.67321624 2047.16803099    0.        ]
------
Step:6, Action:East
State  183
Old Q Values:  [ 390.42113812 1148.67321624 2047.16803099    0.        ]
New Q values:  [ 390.42113812 1148.67321624 1488.69936317    0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  22.48535485 2214.77383591  549.89931413  753.62201984]
------
Step:7, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -2.89484643e+03  1.26155064e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
Reward: -10001  Episode Reward:  -9967
xxxxx
x ..x
x  .x
x g x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  132.29412644   96.65038957 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6         396.18005552    5.4           0.        ]
New Q values:  [-180.6         610.48183116    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812 1148.67321624 1488.69936317    0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  3635.01779491     0.        ]
New Q values:  [    0.         -5969.29177534  5244.05902686     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  6.16972718e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  5.31944932e+03  1.20371620e+03]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9.48719482e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  210
Old Q Values:  [9.48719482e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.45286106e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  4462.45835451  -180.00807518 69094.44237728]
------
Step:5, Action:West
State  138
Old Q Values:  [-139.45925583 5488.74487222 -180.6         792.43582553]
New Q values:  [-139.45925583 5488.74487222 -180.6         737.46486723]
Reward: 9  Episode Reward:  45
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1383.63512341    88.95153278]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1383.63512341    88.95153278]
New Q values:  [ -281.736      -9545.4473624   2199.47751103    88.95153278]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5488.74487222 -180.6         737.46486723]
------
Step:7, Action:South
State  136
Old Q Values:  [-5281.21195651  2056.31721506 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651  2249.30177982 -6245.61866138 -1018.33622479]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4757.91631264  1175.87648318 -8489.43729461   531.09593838]
------
Step:8, Action:North
State  216
Old Q Values:  [ 4757.91631264  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [ 3549.18998672  1175.87648318 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5488.74487222 -180.6         737.46486723]
------
Step:9, Action:South
State  136
Old Q Values:  [-5281.21195651  2249.30177982 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651  1963.87770794 -6245.61866138 -1018.33622479]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3549.18998672  1175.87648318 -8489.43729461   531.09593838]
------
Step:10, Action:North
State  216
Old Q Values:  [ 3549.18998672  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [-3991.76069293  1175.87648318 -8489.43729461   531.09593838]
Reward: -10001  Episode Reward:  -9960
xxxxx
x  gx
x   x
x.. x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.12823720e+04 8.03350364e+03 2.91043938e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144   834.11025312   174.55451539     0.        ]
New Q values:  [-2469.90645144   692.96217398   174.55451539     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091  -4826.05217469]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7125.2292465  10876.80857385]
New Q values:  [-2527.46239811 -8521.23367799  7347.87708496 10876.80857385]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781  14974.61795453]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781  14974.61795453]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781   9252.28975397]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7347.87708496 10876.80857385]
------
Step:4, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 32444.20652001 33750.51813314]
New Q values:  [   37.74111519  -168.92307549 32444.20652001 13739.7927822 ]
Reward: 9  Episode Reward:  26
xxxxx
x .gx
x. .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[780.6184298  -40.34168621 318.38042471 -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [780.6184298  -40.34168621 318.38042471 -35.88578819]
New Q values:  [360.14322223 -40.34168621 318.38042471 -35.88578819]
Reward: 9  Episode Reward:  35
xxxxx
x g.x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  141.65283436    98.60358532 -5509.864806    -180.6       ]
------
Step:6, Action:North
State  181
Old Q Values:  [  141.65283436    98.60358532 -5509.864806    -180.6       ]
New Q values:  [  239.20568309    98.60358532 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
xa..x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         610.48183116    5.4           0.        ]
------
Step:7, Action:South
State  103
Old Q Values:  [-180.6         610.48183116    5.4           0.        ]
New Q values:  [-180.6         315.35443739    5.4           0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  239.20568309    98.60358532 -5509.864806    -180.6       ]
------
Step:8, Action:North
State  181
Old Q Values:  [  239.20568309    98.60358532 -5509.864806    -180.6       ]
New Q values:  [  189.68860445    98.60358532 -5509.864806    -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
xa..x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         315.35443739    5.4           0.        ]
------
Step:9, Action:South
State  103
Old Q Values:  [-180.6         315.35443739    5.4           0.        ]
New Q values:  [-180.6         572.15158391    5.4           0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812 1148.67321624 1488.69936317    0.        ]
------
Step:10, Action:East
State  181
Old Q Values:  [  189.68860445    98.60358532 -5509.864806    -180.6       ]
New Q values:  [  189.68860445    98.60358532 -7996.65727021  -180.6       ]
Reward: -10001  Episode Reward:  -9970
xxxxx
x ..x
x g.x
x   x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  128.55549748   31.9495824  -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869  132.29412644   96.65038957 -120.29354603]
New Q values:  [-177.44732869  115.22423191   96.65038957 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  189.68860445    98.60358532 -7996.65727021  -180.6       ]
------
Step:2, Action:North
State  181
Old Q Values:  [  189.68860445    98.60358532 -7996.65727021  -180.6       ]
New Q values:  [  113.84209102    98.60358532 -7996.65727021  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
xa.gx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  128.55549748   31.9495824  -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869  115.22423191   96.65038957 -120.29354603]
New Q values:  [-177.44732869   79.64232007   96.65038957 -120.29354603]
Reward: -1  Episode Reward:  7
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  113.84209102    98.60358532 -7996.65727021  -180.6       ]
------
Step:4, Action:North
State  181
Old Q Values:  [  113.84209102    98.60358532 -7996.65727021  -180.6       ]
New Q values:  [   73.93195328    98.60358532 -7996.65727021  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
xa. x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007   96.65038957 -120.29354603]
------
Step:5, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684    54.97281082  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   687.23237764  -180.6       ]
Reward: 9  Episode Reward:  15
xxxxx
x a x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2199.47751103    88.95153278]
------
Step:6, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   377.51148215   -28.6371518 ]
New Q values:  [-9594.56523706 -8069.05606225   739.56790524   -28.6371518 ]
Reward: -1  Episode Reward:  14
xxxxx
xg ax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  1963.87770794 -6245.61866138 -1018.33622479]
------
Step:7, Action:South
State  136
Old Q Values:  [-5281.21195651  1963.87770794 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651  1143.71402813 -6245.61866138 -1018.33622479]
Reward: 9  Episode Reward:  23
xxxxx
x g x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-3991.76069293  1175.87648318 -8489.43729461   531.09593838]
------
Step:8, Action:South
State  216
Old Q Values:  [-3991.76069293  1175.87648318 -8489.43729461   531.09593838]
New Q values:  [-3991.76069293  3251.43751946 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  32
xxxxx
x  gx
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781   9252.28975397]
------
Step:9, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781   9252.28975397]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781   6969.35847374]
Reward: 9  Episode Reward:  41
xxxxx
x g x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7347.87708496 10876.80857385]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7347.87708496 10876.80857385]
New Q values:  [-2527.46239811 -8521.23367799  7347.87708496  4711.14420771]
Reward: 9  Episode Reward:  50
xxxxx
xg  x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  971.82523882 -8695.4397473   1183.40259392 -2601.74710518]
------
Step:11, Action:East
State  261
Old Q Values:  [360.14322223 -40.34168621 318.38042471 -35.88578819]
New Q values:  [ 360.14322223  -40.34168621 2331.11529537  -35.88578819]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7347.87708496  4711.14420771]
------
Step:12, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 32444.20652001 13739.7927822 ]
New Q values:  [   37.74111519  -168.92307549 15067.89015012 13739.7927822 ]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781   6969.35847374]
------
Step:13, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781   6969.35847374]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781   4991.50651498]
Reward: -1  Episode Reward:  47
xxxxx
x g x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7347.87708496  4711.14420771]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7347.87708496  4711.14420771]
New Q values:  [-2527.46239811 -8521.23367799  4436.00278848  4711.14420771]
Reward: -1  Episode Reward:  46
xxxxx
xg  x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781   4991.50651498]
------
Step:15, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781   4991.50651498]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781   3409.34586831]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4436.00278848  4711.14420771]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4436.00278848  4711.14420771]
New Q values:  [-2527.46239811 -8521.23367799  4436.00278848 -3761.12153874]
Reward: -10001  Episode Reward:  -9956
xxxxx
x   x
x . x
xg  x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781   3409.34586831]
------
Step:1, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781   3409.34586831]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781   2699.93918387]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4436.00278848 -3761.12153874]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4436.00278848 -3761.12153874]
New Q values:  [-2527.46239811 -8521.23367799  2583.78287055 -3761.12153874]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781   2699.93918387]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781   2699.93918387]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781   1854.51053471]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2583.78287055 -3761.12153874]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2583.78287055 -3761.12153874]
New Q values:  [-2527.46239811 -8521.23367799  1589.26630863 -3761.12153874]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781   1854.51053471]
------
Step:5, Action:West
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781   1854.51053471]
New Q values:  [ 1672.07289913 -6569.7824655  -7525.7277781   1217.98410647]
Reward: -1  Episode Reward:  5
xxxxx
xg..x
x ..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1589.26630863 -3761.12153874]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1589.26630863 -3761.12153874]
New Q values:  [-2527.46239811 -8521.23367799  1136.72839319 -3761.12153874]
Reward: -1  Episode Reward:  4
xxxxx
x.g.x
x ..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.07289913 -6569.7824655  -7525.7277781   1217.98410647]
------
Step:7, Action:North
State  288
Old Q Values:  [ 1672.07289913 -6569.7824655  -7525.7277781   1217.98410647]
New Q values:  [ 4019.56077944 -6569.7824655  -7525.7277781   1217.98410647]
Reward: 9  Episode Reward:  13
xxxxx
xg..x
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11151.10539931  4892.48713886   483.97903422 -1455.65174173]
------
Step:8, Action:North
State  208
Old Q Values:  [11151.10539931  4892.48713886   483.97903422 -1455.65174173]
New Q values:  [ 4808.95636816  4892.48713886   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  22
xxxxx
x.gax
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  1143.71402813 -6245.61866138 -1018.33622479]
------
Step:9, Action:South
State  130
Old Q Values:  [36041.91667283  4462.45835451  -180.00807518 69094.44237728]
New Q values:  [36041.91667283  3252.12948346  -180.00807518 69094.44237728]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4808.95636816  4892.48713886   483.97903422 -1455.65174173]
------
Step:10, Action:South
State  208
Old Q Values:  [ 4808.95636816  4892.48713886   483.97903422 -1455.65174173]
New Q values:  [ 4808.95636816  3162.26308938   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  20
xxxxx
x.g x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4019.56077944 -6569.7824655  -7525.7277781   1217.98410647]
------
Step:11, Action:North
State  288
Old Q Values:  [ 4019.56077944 -6569.7824655  -7525.7277781   1217.98410647]
New Q values:  [ 3049.91122223 -6569.7824655  -7525.7277781   1217.98410647]
Reward: -1  Episode Reward:  19
xxxxx
x..gx
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4808.95636816  3162.26308938   483.97903422 -1455.65174173]
------
Step:12, Action:South
State  208
Old Q Values:  [ 4808.95636816  3162.26308938   483.97903422 -1455.65174173]
New Q values:  [ 4808.95636816  2179.27860242   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x .gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3049.91122223 -6569.7824655  -7525.7277781   1217.98410647]
------
Step:13, Action:West
State  288
Old Q Values:  [ 3049.91122223 -6569.7824655  -7525.7277781   1217.98410647]
New Q values:  [ 3049.91122223 -6569.7824655  -7525.7277781   5006.96068763]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 15067.89015012 13739.7927822 ]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1136.72839319 -3761.12153874]
New Q values:  [-2527.46239811 -8521.23367799  1956.17956356 -3761.12153874]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3049.91122223 -6569.7824655  -7525.7277781   5006.96068763]
------
Step:15, Action:West
State  288
Old Q Values:  [ 3049.91122223 -6569.7824655  -7525.7277781   5006.96068763]
New Q values:  [ 3049.91122223 -6569.7824655  -7525.7277781   2589.03814412]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1956.17956356 -3761.12153874]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1956.17956356 -3761.12153874]
New Q values:  [-2527.46239811 -8521.23367799  1696.84519209 -3761.12153874]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3049.91122223 -6569.7824655  -7525.7277781   2589.03814412]
------
Step:17, Action:North
State  288
Old Q Values:  [ 3049.91122223 -6569.7824655  -7525.7277781   2589.03814412]
New Q values:  [ 8577.94768091 -6569.7824655  -7525.7277781   2589.03814412]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x .ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.45286106e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:18, Action:North
State  210
Old Q Values:  [2.45286106e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [3.05391770e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  12
xxxxx
x..ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  3252.12948346  -180.00807518 69094.44237728]
------
Step:19, Action:West
State  130
Old Q Values:  [36041.91667283  3252.12948346  -180.00807518 69094.44237728]
New Q values:  [36041.91667283  3252.12948346  -180.00807518 66944.18734697]
Reward: 9  Episode Reward:  21
xxxxx
x.a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  43132.21661867 131003.36798688]
------
Step:20, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624   2199.47751103    88.95153278]
New Q values:  [ -281.736      -9545.4473624   2199.47751103   311.87331119]
Reward: 9  Episode Reward:  30
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.23378158  902.97566024 -252.78192178]
------
Step:21, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   687.23237764  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   934.13620436  -180.6       ]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2199.47751103   311.87331119]
------
Step:22, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2199.47751103   311.87331119]
New Q values:  [ -281.736      -9545.4473624   2525.81446608   311.87331119]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5488.74487222 -180.6         737.46486723]
------
Step:23, Action:South
State  138
Old Q Values:  [-139.45925583 5488.74487222 -180.6         737.46486723]
New Q values:  [ -139.45925583 11356.65103965  -180.6          737.46486723]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x .ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.05391770e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:24, Action:North
State  210
Old Q Values:  [3.05391770e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.56220661e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 11356.65103965  -180.6          737.46486723]
------
Step:25, Action:South
State  138
Old Q Values:  [ -139.45925583 11356.65103965  -180.6          737.46486723]
New Q values:  [-139.45925583 5517.4916717  -180.6         737.46486723]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-3991.76069293  3251.43751946 -8489.43729461   531.09593838]
------
Step:26, Action:South
State  208
Old Q Values:  [ 4808.95636816  2179.27860242   483.97903422 -1455.65174173]
New Q values:  [ 4808.95636816  3444.49574524   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  24
xxxxx
x g x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8577.94768091 -6569.7824655  -7525.7277781   2589.03814412]
------
Step:27, Action:North
State  288
Old Q Values:  [ 8577.94768091 -6569.7824655  -7525.7277781   2589.03814412]
New Q values:  [ 4406.0103282  -6569.7824655  -7525.7277781   2589.03814412]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-3991.76069293  3251.43751946 -8489.43729461   531.09593838]
------
Step:28, Action:South
State  208
Old Q Values:  [ 4808.95636816  3444.49574524   483.97903422 -1455.65174173]
New Q values:  [ 4808.95636816  2699.00139656   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  22
xxxxx
x g x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4406.0103282  -6569.7824655  -7525.7277781   2589.03814412]
------
Step:29, Action:North
State  288
Old Q Values:  [ 4406.0103282  -6569.7824655  -7525.7277781   2589.03814412]
New Q values:  [ 3204.49104173 -6569.7824655  -7525.7277781   2589.03814412]
Reward: -1  Episode Reward:  21
xxxxx
x  gx
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4808.95636816  2699.00139656   483.97903422 -1455.65174173]
------
Step:30, Action:South
State  208
Old Q Values:  [ 4808.95636816  2699.00139656   483.97903422 -1455.65174173]
New Q values:  [ 4808.95636816  2040.34787114   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  20
xxxxx
x g x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3204.49104173 -6569.7824655  -7525.7277781   2589.03814412]
------
Step:31, Action:North
State  288
Old Q Values:  [ 3204.49104173 -6569.7824655  -7525.7277781   2589.03814412]
New Q values:  [ 2256.62767253 -6569.7824655  -7525.7277781   2589.03814412]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-3991.76069293  3251.43751946 -8489.43729461   531.09593838]
------
Step:32, Action:South
State  208
Old Q Values:  [ 4808.95636816  2040.34787114   483.97903422 -1455.65174173]
New Q values:  [ 4808.95636816  1592.25059169   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x .gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2256.62767253 -6569.7824655  -7525.7277781   2589.03814412]
------
Step:33, Action:West
State  288
Old Q Values:  [ 2256.62767253 -6569.7824655  -7525.7277781   2589.03814412]
New Q values:  [ 2256.62767253 -6569.7824655  -7525.7277781   5555.38230269]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x . x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 15067.89015012 13739.7927822 ]
------
Step:34, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 15067.89015012 13739.7927822 ]
New Q values:  [   37.74111519  -168.92307549 15067.89015012  6200.65170149]
Reward: 9  Episode Reward:  26
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 360.14322223  -40.34168621 2331.11529537  -35.88578819]
------
Step:35, Action:East
State  261
Old Q Values:  [ 360.14322223  -40.34168621 2331.11529537  -35.88578819]
New Q values:  [ 360.14322223  -40.34168621 5452.21316319  -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 15067.89015012  6200.65170149]
------
Step:36, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1696.84519209 -3761.12153874]
New Q values:  [-2527.46239811 -8521.23367799  1696.84519209   130.61533346]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 360.14322223  -40.34168621 5452.21316319  -35.88578819]
------
Step:37, Action:North
State  260
Old Q Values:  [  971.82523882 -8695.4397473   1183.40259392 -2601.74710518]
New Q values:  [ 1961.34780358 -8695.4397473   1183.40259392 -2601.74710518]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5244.05902686     0.        ]
------
Step:38, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 28165.72479948     0.        ]
New Q values:  [    0.          4614.46100011 72867.52471477     0.        ]
Reward: 100009  Episode Reward:  100032
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  1143.71402813 -6245.61866138 -1018.33622479]
------
Step:1, Action:South
State  136
Old Q Values:  [-5281.21195651  1143.71402813 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651  1905.5725217  -6245.61866138 -1018.33622479]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4808.95636816  1592.25059169   483.97903422 -1455.65174173]
------
Step:2, Action:South
State  208
Old Q Values:  [ 4808.95636816  1592.25059169   483.97903422 -1455.65174173]
New Q values:  [ 4808.95636816  2308.91492748   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2256.62767253 -6569.7824655  -7525.7277781   5555.38230269]
------
Step:3, Action:West
State  288
Old Q Values:  [ 2256.62767253 -6569.7824655  -7525.7277781   5555.38230269]
New Q values:  [ 2256.62767253 -6569.7824655  -7525.7277781   2736.6064787 ]
Reward: 9  Episode Reward:  27
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1696.84519209   130.61533346]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1696.84519209   130.61533346]
New Q values:  [-2527.46239811 -8521.23367799  1499.12002045   130.61533346]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2256.62767253 -6569.7824655  -7525.7277781   2736.6064787 ]
------
Step:5, Action:West
State  288
Old Q Values:  [ 2256.62767253 -6569.7824655  -7525.7277781   2736.6064787 ]
New Q values:  [ 2256.62767253 -6569.7824655  -7525.7277781   1543.77859762]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1499.12002045   130.61533346]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1499.12002045   130.61533346]
New Q values:  [-2527.46239811 -8521.23367799  1276.03630994   130.61533346]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2256.62767253 -6569.7824655  -7525.7277781   1543.77859762]
------
Step:7, Action:North
State  288
Old Q Values:  [ 2256.62767253 -6569.7824655  -7525.7277781   1543.77859762]
New Q values:  [ 2344.73797946 -6569.7824655  -7525.7277781   1543.77859762]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4808.95636816  2308.91492748   483.97903422 -1455.65174173]
------
Step:8, Action:North
State  208
Old Q Values:  [ 4808.95636816  2308.91492748   483.97903422 -1455.65174173]
New Q values:  [ 2494.65430378  2308.91492748   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  22
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  1905.5725217  -6245.61866138 -1018.33622479]
------
Step:9, Action:South
State  136
Old Q Values:  [-5281.21195651  1905.5725217  -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651  1510.02529981 -6245.61866138 -1018.33622479]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2494.65430378  2308.91492748   483.97903422 -1455.65174173]
------
Step:10, Action:North
State  208
Old Q Values:  [ 2494.65430378  2308.91492748   483.97903422 -1455.65174173]
New Q values:  [ 2652.50922302  2308.91492748   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5517.4916717  -180.6         737.46486723]
------
Step:11, Action:South
State  136
Old Q Values:  [-5281.21195651  1510.02529981 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651  1399.16288683 -6245.61866138 -1018.33622479]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2652.50922302  2308.91492748   483.97903422 -1455.65174173]
------
Step:12, Action:North
State  208
Old Q Values:  [ 2652.50922302  2308.91492748   483.97903422 -1455.65174173]
New Q values:  [ 2715.65119072  2308.91492748   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5517.4916717  -180.6         737.46486723]
------
Step:13, Action:South
State  138
Old Q Values:  [-139.45925583 5517.4916717  -180.6         737.46486723]
New Q values:  [-139.45925583 3021.09202589 -180.6         737.46486723]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2715.65119072  2308.91492748   483.97903422 -1455.65174173]
------
Step:14, Action:North
State  208
Old Q Values:  [ 2715.65119072  2308.91492748   483.97903422 -1455.65174173]
New Q values:  [ 1505.40934234  2308.91492748   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  16
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  1399.16288683 -6245.61866138 -1018.33622479]
------
Step:15, Action:South
State  136
Old Q Values:  [-5281.21195651  1399.16288683 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651  1251.73963298 -6245.61866138 -1018.33622479]
Reward: -1  Episode Reward:  15
xxxxx
x. gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1505.40934234  2308.91492748   483.97903422 -1455.65174173]
------
Step:16, Action:South
State  208
Old Q Values:  [ 1505.40934234  2308.91492748   483.97903422 -1455.65174173]
New Q values:  [ 1505.40934234  1626.38736483   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2344.73797946 -6569.7824655  -7525.7277781   1543.77859762]
------
Step:17, Action:West
State  288
Old Q Values:  [ 2344.73797946 -6569.7824655  -7525.7277781   1543.77859762]
New Q values:  [ 2344.73797946 -6569.7824655  -7525.7277781    999.72233203]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1276.03630994   130.61533346]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1276.03630994   130.61533346]
New Q values:  [-2527.46239811 -8521.23367799  1213.23591781   130.61533346]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2344.73797946 -6569.7824655  -7525.7277781    999.72233203]
------
Step:19, Action:North
State  288
Old Q Values:  [ 2344.73797946 -6569.7824655  -7525.7277781    999.72233203]
New Q values:  [ 5623.91502166 -6569.7824655  -7525.7277781    999.72233203]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.56220661e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:20, Action:North
State  208
Old Q Values:  [ 1505.40934234  1626.38736483   483.97903422 -1455.65174173]
New Q values:  [ 1507.8913447   1626.38736483   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  10
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 3021.09202589 -180.6         737.46486723]
------
Step:21, Action:South
State  136
Old Q Values:  [-5281.21195651  1251.73963298 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651   988.01206264 -6245.61866138 -1018.33622479]
Reward: -1  Episode Reward:  9
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1507.8913447   1626.38736483   483.97903422 -1455.65174173]
------
Step:22, Action:South
State  208
Old Q Values:  [ 1507.8913447   1626.38736483   483.97903422 -1455.65174173]
New Q values:  [ 1507.8913447   2337.12945243   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5623.91502166 -6569.7824655  -7525.7277781    999.72233203]
------
Step:23, Action:North
State  288
Old Q Values:  [ 5623.91502166 -6569.7824655  -7525.7277781    999.72233203]
New Q values:  [ 2950.10484439 -6569.7824655  -7525.7277781    999.72233203]
Reward: -1  Episode Reward:  7
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1507.8913447   2337.12945243   483.97903422 -1455.65174173]
------
Step:24, Action:South
State  208
Old Q Values:  [ 1507.8913447   2337.12945243   483.97903422 -1455.65174173]
New Q values:  [ 1507.8913447   1819.28323429   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  6
xxxxx
xg  x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2950.10484439 -6569.7824655  -7525.7277781    999.72233203]
------
Step:25, Action:North
State  288
Old Q Values:  [ 2950.10484439 -6569.7824655  -7525.7277781    999.72233203]
New Q values:  [ 1725.22690804 -6569.7824655  -7525.7277781    999.72233203]
Reward: -1  Episode Reward:  5
xxxxx
x.  x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1507.8913447   1819.28323429   483.97903422 -1455.65174173]
------
Step:26, Action:South
State  208
Old Q Values:  [ 1507.8913447   1819.28323429   483.97903422 -1455.65174173]
New Q values:  [ 1507.8913447   1244.68136613   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1725.22690804 -6569.7824655  -7525.7277781    999.72233203]
------
Step:27, Action:North
State  288
Old Q Values:  [ 1725.22690804 -6569.7824655  -7525.7277781    999.72233203]
New Q values:  [ 1141.85816663 -6569.7824655  -7525.7277781    999.72233203]
Reward: -1  Episode Reward:  3
xxxxx
x.g x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1507.8913447   1244.68136613   483.97903422 -1455.65174173]
------
Step:28, Action:North
State  208
Old Q Values:  [ 1507.8913447   1244.68136613   483.97903422 -1455.65174173]
New Q values:  [  898.96015667  1244.68136613   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  2
xxxxx
xg ax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   988.01206264 -6245.61866138 -1018.33622479]
------
Step:29, Action:South
State  138
Old Q Values:  [-139.45925583 3021.09202589 -180.6         737.46486723]
New Q values:  [-139.45925583 1581.2412202  -180.6         737.46486723]
Reward: -1  Episode Reward:  1
xxxxx
x.  x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  898.96015667  1244.68136613   483.97903422 -1455.65174173]
------
Step:30, Action:South
State  208
Old Q Values:  [  898.96015667  1244.68136613   483.97903422 -1455.65174173]
New Q values:  [  898.96015667   839.82999644   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  0
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1141.85816663 -6569.7824655  -7525.7277781    999.72233203]
------
Step:31, Action:North
State  288
Old Q Values:  [ 1141.85816663 -6569.7824655  -7525.7277781    999.72233203]
New Q values:  [-5274.16868635 -6569.7824655  -7525.7277781    999.72233203]
Reward: -10001  Episode Reward:  -10001
xxxxx
x.  x
x..gx
x.  x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007   96.65038957 -120.29354603]
------
Step:1, Action:East
State  107
Old Q Values:  [-252.35169558   14.23378158  902.97566024 -252.78192178]
New Q values:  [-252.35169558   14.23378158 1124.33460392 -252.78192178]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2525.81446608   311.87331119]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2525.81446608   311.87331119]
New Q values:  [ -281.736      -9545.4473624   1490.09815249   311.87331119]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1581.2412202  -180.6         737.46486723]
------
Step:3, Action:South
State  138
Old Q Values:  [-139.45925583 1581.2412202  -180.6         737.46486723]
New Q values:  [-139.45925583 1613.32774392 -180.6         737.46486723]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-3991.76069293  3251.43751946 -8489.43729461   531.09593838]
------
Step:4, Action:South
State  216
Old Q Values:  [-3991.76069293  3251.43751946 -8489.43729461   531.09593838]
New Q values:  [-3991.76069293  1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -6569.7824655  -7525.7277781    999.72233203]
------
Step:5, Action:West
State  288
Old Q Values:  [-5274.16868635 -6569.7824655  -7525.7277781    999.72233203]
New Q values:  [-5274.16868635 -6569.7824655  -7525.7277781    769.25970816]
Reward: 9  Episode Reward:  35
xxxxx
x   x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1213.23591781   130.61533346]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1213.23591781   130.61533346]
New Q values:  [-2527.46239811 -8521.23367799   715.47227957   130.61533346]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -6569.7824655  -7525.7277781    769.25970816]
------
Step:7, Action:West
State  288
Old Q Values:  [-5274.16868635 -6569.7824655  -7525.7277781    769.25970816]
New Q values:  [-5274.16868635 -6569.7824655  -7525.7277781    521.74556713]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   715.47227957   130.61533346]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   715.47227957   130.61533346]
New Q values:  [-2527.46239811 -8521.23367799   442.11258197   130.61533346]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -6569.7824655  -7525.7277781    521.74556713]
------
Step:9, Action:West
State  288
Old Q Values:  [-5274.16868635 -6569.7824655  -7525.7277781    521.74556713]
New Q values:  [-5274.16868635 -6569.7824655  -7525.7277781    340.73200144]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   442.11258197   130.61533346]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   442.11258197   130.61533346]
New Q values:  [-2527.46239811 -8521.23367799   278.46463322   130.61533346]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -6569.7824655  -7525.7277781    340.73200144]
------
Step:11, Action:West
State  288
Old Q Values:  [-5274.16868635 -6569.7824655  -7525.7277781    340.73200144]
New Q values:  [-5274.16868635 -6569.7824655  -7525.7277781  -5780.76780946]
Reward: -10001  Episode Reward:  -9971
xxxxx
x   x
x.. x
x.g x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 15067.89015012  6200.65170149]
------
Step:1, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 15067.89015012  6200.65170149]
New Q values:  [   37.74111519  -168.92307549 15067.89015012  4121.32462955]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 360.14322223  -40.34168621 5452.21316319  -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [ 360.14322223  -40.34168621 5452.21316319  -35.88578819]
New Q values:  [ 360.14322223  -40.34168621 6700.65231031  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 15067.89015012  4121.32462955]
------
Step:3, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 15067.89015012  4121.32462955]
New Q values:  [  37.74111519 -168.92307549 4444.30545415 4121.32462955]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -6569.7824655  -7525.7277781  -5780.76780946]
------
Step:4, Action:West
State  288
Old Q Values:  [-5274.16868635 -6569.7824655  -7525.7277781  -5780.76780946]
New Q values:  [-5274.16868635 -6569.7824655  -7525.7277781  -2229.36773382]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   278.46463322   130.61533346]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   278.46463322   130.61533346]
New Q values:  [-2527.46239811 -8521.23367799  -558.02446686   130.61533346]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x...x
x  ax
xxxxx
Step:6, Action:South
State  288
Old Q Values:  [-5274.16868635 -6569.7824655  -7525.7277781  -2229.36773382]
New Q values:  [-5274.16868635 -3477.32330635 -7525.7277781  -2229.36773382]
Reward: -301  Episode Reward:  -296
xxxxx
x...x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -3477.32330635 -7525.7277781  -2229.36773382]
------
Step:7, Action:West
State  288
Old Q Values:  [-5274.16868635 -3477.32330635 -7525.7277781  -2229.36773382]
New Q values:  [-5274.16868635 -3477.32330635 -7525.7277781   -853.16249349]
Reward: -1  Episode Reward:  -297
xxxxx
x...x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  -558.02446686   130.61533346]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  -558.02446686   130.61533346]
New Q values:  [-2527.46239811 -8521.23367799  -558.02446686 -5359.94952554]
Reward: -10001  Episode Reward:  -10298
xxxxx
x...x
x...x
xg  x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4444.30545415 4121.32462955]
------
Step:1, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4444.30545415 4121.32462955]
New Q values:  [  37.74111519 -168.92307549 1527.17343361 4121.32462955]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -3477.32330635 -7525.7277781   -853.16249349]
------
Step:2, Action:West
State  288
Old Q Values:  [-5274.16868635 -3477.32330635 -7525.7277781   -853.16249349]
New Q values:  [-5274.16868635 -3477.32330635 -7525.7277781    894.53239147]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1527.17343361 4121.32462955]
------
Step:3, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1527.17343361 4121.32462955]
New Q values:  [   37.74111519  -168.92307549  1527.17343361 17190.81336189]
Reward: 9  Episode Reward:  17
xxxxx
x.. x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[51789.61170024  2256.66526474  6585.91095232  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [ 360.14322223  -40.34168621 6700.65231031  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 6700.65231031  -35.88578819]
Reward: 9  Episode Reward:  26
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   73.93195328    98.60358532 -7996.65727021  -180.6       ]
------
Step:5, Action:South
State  181
Old Q Values:  [   73.93195328    98.60358532 -7996.65727021  -180.6       ]
New Q values:  [   73.93195328  2049.03712722 -7996.65727021  -180.6       ]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 6700.65231031  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 6700.65231031  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 7836.90493269  -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  1527.17343361 17190.81336189]
------
Step:7, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  1527.17343361 17190.81336189]
New Q values:  [  37.74111519 -168.92307549 1527.17343361 9226.79682457]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 7836.90493269  -35.88578819]
------
Step:8, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 7836.90493269  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 5902.20102045  -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1527.17343361 9226.79682457]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  -558.02446686 -5359.94952554]
New Q values:  [-2527.46239811 -8521.23367799  -558.02446686  -373.91950408]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 5902.20102045  -35.88578819]
------
Step:10, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 5902.20102045  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 5128.31945555  -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x..gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1527.17343361 9226.79682457]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  -558.02446686  -373.91950408]
New Q values:  [-2527.46239811 -8521.23367799  -558.02446686  1388.32803503]
Reward: -1  Episode Reward:  19
xxxxx
x.g x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 5128.31945555  -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 5128.31945555  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 2467.22619273  -35.88578819]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  -558.02446686  1388.32803503]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  -558.02446686  1388.32803503]
New Q values:  [-2527.46239811 -8521.23367799  -558.02446686  1143.13555509]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1961.34780358 -8695.4397473   1183.40259392 -2601.74710518]
------
Step:14, Action:East
State  260
Old Q Values:  [ 1961.34780358 -8695.4397473   1183.40259392 -2601.74710518]
New Q values:  [ 1961.34780358 -8695.4397473    815.70170409 -2601.74710518]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  -558.02446686  1143.13555509]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  -558.02446686  1143.13555509]
New Q values:  [-2527.46239811 -8521.23367799    44.5499307   1143.13555509]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -3477.32330635 -7525.7277781    894.53239147]
------
Step:16, Action:South
State  288
Old Q Values:  [-5274.16868635 -3477.32330635 -7525.7277781    894.53239147]
New Q values:  [-5274.16868635 -1303.1696051  -7525.7277781    894.53239147]
Reward: -301  Episode Reward:  -286
xxxxx
x.. x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -1303.1696051  -7525.7277781    894.53239147]
------
Step:17, Action:West
State  288
Old Q Values:  [-5274.16868635 -1303.1696051  -7525.7277781    894.53239147]
New Q values:  [-5274.16868635 -1303.1696051  -7525.7277781    700.15362311]
Reward: -1  Episode Reward:  -287
xxxxx
x.. x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    44.5499307   1143.13555509]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    44.5499307   1143.13555509]
New Q values:  [-2527.46239811 -8521.23367799    44.5499307  -4954.94143689]
Reward: -10001  Episode Reward:  -10288
xxxxx
x.. x
x ..x
xg  x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1613.32774392 -180.6         737.46486723]
------
Step:1, Action:West
State  138
Old Q Values:  [-139.45925583 1613.32774392 -180.6         737.46486723]
New Q values:  [-139.45925583 1613.32774392 -180.6         747.41539264]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1490.09815249   311.87331119]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1490.09815249   311.87331119]
New Q values:  [ -281.736      -9545.4473624   1079.43758417   311.87331119]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1613.32774392 -180.6         747.41539264]
------
Step:3, Action:South
State  138
Old Q Values:  [-139.45925583 1613.32774392 -180.6         747.41539264]
New Q values:  [-139.45925583 5331.35092744 -180.6         747.41539264]
Reward: -1  Episode Reward:  7
xxxxx
x.  x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.56220661e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  216
Old Q Values:  [-3991.76069293  1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 2.10100106e+00  1.59989171e+03 -8.48943729e+03  5.31095938e+02]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5331.35092744 -180.6         747.41539264]
------
Step:5, Action:South
State  136
Old Q Values:  [-5281.21195651   988.01206264 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651   874.57233727 -6245.61866138 -1018.33622479]
Reward: -1  Episode Reward:  5
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2.10100106e+00  1.59989171e+03 -8.48943729e+03  5.31095938e+02]
------
Step:6, Action:South
State  208
Old Q Values:  [  898.96015667   839.82999644   483.97903422 -1455.65174173]
New Q values:  [  898.96015667   551.37808551   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  14
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -1303.1696051  -7525.7277781    700.15362311]
------
Step:7, Action:West
State  288
Old Q Values:  [-5274.16868635 -1303.1696051  -7525.7277781    700.15362311]
New Q values:  [-5274.16868635 -1303.1696051  -7525.7277781    298.82642846]
Reward: 9  Episode Reward:  23
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    44.5499307  -4954.94143689]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    44.5499307  -4954.94143689]
New Q values:  [-2527.46239811 -8521.23367799   106.86790082 -4954.94143689]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -1303.1696051  -7525.7277781    298.82642846]
------
Step:9, Action:West
State  288
Old Q Values:  [-5274.16868635 -1303.1696051  -7525.7277781    298.82642846]
New Q values:  [-5274.16868635 -1303.1696051  -7525.7277781    150.99094163]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   106.86790082 -4954.94143689]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   106.86790082 -4954.94143689]
New Q values:  [-2527.46239811 -8521.23367799    87.44444281 -4954.94143689]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -1303.1696051  -7525.7277781    150.99094163]
------
Step:11, Action:West
State  288
Old Q Values:  [-5274.16868635 -1303.1696051  -7525.7277781    150.99094163]
New Q values:  [-5274.16868635 -1303.1696051  -7525.7277781     86.0297095 ]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    87.44444281 -4954.94143689]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    87.44444281 -4954.94143689]
New Q values:  [-2527.46239811 -8521.23367799    60.18668997 -4954.94143689]
Reward: -1  Episode Reward:  18
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -1303.1696051  -7525.7277781     86.0297095 ]
------
Step:13, Action:West
State  288
Old Q Values:  [-5274.16868635 -1303.1696051  -7525.7277781     86.0297095 ]
New Q values:  [-5274.16868635 -1303.1696051  -7525.7277781     51.86789079]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    60.18668997 -4954.94143689]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    60.18668997 -4954.94143689]
New Q values:  [-2527.46239811 -8521.23367799    39.03504323 -4954.94143689]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -1303.1696051  -7525.7277781     51.86789079]
------
Step:15, Action:West
State  288
Old Q Values:  [-5274.16868635 -1303.1696051  -7525.7277781     51.86789079]
New Q values:  [-5274.16868635 -1303.1696051  -7525.7277781     31.85766928]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    39.03504323 -4954.94143689]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    39.03504323 -4954.94143689]
New Q values:  [-2527.46239811 -8521.23367799    24.57131808 -4954.94143689]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -1303.1696051  -7525.7277781     31.85766928]
------
Step:17, Action:South
State  288
Old Q Values:  [-5274.16868635 -1303.1696051  -7525.7277781     31.85766928]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781     31.85766928]
Reward: -301  Episode Reward:  -287
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781     31.85766928]
------
Step:18, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781     31.85766928]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781     19.51446314]
Reward: -1  Episode Reward:  -288
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    24.57131808 -4954.94143689]
------
Step:19, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 1527.17343361 9226.79682457]
New Q values:  [  37.74111519 -168.92307549  616.12371239 9226.79682457]
Reward: -1  Episode Reward:  -289
xxxxx
x. gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781     19.51446314]
------
Step:20, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781     19.51446314]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781     14.57718068]
Reward: -1  Episode Reward:  -290
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    24.57131808 -4954.94143689]
------
Step:21, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  616.12371239 9226.79682457]
New Q values:  [  37.74111519 -168.92307549  250.22263916 9226.79682457]
Reward: -1  Episode Reward:  -291
xxxxx
x. gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781     14.57718068]
------
Step:22, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781     14.57718068]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781     12.60226769]
Reward: -1  Episode Reward:  -292
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    24.57131808 -4954.94143689]
------
Step:23, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  250.22263916 9226.79682457]
New Q values:  [  37.74111519 -168.92307549  103.26973597 9226.79682457]
Reward: -1  Episode Reward:  -293
xxxxx
x. gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781     12.60226769]
------
Step:24, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781     12.60226769]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781     11.8123025 ]
Reward: -1  Episode Reward:  -294
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    24.57131808 -4954.94143689]
------
Step:25, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    24.57131808 -4954.94143689]
New Q values:  [-2527.46239811 -8521.23367799    12.77221798 -4954.94143689]
Reward: -1  Episode Reward:  -295
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781     11.8123025 ]
------
Step:26, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781     11.8123025 ]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781      7.95658639]
Reward: -1  Episode Reward:  -296
xxxxx
x.  x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799    12.77221798 -4954.94143689]
------
Step:27, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799    12.77221798 -4954.94143689]
New Q values:  [-2.52746240e+03 -8.52123368e+03  6.89586311e+00 -4.95494144e+03]
Reward: -1  Episode Reward:  -297
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781      7.95658639]
------
Step:28, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781      7.95658639]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781   2770.62168193]
Reward: -1  Episode Reward:  -298
xxxxx
x.  x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  103.26973597 9226.79682457]
------
Step:29, Action:West
State  272
Old Q Values:  [-2.52746240e+03 -8.52123368e+03  6.89586311e+00 -4.95494144e+03]
New Q values:  [-2.52746240e+03 -8.52123368e+03  6.89586311e+00  1.35603069e+04]
Reward: 9  Episode Reward:  -289
xxxxx
x.  x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[51789.61170024  2256.66526474  6585.91095232  1875.31501677]
------
Step:30, Action:North
State  257
Old Q Values:  [51789.61170024  2256.66526474  6585.91095232  1875.31501677]
New Q values:  [42581.50209453  2256.66526474  6585.91095232  1875.31501677]
Reward: 9  Episode Reward:  -280
xxxxx
x.  x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.          4614.46100011 72867.52471477     0.        ]
------
Step:31, Action:East
State  177
Old Q Values:  [78394.48547832 37619.68428217 67020.65338209     0.        ]
New Q values:  [78394.48547832 37619.68428217 24198.37293974     0.        ]
Reward: -9991  Episode Reward:  -10271
xxxxx
x.  x
x g x
x   x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.56220661e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:1, Action:North
State  210
Old Q Values:  [1.56220661e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [7.85363172e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 5331.35092744 -180.6         747.41539264]
------
Step:2, Action:South
State  138
Old Q Values:  [-139.45925583 5331.35092744 -180.6         747.41539264]
New Q values:  [-139.45925583 4488.02988639 -180.6         747.41539264]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7.85363172e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [7.85363172e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [4.48726165e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4488.02988639 -180.6         747.41539264]
------
Step:4, Action:South
State  138
Old Q Values:  [-139.45925583 4488.02988639 -180.6         747.41539264]
New Q values:  [-139.45925583 3140.7904505  -180.6         747.41539264]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.48726165e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:5, Action:North
State  208
Old Q Values:  [  898.96015667   551.37808551   483.97903422 -1455.65174173]
New Q values:  [ 1301.22119782   551.37808551   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 3140.7904505  -180.6         747.41539264]
------
Step:6, Action:West
State  136
Old Q Values:  [-5281.21195651   874.57233727 -6245.61866138 -1018.33622479]
New Q values:  [-5281.21195651   874.57233727 -6245.61866138  -345.75939501]
Reward: 9  Episode Reward:  14
xxxxx
x.agx
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9455.07588649   187.25031635]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   739.56790524   -28.6371518 ]
New Q values:  [-9594.56523706 -8069.05606225   739.56790524    32.51178852]
Reward: 9  Episode Reward:  23
xxxxx
xag x
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  128.55549748   31.9495824  -180.6       ]
------
Step:8, Action:South
State  109
Old Q Values:  [-241.10880094  128.55549748   31.9495824  -180.6       ]
New Q values:  [-241.10880094  387.61562203   31.9495824  -180.6       ]
Reward: 9  Episode Reward:  32
xxxxx
x  gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294 1102.64474348  964.34635147  940.95197235]
------
Step:9, Action:South
State  189
Old Q Values:  [   9.84673294 1102.64474348  964.34635147  940.95197235]
New Q values:  [   9.84673294 1180.62575521  964.34635147  940.95197235]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 2467.22619273  -35.88578819]
------
Step:10, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 2467.22619273  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 3760.32952446  -35.88578819]
Reward: 9  Episode Reward:  40
xxxxx
x  gx
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  103.26973597 9226.79682457]
------
Step:11, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  103.26973597 9226.79682457]
New Q values:  [  37.74111519 -168.92307549  103.26973597 4818.21758716]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
x . x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 3760.32952446  -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 3760.32952446  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 2948.99708593  -35.88578819]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x .gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  103.26973597 4818.21758716]
------
Step:13, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  103.26973597 4818.21758716]
New Q values:  [  37.74111519 -168.92307549  103.26973597 2811.38616065]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x . x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 2948.99708593  -35.88578819]
------
Step:14, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 2948.99708593  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 5247.09091497  -35.88578819]
Reward: -1  Episode Reward:  36
xxxxx
x g x
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2.52746240e+03 -8.52123368e+03  6.89586311e+00  1.35603069e+04]
------
Step:15, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  103.26973597 2811.38616065]
New Q values:  [  37.74111519 -168.92307549  103.26973597 2698.08173875]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x . x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 5247.09091497  -35.88578819]
------
Step:16, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 5247.09091497  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 6166.32844658  -35.88578819]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2.52746240e+03 -8.52123368e+03  6.89586311e+00  1.35603069e+04]
------
Step:17, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  103.26973597 2698.08173875]
New Q values:  [  37.74111519 -168.92307549  103.26973597 2928.53122947]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x . x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 6166.32844658  -35.88578819]
------
Step:18, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 6166.32844658  -35.88578819]
New Q values:  [ 179.03836449  -40.34168621 6534.02345923  -35.88578819]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2.52746240e+03 -8.52123368e+03  6.89586311e+00  1.35603069e+04]
------
Step:19, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1179.7269091  -4826.05217469]
New Q values:  [   16.82637525 -5807.06396197  1179.7269091     29.18616789]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 179.03836449  -40.34168621 6534.02345923  -35.88578819]
------
Step:20, Action:East
State  261
Old Q Values:  [ 179.03836449  -40.34168621 6534.02345923  -35.88578819]
New Q values:  [179.03836449 -40.34168621 681.10146429 -35.88578819]
Reward: -10001  Episode Reward:  -9970
xxxxx
x   x
x . x
x g.x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  5.31944932e+03  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.12823720e+04 8.03350364e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.12823720e+04 3.60916781e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1301.22119782   551.37808551   483.97903422 -1455.65174173]
------
Step:2, Action:North
State  208
Old Q Values:  [ 1301.22119782   551.37808551   483.97903422 -1455.65174173]
New Q values:  [  788.26018031   551.37808551   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   874.57233727 -6245.61866138  -345.75939501]
------
Step:3, Action:South
State  136
Old Q Values:  [-5281.21195651   874.57233727 -6245.61866138  -345.75939501]
New Q values:  [-5281.21195651   585.706989   -6245.61866138  -345.75939501]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  788.26018031   551.37808551   483.97903422 -1455.65174173]
------
Step:4, Action:North
State  208
Old Q Values:  [  788.26018031   551.37808551   483.97903422 -1455.65174173]
New Q values:  [-5509.58383118   551.37808551   483.97903422 -1455.65174173]
Reward: -10001  Episode Reward:  -9984
xxxxx
x..gx
x.  x
x ..x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[179.03836449 -40.34168621 681.10146429 -35.88578819]
------
Step:1, Action:North
State  260
Old Q Values:  [ 1961.34780358 -8695.4397473    815.70170409 -2601.74710518]
New Q values:  [ 2363.15682949 -8695.4397473    815.70170409 -2601.74710518]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5244.05902686     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 390.42113812 1148.67321624 1488.69936317    0.        ]
New Q values:  [ 390.42113812 1148.67321624 2196.71454024    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  5.31944932e+03  1.20371620e+03]
------
Step:3, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 10501.63330131  1101.59744825]
New Q values:  [  38.85388605 2677.89620798 5552.23181647 1101.59744825]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.48726165e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  210
Old Q Values:  [4.48726165e+03 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  3252.12948346  -180.00807518 66944.18734697]
------
Step:5, Action:West
State  130
Old Q Values:  [36041.91667283  3252.12948346  -180.00807518 66944.18734697]
New Q values:  [36041.91667283  3252.12948346  -180.00807518 65633.55311309]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
------
Step:6, Action:West
State  126
Old Q Values:  [  0.         437.4470373  374.96879939 206.08798538]
New Q values:  [  0.         437.4470373  374.96879939 116.83031103]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007   96.65038957 -120.29354603]
------
Step:7, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   934.13620436  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   504.28859293  -180.6       ]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         437.4470373  374.96879939 116.83031103]
------
Step:8, Action:South
State  126
Old Q Values:  [  0.         437.4470373  374.96879939 116.83031103]
New Q values:  [  0.         443.7746053  374.96879939 116.83031103]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638 897.98596794   0.        ]
------
Step:9, Action:East
State  206
Old Q Values:  [  0.         135.48456638 897.98596794   0.        ]
New Q values:  [  0.         135.48456638 515.78980165   0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[523.98471492 237.92474934   0.         429.03841886]
------
Step:10, Action:North
State  216
Old Q Values:  [ 2.10100106e+00  1.59989171e+03 -8.48943729e+03  5.31095938e+02]
New Q values:  [  942.47753557  1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 3140.7904505  -180.6         747.41539264]
------
Step:11, Action:South
State  138
Old Q Values:  [-139.45925583 3140.7904505  -180.6         747.41539264]
New Q values:  [-139.45925583 1735.68369242 -180.6         747.41539264]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  942.47753557  1599.89170739 -8489.43729461   531.09593838]
------
Step:12, Action:South
State  208
Old Q Values:  [-5509.58383118   551.37808551   483.97903422 -1455.65174173]
New Q values:  [-5509.58383118 61057.13773878   483.97903422 -1455.65174173]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  5.31944932e+03  1.20371620e+03]
------
Step:1, Action:East
State  195
Old Q Values:  [  38.85388605 2677.89620798 5552.23181647 1101.59744825]
New Q values:  [  38.85388605 2677.89620798 8791.36098619 1101.59744825]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [-5509.58383118 61057.13773878   483.97903422 -1455.65174173]
New Q values:  [-1677.72842474 61057.13773878   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1735.68369242 -180.6         747.41539264]
------
Step:3, Action:West
State  136
Old Q Values:  [-5281.21195651   585.706989   -6245.61866138  -345.75939501]
New Q values:  [-5281.21195651   585.706989   -6245.61866138   -76.7286631 ]
Reward: 9  Episode Reward:  27
xxxxx
x.agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9455.07588649   187.25031635]
------
Step:4, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2235.85545064    24.71104526]
New Q values:  [ -253.44886264 -1902.20915811  2235.85545064   352.58479928]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.23378158 1124.33460392 -252.78192178]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558   14.23378158 1124.33460392 -252.78192178]
New Q values:  [-252.35169558   14.23378158 1119.89047676 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2235.85545064   352.58479928]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1079.43758417   311.87331119]
New Q values:  [ -281.736      -9545.4473624    951.88014139   311.87331119]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 1735.68369242 -180.6         747.41539264]
------
Step:7, Action:South
State  138
Old Q Values:  [-139.45925583 1735.68369242 -180.6         747.41539264]
New Q values:  [-139.45925583 7258.74173657 -180.6         747.41539264]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:8, Action:North
State  218
Old Q Values:  [523.98471492 237.92474934   0.         429.03841886]
New Q values:  [2386.61640694  237.92474934    0.          429.03841886]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 7258.74173657 -180.6         747.41539264]
------
Step:9, Action:South
State  138
Old Q Values:  [-139.45925583 7258.74173657 -180.6         747.41539264]
New Q values:  [-139.45925583 9467.96495423 -180.6         747.41539264]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:10, Action:North
State  216
Old Q Values:  [  942.47753557  1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 3216.7805005   1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 9467.96495423 -180.6         747.41539264]
------
Step:11, Action:South
State  138
Old Q Values:  [-139.45925583 9467.96495423 -180.6         747.41539264]
New Q values:  [-139.45925583 4751.62013184 -180.6         747.41539264]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3216.7805005   1599.89170739 -8489.43729461   531.09593838]
------
Step:12, Action:North
State  218
Old Q Values:  [2386.61640694  237.92474934    0.          429.03841886]
New Q values:  [2379.53260233  237.92474934    0.          429.03841886]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4751.62013184 -180.6         747.41539264]
------
Step:13, Action:South
State  138
Old Q Values:  [-139.45925583 4751.62013184 -180.6         747.41539264]
New Q values:  [-139.45925583 8465.11631234 -180.6         747.41539264]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:14, Action:North
State  216
Old Q Values:  [ 3216.7805005   1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 3825.6470939   1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 8465.11631234 -180.6         747.41539264]
------
Step:15, Action:West
State  138
Old Q Values:  [-139.45925583 8465.11631234 -180.6         747.41539264]
New Q values:  [-139.45925583 8465.11631234 -180.6         583.93019947]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    951.88014139   311.87331119]
------
Step:16, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    951.88014139   311.87331119]
New Q values:  [ -281.736      -9545.4473624   2919.68695026   311.87331119]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 8465.11631234 -180.6         583.93019947]
------
Step:17, Action:South
State  138
Old Q Values:  [-139.45925583 8465.11631234 -180.6         583.93019947]
New Q values:  [-139.45925583 4533.14065311 -180.6         583.93019947]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3825.6470939   1599.89170739 -8489.43729461   531.09593838]
------
Step:18, Action:North
State  216
Old Q Values:  [ 3825.6470939   1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 1705.37093426  1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  22
xxxxx
x gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   585.706989   -6245.61866138   -76.7286631 ]
------
Step:19, Action:South
State  136
Old Q Values:  [-5281.21195651   585.706989   -6245.61866138   -76.7286631 ]
New Q values:  [-5281.21195651   745.29407588 -6245.61866138   -76.7286631 ]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x. ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1705.37093426  1599.89170739 -8489.43729461   531.09593838]
------
Step:20, Action:North
State  216
Old Q Values:  [ 1705.37093426  1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 2041.49056964  1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  20
xxxxx
x  ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4533.14065311 -180.6         583.93019947]
------
Step:21, Action:South
State  138
Old Q Values:  [-139.45925583 4533.14065311 -180.6         583.93019947]
New Q values:  [-139.45925583 8377.72452085 -180.6         583.93019947]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:22, Action:North
State  218
Old Q Values:  [2379.53260233  237.92474934    0.          429.03841886]
New Q values:  [3464.53039719  237.92474934    0.          429.03841886]
Reward: -1  Episode Reward:  18
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 8377.72452085 -180.6         583.93019947]
------
Step:23, Action:South
State  138
Old Q Values:  [-139.45925583 8377.72452085 -180.6         583.93019947]
New Q values:  [-139.45925583 3962.93697923 -180.6         583.93019947]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2041.49056964  1599.89170739 -8489.43729461   531.09593838]
------
Step:24, Action:North
State  216
Old Q Values:  [ 2041.49056964  1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 2004.87732162  1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 3962.93697923 -180.6         583.93019947]
------
Step:25, Action:South
State  138
Old Q Values:  [-139.45925583 3962.93697923 -180.6         583.93019947]
New Q values:  [-139.45925583 8149.6430513  -180.6         583.93019947]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:26, Action:North
State  218
Old Q Values:  [3464.53039719  237.92474934    0.          429.03841886]
New Q values:  [3830.10507426  237.92474934    0.          429.03841886]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 8149.6430513  -180.6         583.93019947]
------
Step:27, Action:South
State  138
Old Q Values:  [-139.45925583 8149.6430513  -180.6         583.93019947]
New Q values:  [-139.45925583 9824.32548012 -180.6         583.93019947]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:28, Action:North
State  216
Old Q Values:  [ 2004.87732162  1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 3748.64857269  1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 9824.32548012 -180.6         583.93019947]
------
Step:29, Action:South
State  138
Old Q Values:  [-139.45925583 9824.32548012 -180.6         583.93019947]
New Q values:  [-139.45925583 5053.72476386 -180.6         583.93019947]
Reward: -1  Episode Reward:  11
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3748.64857269  1599.89170739 -8489.43729461   531.09593838]
------
Step:30, Action:North
State  216
Old Q Values:  [ 3748.64857269  1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 1722.44765184  1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  10
xxxxx
x gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   745.29407588 -6245.61866138   -76.7286631 ]
------
Step:31, Action:South
State  138
Old Q Values:  [-139.45925583 5053.72476386 -180.6         583.93019947]
New Q values:  [-139.45925583 2537.62420109 -180.6         583.93019947]
Reward: -1  Episode Reward:  9
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1722.44765184  1599.89170739 -8489.43729461   531.09593838]
------
Step:32, Action:North
State  216
Old Q Values:  [ 1722.44765184  1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 1449.66632106  1599.89170739 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 2537.62420109 -180.6         583.93019947]
------
Step:33, Action:South
State  138
Old Q Values:  [-139.45925583 2537.62420109 -180.6         583.93019947]
New Q values:  [-139.45925583 7579.51794004 -180.6         583.93019947]
Reward: -1  Episode Reward:  7
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:34, Action:North
State  218
Old Q Values:  [3830.10507426  237.92474934    0.          429.03841886]
New Q values:  [3805.29741172  237.92474934    0.          429.03841886]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 7579.51794004 -180.6         583.93019947]
------
Step:35, Action:South
State  138
Old Q Values:  [-139.45925583 7579.51794004 -180.6         583.93019947]
New Q values:  [-139.45925583 9596.27543562 -180.6         583.93019947]
Reward: -1  Episode Reward:  5
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:36, Action:North
State  218
Old Q Values:  [3805.29741172  237.92474934    0.          429.03841886]
New Q values:  [4400.40159537  237.92474934    0.          429.03841886]
Reward: -1  Episode Reward:  4
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 9596.27543562 -180.6         583.93019947]
------
Step:37, Action:South
State  138
Old Q Values:  [-139.45925583 9596.27543562 -180.6         583.93019947]
New Q values:  [-139.45925583 4317.87768647 -180.6         583.93019947]
Reward: -1  Episode Reward:  3
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1449.66632106  1599.89170739 -8489.43729461   531.09593838]
------
Step:38, Action:South
State  216
Old Q Values:  [ 1449.66632106  1599.89170739 -8489.43729461   531.09593838]
New Q values:  [ 1449.66632106  1476.54318754 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  12
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781   2770.62168193]
------
Step:39, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781   2770.62168193]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781   5175.74075337]
Reward: -1  Episode Reward:  11
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2.52746240e+03 -8.52123368e+03  6.89586311e+00  1.35603069e+04]
------
Step:40, Action:West
State  272
Old Q Values:  [-2.52746240e+03 -8.52123368e+03  6.89586311e+00  1.35603069e+04]
New Q values:  [-2.52746240e+03 -8.52123368e+03  6.89586311e+00  1.77263525e+04]
Reward: 9  Episode Reward:  20
xxxxx
x   x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[40989.43240555 15148.29305605  5576.40109469   644.94785455]
------
Step:41, Action:South
State  256
Old Q Values:  [40989.43240555 15148.29305605  5576.40109469   644.94785455]
New Q values:  [40989.43240555 18175.54694409  5576.40109469   644.94785455]
Reward: -301  Episode Reward:  -281
xxxxx
xg  x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[40989.43240555 18175.54694409  5576.40109469   644.94785455]
------
Step:42, Action:North
State  256
Old Q Values:  [40989.43240555 18175.54694409  5576.40109469   644.94785455]
New Q values:  [76193.91312679 18175.54694409  5576.40109469   644.94785455]
Reward: 90009  Episode Reward:  89728
xxxxx
x   x
xg  x
x   x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   504.28859293  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869   79.64232007   96.65038957 -120.29354603]
New Q values:  [-177.44732869   79.64232007  919.96624091 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2919.68695026   311.87331119]
------
Step:2, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2235.85545064   352.58479928]
New Q values:  [ -253.44886264 -1902.20915811  2195.1054862    352.58479928]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6         583.93019947]
------
Step:3, Action:West
State  136
Old Q Values:  [-5281.21195651   745.29407588 -6245.61866138   -76.7286631 ]
New Q values:  [-5281.21195651   745.29407588 -6245.61866138    24.88362967]
Reward: -1  Episode Reward:  17
xxxxx
x agx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9455.07588649   187.25031635]
------
Step:4, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   739.56790524    32.51178852]
New Q values:  [-9594.56523706 -8069.05606225   739.56790524   128.68940202]
Reward: -1  Episode Reward:  16
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  387.61562203   31.9495824  -180.6       ]
------
Step:5, Action:South
State  109
Old Q Values:  [-241.10880094  387.61562203   31.9495824  -180.6       ]
New Q values:  [-241.10880094  769.15738698   31.9495824  -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   73.93195328  2049.03712722 -7996.65727021  -180.6       ]
------
Step:6, Action:South
State  189
Old Q Values:  [   9.84673294 1180.62575521  964.34635147  940.95197235]
New Q values:  [  9.84673294 681.98074137 964.34635147 940.95197235]
Reward: 9  Episode Reward:  24
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[179.03836449 -40.34168621 681.10146429 -35.88578819]
------
Step:7, Action:East
State  261
Old Q Values:  [179.03836449 -40.34168621 681.10146429 -35.88578819]
New Q values:  [179.03836449 -40.34168621 631.75865845 -35.88578819]
Reward: 9  Episode Reward:  33
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091     29.18616789]
------
Step:8, Action:East
State  272
Old Q Values:  [-2.52746240e+03 -8.52123368e+03  6.89586311e+00  1.77263525e+04]
New Q values:  [-2527.46239811 -8521.23367799  1560.88057125 17726.35249579]
Reward: 9  Episode Reward:  42
xxxxx
x g x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781   5175.74075337]
------
Step:9, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781   5175.74075337]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781   2423.61437408]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091     29.18616789]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1560.88057125 17726.35249579]
New Q values:  [-2527.46239811 -8521.23367799  1350.83654072 17726.35249579]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781   2423.61437408]
------
Step:11, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781   2423.61437408]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781   1322.76382236]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091     29.18616789]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1350.83654072 17726.35249579]
New Q values:  [-2527.46239811 -8521.23367799   936.563763   17726.35249579]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781   1322.76382236]
------
Step:13, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781   1322.76382236]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781   1407.06489779]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  103.26973597 2928.53122947]
------
Step:14, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  103.26973597 2928.53122947]
New Q values:  [  37.74111519 -168.92307549  103.26973597 1360.34008932]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[179.03836449 -40.34168621 631.75865845 -35.88578819]
------
Step:15, Action:East
State  261
Old Q Values:  [179.03836449 -40.34168621 631.75865845 -35.88578819]
New Q values:  [179.03836449 -40.34168621 660.20549018 -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  103.26973597 1360.34008932]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   936.563763   17726.35249579]
New Q values:  [-2527.46239811 -8521.23367799   936.563763    7288.00264537]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[179.03836449 -40.34168621 660.20549018 -35.88578819]
------
Step:17, Action:East
State  261
Old Q Values:  [179.03836449 -40.34168621 660.20549018 -35.88578819]
New Q values:  [179.03836449 -40.34168621 617.4002688  -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091     29.18616789]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   936.563763    7288.00264537]
New Q values:  [-2527.46239811 -8521.23367799   796.14497454  7288.00264537]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781   1407.06489779]
------
Step:19, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781   1407.06489779]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781    916.14403185]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091     29.18616789]
------
Step:20, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  103.26973597 1360.34008932]
New Q values:  [  37.74111519 -168.92307549  315.55110394 1360.34008932]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781    916.14403185]
------
Step:21, Action:West
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781    916.14403185]
New Q values:  [-5274.16868635  -692.31054125 -7525.7277781    773.95963954]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  315.55110394 1360.34008932]
------
Step:22, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  315.55110394 1360.34008932]
New Q values:  [  37.74111519 -168.92307549  315.55110394  728.75611637]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[179.03836449 -40.34168621 617.4002688  -35.88578819]
------
Step:23, Action:East
State  261
Old Q Values:  [179.03836449 -40.34168621 617.4002688  -35.88578819]
New Q values:  [179.03836449 -40.34168621 600.27818025 -35.88578819]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091     29.18616789]
------
Step:24, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   796.14497454  7288.00264537]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167  7288.00264537]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -692.31054125 -7525.7277781    773.95963954]
------
Step:25, Action:South
State  288
Old Q Values:  [-5274.16868635  -692.31054125 -7525.7277781    773.95963954]
New Q values:  [-5274.16868635  -225.33632464 -7525.7277781    773.95963954]
Reward: -301  Episode Reward:  -275
xxxxx
x   x
x ..x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -225.33632464 -7525.7277781    773.95963954]
------
Step:26, Action:West
State  288
Old Q Values:  [-5274.16868635  -225.33632464 -7525.7277781    773.95963954]
New Q values:  [-5274.16868635  -225.33632464 -7525.7277781   2495.38464943]
Reward: -1  Episode Reward:  -276
xxxxx
x   x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167  7288.00264537]
------
Step:27, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167  7288.00264537]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167  3623.54810699]
Reward: -1  Episode Reward:  -277
xxxxx
xg  x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2363.15682949 -8695.4397473    815.70170409 -2601.74710518]
------
Step:28, Action:North
State  261
Old Q Values:  [179.03836449 -40.34168621 600.27818025 -35.88578819]
New Q values:  [685.72648396 -40.34168621 600.27818025 -35.88578819]
Reward: -1  Episode Reward:  -278
xxxxx
x g x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   73.93195328  2049.03712722 -7996.65727021  -180.6       ]
------
Step:29, Action:South
State  181
Old Q Values:  [   73.93195328  2049.03712722 -7996.65727021  -180.6       ]
New Q values:  [   73.93195328  1024.73279608 -7996.65727021  -180.6       ]
Reward: -1  Episode Reward:  -279
xxxxx
x  gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[685.72648396 -40.34168621 600.27818025 -35.88578819]
------
Step:30, Action:North
State  261
Old Q Values:  [685.72648396 -40.34168621 600.27818025 -35.88578819]
New Q values:  [581.11043241 -40.34168621 600.27818025 -35.88578819]
Reward: -1  Episode Reward:  -280
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   73.93195328  1024.73279608 -7996.65727021  -180.6       ]
------
Step:31, Action:South
State  183
Old Q Values:  [ 390.42113812 1148.67321624 2196.71454024    0.        ]
New Q values:  [ 390.42113812  638.95274057 2196.71454024    0.        ]
Reward: -1  Episode Reward:  -281
xxxxx
x   x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[581.11043241 -40.34168621 600.27818025 -35.88578819]
------
Step:32, Action:East
State  261
Old Q Values:  [581.11043241 -40.34168621 600.27818025 -35.88578819]
New Q values:  [  581.11043241   -40.34168621 -4673.4242958    -35.88578819]
Reward: -10001  Episode Reward:  -10282
xxxxx
x   x
x ..x
x g x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  638.95274057 2196.71454024    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 390.42113812  638.95274057 2196.71454024    0.        ]
New Q values:  [ 390.42113812  638.95274057 2479.92061107    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  5.31944932e+03  1.20371620e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  5.31944932e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  8.69824799e+03  1.20371620e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:3, Action:North
State  208
Old Q Values:  [-1677.72842474 61057.13773878   483.97903422 -1455.65174173]
New Q values:  [19024.37456403 61057.13773878   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  3252.12948346  -180.00807518 65633.55311309]
------
Step:4, Action:West
State  138
Old Q Values:  [-139.45925583 4317.87768647 -180.6         583.93019947]
New Q values:  [-139.45925583 4317.87768647 -180.6        1114.87816487]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2919.68695026   311.87331119]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2919.68695026   311.87331119]
New Q values:  [ -281.736      -9545.4473624   2462.63808604   311.87331119]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1114.87816487]
------
Step:6, Action:South
State  130
Old Q Values:  [36041.91667283  3252.12948346  -180.00807518 65633.55311309]
New Q values:  [36041.91667283  7865.32005299  -180.00807518 65633.55311309]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
------
Step:7, Action:North
State  210
Old Q Values:  [2.18835609e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.84428903e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  7865.32005299  -180.00807518 65633.55311309]
------
Step:8, Action:West
State  138
Old Q Values:  [-139.45925583 4317.87768647 -180.6        1114.87816487]
New Q values:  [-139.45925583 4317.87768647 -180.6        1103.88291181]
Reward: -1  Episode Reward:  32
xxxxx
x.a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2195.1054862    352.58479928]
------
Step:9, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2195.1054862    352.58479928]
New Q values:  [ -253.44886264 -1902.20915811  2172.80550042   352.58479928]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1103.88291181]
------
Step:10, Action:West
State  136
Old Q Values:  [-5281.21195651   745.29407588 -6245.61866138    24.88362967]
New Q values:  [-5281.21195651   745.29407588 -6245.61866138    65.52854677]
Reward: -1  Episode Reward:  30
xxxxx
x.agx
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9455.07588649   187.25031635]
------
Step:11, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   739.56790524   128.68940202]
New Q values:  [-9594.56523706 -8069.05606225   739.56790524    58.89272124]
Reward: 9  Episode Reward:  39
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6            6.72320144 -1242.00860919     0.        ]
------
Step:12, Action:South
State  104
Old Q Values:  [-8652.84         745.80419858  1078.33520689 -8652.84      ]
New Q values:  [-8652.84         702.85534727  1078.33520689 -8652.84      ]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[-2081.09028721     0.          1350.44555947     0.        ]
------
Step:13, Action:East
State  185
Old Q Values:  [   9.40190913    0.         1075.69825138 -178.98      ]
New Q values:  [   9.40190913    0.         3989.75024651 -178.98      ]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  11866.9031532    636.07481225   408.67479662]
------
Step:14, Action:South
State  200
Old Q Values:  [   62.8218634  11866.9031532    636.07481225   408.67479662]
New Q values:  [  62.8218634  5839.22569338  636.07481225  408.67479662]
Reward: 9  Episode Reward:  46
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167  3623.54810699]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167  3623.54810699]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167 74229.26987116]
Reward: 100009  Episode Reward:  100055
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2172.80550042   352.58479928]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2172.80550042   352.58479928]
New Q values:  [ -253.44886264 -1902.20915811  2169.88550611   352.58479928]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1103.88291181]
------
Step:2, Action:West
State  138
Old Q Values:  [-139.45925583 4317.87768647 -180.6        1103.88291181]
New Q values:  [-139.45925583 4317.87768647 -180.6        1091.91881655]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2169.88550611   352.58479928]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2169.88550611   352.58479928]
New Q values:  [ -253.44886264 -1902.20915811  2162.71750838   352.58479928]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1091.91881655]
------
Step:4, Action:West
State  138
Old Q Values:  [-139.45925583 4317.87768647 -180.6        1091.91881655]
New Q values:  [-139.45925583 4317.87768647 -180.6        1174.95895244]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2462.63808604   311.87331119]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2462.63808604   311.87331119]
New Q values:  [ -281.736      -9545.4473624   2279.81854036   311.87331119]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
xg..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1174.95895244]
------
Step:6, Action:South
State  136
Old Q Values:  [-5281.21195651   745.29407588 -6245.61866138    65.52854677]
New Q values:  [-5281.21195651 18620.65895199 -6245.61866138    65.52854677]
Reward: 9  Episode Reward:  14
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19024.37456403 61057.13773878   483.97903422 -1455.65174173]
------
Step:7, Action:South
State  208
Old Q Values:  [19024.37456403 61057.13773878   483.97903422 -1455.65174173]
New Q values:  [19024.37456403 25170.87049034   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -225.33632464 -7525.7277781   2495.38464943]
------
Step:8, Action:West
State  288
Old Q Values:  [-5274.16868635  -225.33632464 -7525.7277781   2495.38464943]
New Q values:  [-5274.16868635  -225.33632464 -7525.7277781  23272.33482112]
Reward: 9  Episode Reward:  22
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167 74229.26987116]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167 74229.26987116]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167 42471.55857682]
Reward: 9  Episode Reward:  31
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[42581.50209453  2256.66526474  6585.91095232  1875.31501677]
------
Step:10, Action:North
State  260
Old Q Values:  [ 2363.15682949 -8695.4397473    815.70170409 -2601.74710518]
New Q values:  [ 2809.18214034 -8695.4397473    815.70170409 -2601.74710518]
Reward: 9  Episode Reward:  40
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-6029.94468529   675.35650772  6195.06469515 -4966.32149798]
------
Step:11, Action:East
State  176
Old Q Values:  [    0.          1327.79507613 19309.13388188     0.        ]
New Q values:  [    0.          1327.79507613 11113.76513966     0.        ]
Reward: 9  Episode Reward:  49
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.12823720e+04 3.60916781e+03 2.91043938e+03]
------
Step:12, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.12823720e+04 3.60916781e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.72538164e+04 3.60916781e+03 2.91043938e+03]
Reward: -1  Episode Reward:  48
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167 42471.55857682]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167 42471.55857682]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167 29762.47405909]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[42581.50209453  2256.66526474  6585.91095232  1875.31501677]
------
Step:14, Action:North
State  257
Old Q Values:  [42581.50209453  2256.66526474  6585.91095232  1875.31501677]
New Q values:  [40550.34648131  2256.66526474  6585.91095232  1875.31501677]
Reward: -1  Episode Reward:  46
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 37619.68428217 24198.37293974     0.        ]
------
Step:15, Action:North
State  176
Old Q Values:  [    0.          1327.79507613 11113.76513966     0.        ]
New Q values:  [76485.61294353  1327.79507613 11113.76513966     0.        ]
Reward: 100009  Episode Reward:  100055
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  315.55110394  728.75611637]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167 29762.47405909]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167 12084.72275336]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  581.11043241   -40.34168621 -4673.4242958    -35.88578819]
------
Step:2, Action:North
State  260
Old Q Values:  [ 2809.18214034 -8695.4397473    815.70170409 -2601.74710518]
New Q values:  [ 2702.29056419 -8695.4397473    815.70170409 -2601.74710518]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5244.05902686     0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 390.42113812  638.95274057 2479.92061107    0.        ]
New Q values:  [ 390.42113812  638.95274057 3606.8426403     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  8.69824799e+03  1.20371620e+03]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.72538164e+04 3.60916781e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.72538164e+04 9.00032827e+03 2.91043938e+03]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19024.37456403 25170.87049034   483.97903422 -1455.65174173]
------
Step:5, Action:South
State  210
Old Q Values:  [2.84428903e+04 4.23618905e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.84428903e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  35
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  -225.33632464 -7525.7277781  23272.33482112]
------
Step:6, Action:South
State  288
Old Q Values:  [-5274.16868635  -225.33632464 -7525.7277781  23272.33482112]
New Q values:  [-5274.16868635  6710.96591648 -7525.7277781  23272.33482112]
Reward: -301  Episode Reward:  -266
xxxxx
x...x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  6710.96591648 -7525.7277781  23272.33482112]
------
Step:7, Action:West
State  288
Old Q Values:  [-5274.16868635  6710.96591648 -7525.7277781  23272.33482112]
New Q values:  [-5274.16868635  6710.96591648 -7525.7277781  12933.75075445]
Reward: -1  Episode Reward:  -267
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167 12084.72275336]
------
Step:8, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549  315.55110394  728.75611637]
New Q values:  [   37.74111519  -168.92307549   315.55110394 12456.00639094]
Reward: -1  Episode Reward:  -268
xxxxx
x..gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[40550.34648131  2256.66526474  6585.91095232  1875.31501677]
------
Step:9, Action:North
State  261
Old Q Values:  [  581.11043241   -40.34168621 -4673.4242958    -35.88578819]
New Q values:  [  539.26401179   -40.34168621 -4673.4242958    -35.88578819]
Reward: -1  Episode Reward:  -269
xxxxx
x...x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   73.93195328  1024.73279608 -7996.65727021  -180.6       ]
------
Step:10, Action:South
State  181
Old Q Values:  [   73.93195328  1024.73279608 -7996.65727021  -180.6       ]
New Q values:  [   73.93195328   571.07232197 -7996.65727021  -180.6       ]
Reward: -1  Episode Reward:  -270
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  539.26401179   -40.34168621 -4673.4242958    -35.88578819]
------
Step:11, Action:North
State  260
Old Q Values:  [ 2702.29056419 -8695.4397473    815.70170409 -2601.74710518]
New Q values:  [-3061.16436578 -8695.4397473    815.70170409 -2601.74710518]
Reward: -10001  Episode Reward:  -10271
xxxxx
x...x
xg  x
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2162.71750838   352.58479928]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2279.81854036   311.87331119]
New Q values:  [ -281.736      -9545.4473624   2212.69072208   311.87331119]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1174.95895244]
------
Step:2, Action:South
State  136
Old Q Values:  [-5281.21195651 18620.65895199 -6245.61866138    65.52854677]
New Q values:  [-5281.21195651 14998.9247279  -6245.61866138    65.52854677]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19024.37456403 25170.87049034   483.97903422 -1455.65174173]
------
Step:3, Action:South
State  208
Old Q Values:  [19024.37456403 25170.87049034   483.97903422 -1455.65174173]
New Q values:  [19024.37456403 13953.87342247   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  17
xxxxx
x. gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  6710.96591648 -7525.7277781  12933.75075445]
------
Step:4, Action:West
State  288
Old Q Values:  [-5274.16868635  6710.96591648 -7525.7277781  12933.75075445]
New Q values:  [-5274.16868635  6710.96591648 -7525.7277781   8804.31712779]
Reward: 9  Episode Reward:  26
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167 12084.72275336]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167 12084.72275336]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167 27697.46303938]
Reward: 9  Episode Reward:  35
xxxxx
xg  x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[76193.91312679 18175.54694409  5576.40109469   644.94785455]
------
Step:6, Action:North
State  257
Old Q Values:  [40550.34648131  2256.66526474  6585.91095232  1875.31501677]
New Q values:  [39743.88423602  2256.66526474  6585.91095232  1875.31501677]
Reward: 9  Episode Reward:  44
xxxxx
x.g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 37619.68428217 24198.37293974     0.        ]
------
Step:7, Action:North
State  180
Old Q Values:  [-6029.94468529   675.35650772  6195.06469515 -4966.32149798]
New Q values:  [-8180.68959572   675.35650772  6195.06469515 -4966.32149798]
Reward: -9991  Episode Reward:  -9947
xxxxx
xg  x
x . x
x   x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1174.95895244]
------
Step:1, Action:South
State  136
Old Q Values:  [-5281.21195651 14998.9247279  -6245.61866138    65.52854677]
New Q values:  [-5281.21195651  6447.93284742 -6245.61866138    65.52854677]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1449.66632106  1476.54318754 -8489.43729461   531.09593838]
------
Step:2, Action:South
State  208
Old Q Values:  [19024.37456403 13953.87342247   483.97903422 -1455.65174173]
New Q values:  [19024.37456403  8228.24450733   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  6710.96591648 -7525.7277781   8804.31712779]
------
Step:3, Action:West
State  288
Old Q Values:  [-5274.16868635  6710.96591648 -7525.7277781   8804.31712779]
New Q values:  [-5274.16868635  6710.96591648 -7525.7277781  11836.36576293]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167 27697.46303938]
------
Step:4, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549   315.55110394 12456.00639094]
New Q values:  [   37.74111519  -168.92307549   315.55110394 16910.96782718]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39743.88423602  2256.66526474  6585.91095232  1875.31501677]
------
Step:5, Action:North
State  257
Old Q Values:  [39743.88423602  2256.66526474  6585.91095232  1875.31501677]
New Q values:  [39421.2993379   2256.66526474  6585.91095232  1875.31501677]
Reward: 9  Episode Reward:  45
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 37619.68428217 24198.37293974     0.        ]
------
Step:6, Action:North
State  181
Old Q Values:  [   73.93195328   571.07232197 -7996.65727021  -180.6       ]
New Q values:  [   39.4221046    571.07232197 -7996.65727021  -180.6       ]
Reward: 9  Episode Reward:  54
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         14.83107761  0.          0.        ]
------
Step:7, Action:South
State  103
Old Q Values:  [-180.6         572.15158391    5.4           0.        ]
New Q values:  [-180.6         399.58233015    5.4           0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   39.4221046    571.07232197 -7996.65727021  -180.6       ]
------
Step:8, Action:South
State  183
Old Q Values:  [ 390.42113812  638.95274057 3606.8426403     0.        ]
New Q values:  [ 390.42113812  416.76029977 3606.8426403     0.        ]
Reward: -1  Episode Reward:  52
xxxxx
x . x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  539.26401179   -40.34168621 -4673.4242958    -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [  539.26401179   -40.34168621 -4673.4242958    -35.88578819]
New Q values:  [ 1297.1583968    -40.34168621 -4673.4242958    -35.88578819]
Reward: -1  Episode Reward:  51
xxxxx
x . x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  416.76029977 3606.8426403     0.        ]
------
Step:10, Action:East
State  183
Old Q Values:  [ 390.42113812  416.76029977 3606.8426403     0.        ]
New Q values:  [ 390.42113812  416.76029977 4079.54535198    0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x . x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 2677.89620798 8791.36098619 1101.59744825]
------
Step:11, Action:East
State  193
Old Q Values:  [-5922.26708831  1126.08525554   450.62327432  1099.96026581]
New Q values:  [-5922.26708831  1126.08525554  -113.03832106  1099.96026581]
Reward: -10001  Episode Reward:  -9951
xxxxx
x . x
x  gx
x   x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   504.28859293  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869   79.64232007  919.96624091 -120.29354603]
New Q values:  [-177.44732869   79.64232007 1037.19371299 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2212.69072208   311.87331119]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   739.56790524    58.89272124]
New Q values:  [-9594.56523706 -8069.05606225  2235.60701632    58.89272124]
Reward: 9  Episode Reward:  18
xxxxx
x gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  6447.93284742 -6245.61866138    65.52854677]
------
Step:3, Action:South
State  136
Old Q Values:  [-5281.21195651  6447.93284742 -6245.61866138    65.52854677]
New Q values:  [-5281.21195651  3027.53609523 -6245.61866138    65.52854677]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1449.66632106  1476.54318754 -8489.43729461   531.09593838]
------
Step:4, Action:South
State  216
Old Q Values:  [ 1449.66632106  1476.54318754 -8489.43729461   531.09593838]
New Q values:  [ 1449.66632106  4146.92700389 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  6710.96591648 -7525.7277781  11836.36576293]
------
Step:5, Action:West
State  288
Old Q Values:  [-5274.16868635  6710.96591648 -7525.7277781  11836.36576293]
New Q values:  [-5274.16868635  6710.96591648 -7525.7277781  13049.18521699]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167 27697.46303938]
------
Step:6, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549   315.55110394 16910.96782718]
New Q values:  [  37.74111519 -168.92307549  315.55110394 7158.93464991]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1297.1583968    -40.34168621 -4673.4242958    -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [ 1297.1583968    -40.34168621 -4673.4242958    -35.88578819]
New Q values:  [ 1742.12696432   -40.34168621 -4673.4242958    -35.88578819]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  416.76029977 4079.54535198    0.        ]
------
Step:8, Action:East
State  177
Old Q Values:  [78394.48547832 37619.68428217 24198.37293974     0.        ]
New Q values:  [78394.48547832 37619.68428217 70022.57475256     0.        ]
Reward: 100009  Episode Reward:  100062
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1126.08525554  -113.03832106  1099.96026581]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  1126.08525554  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831  2603.51449719  -113.03832106  1099.96026581]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549  315.55110394 7158.93464991]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167 27697.46303938]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167 22910.77501712]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39421.2993379   2256.66526474  6585.91095232  1875.31501677]
------
Step:3, Action:North
State  261
Old Q Values:  [ 1742.12696432   -40.34168621 -4673.4242958    -35.88578819]
New Q values:  [24220.59642922   -40.34168621 -4673.4242958    -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 37619.68428217 70022.57475256     0.        ]
------
Step:4, Action:North
State  181
Old Q Values:  [   39.4221046    571.07232197 -7996.65727021  -180.6       ]
New Q values:  [  141.04354088   571.07232197 -7996.65727021  -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
xa..x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6         399.58233015    5.4           0.        ]
------
Step:5, Action:South
State  103
Old Q Values:  [-180.6         399.58233015    5.4           0.        ]
New Q values:  [-180.6         330.55462865    5.4           0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  141.04354088   571.07232197 -7996.65727021  -180.6       ]
------
Step:6, Action:South
State  181
Old Q Values:  [  141.04354088   571.07232197 -7996.65727021  -180.6       ]
New Q values:  [  141.04354088  7494.00785755 -7996.65727021  -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[24220.59642922   -40.34168621 -4673.4242958    -35.88578819]
------
Step:7, Action:North
State  260
Old Q Values:  [-3061.16436578 -8695.4397473    815.70170409 -2601.74710518]
New Q values:  [  633.45366223 -8695.4397473    815.70170409 -2601.74710518]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-8180.68959572   675.35650772  6195.06469515 -4966.32149798]
------
Step:8, Action:East
State  181
Old Q Values:  [  141.04354088  7494.00785755 -7996.65727021  -180.6       ]
New Q values:  [  141.04354088  7494.00785755 -2991.37425589  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   692.96217398   174.55451539     0.        ]
------
Step:9, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.72538164e+04 9.00032827e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 7.25484461e+03 9.00032827e+03 2.91043938e+03]
Reward: -1  Episode Reward:  31
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1179.7269091     29.18616789]
------
Step:10, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1179.7269091     29.18616789]
New Q values:  [   16.82637525 -5807.06396197  4392.04632874    29.18616789]
Reward: 9  Episode Reward:  40
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  6710.96591648 -7525.7277781  13049.18521699]
------
Step:11, Action:West
State  288
Old Q Values:  [-5274.16868635  6710.96591648 -7525.7277781  13049.18521699]
New Q values:  [-5274.16868635  6710.96591648 -7525.7277781  12092.30659193]
Reward: -1  Episode Reward:  39
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167 22910.77501712]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167 22910.77501712]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167  9408.42051808]
Reward: -1  Episode Reward:  38
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  633.45366223 -8695.4397473    815.70170409 -2601.74710518]
------
Step:13, Action:East
State  260
Old Q Values:  [  633.45366223 -8695.4397473    815.70170409 -2601.74710518]
New Q values:  [  633.45366223 -8695.4397473   3148.20683706 -2601.74710518]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   550.04588167  9408.42051808]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167  9408.42051808]
New Q values:  [-2527.46239811 -8521.23367799   550.04588167  4707.23025835]
Reward: -1  Episode Reward:  36
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  633.45366223 -8695.4397473   3148.20683706 -2601.74710518]
------
Step:15, Action:East
State  261
Old Q Values:  [24220.59642922   -40.34168621 -4673.4242958    -35.88578819]
New Q values:  [24220.59642922   -40.34168621  -552.3558197    -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  4392.04632874    29.18616789]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   550.04588167  4707.23025835]
New Q values:  [-2527.46239811 -8521.23367799  3847.11033025  4707.23025835]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  6710.96591648 -7525.7277781  12092.30659193]
------
Step:17, Action:West
State  288
Old Q Values:  [-5274.16868635  6710.96591648 -7525.7277781  12092.30659193]
New Q values:  [-5274.16868635  6710.96591648 -7525.7277781   6153.93653539]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  4392.04632874    29.18616789]
------
Step:18, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549  315.55110394 7158.93464991]
New Q values:  [  37.74111519 -168.92307549 2138.91021652 7158.93464991]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  6710.96591648 -7525.7277781   6153.93653539]
------
Step:19, Action:South
State  288
Old Q Values:  [-5274.16868635  6710.96591648 -7525.7277781   6153.93653539]
New Q values:  [-5274.16868635  4517.07614154 -7525.7277781   6153.93653539]
Reward: -301  Episode Reward:  -269
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  4517.07614154 -7525.7277781   6153.93653539]
------
Step:20, Action:West
State  288
Old Q Values:  [-5274.16868635  4517.07614154 -7525.7277781   6153.93653539]
New Q values:  [-5274.16868635  4517.07614154 -7525.7277781   4608.65500913]
Reward: -1  Episode Reward:  -270
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2138.91021652 7158.93464991]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3847.11033025  4707.23025835]
New Q values:  [-2527.46239811 -8521.23367799  3847.11033025  9148.47103211]
Reward: -1  Episode Reward:  -271
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[24220.59642922   -40.34168621  -552.3558197    -35.88578819]
------
Step:22, Action:North
State  261
Old Q Values:  [24220.59642922   -40.34168621  -552.3558197    -35.88578819]
New Q values:  [10911.50217728   -40.34168621  -552.3558197    -35.88578819]
Reward: -1  Episode Reward:  -272
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  416.76029977 4079.54535198    0.        ]
------
Step:23, Action:East
State  181
Old Q Values:  [  141.04354088  7494.00785755 -2991.37425589  -180.6       ]
New Q values:  [ 141.04354088 7494.00785755 -416.0953532  -180.6       ]
Reward: -1  Episode Reward:  -273
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2603.51449719  -113.03832106  1099.96026581]
------
Step:24, Action:South
State  193
Old Q Values:  [-5922.26708831  2603.51449719  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831  3188.48619385  -113.03832106  1099.96026581]
Reward: -1  Episode Reward:  -274
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2138.91021652 7158.93464991]
------
Step:25, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2138.91021652 7158.93464991]
New Q values:  [  37.74111519 -168.92307549 2138.91021652 6136.42451315]
Reward: -1  Episode Reward:  -275
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[10911.50217728   -40.34168621  -552.3558197    -35.88578819]
------
Step:26, Action:North
State  261
Old Q Values:  [10911.50217728   -40.34168621  -552.3558197    -35.88578819]
New Q values:  [6612.20322818  -40.34168621 -552.3558197   -35.88578819]
Reward: -1  Episode Reward:  -276
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088 7494.00785755 -416.0953532  -180.6       ]
------
Step:27, Action:South
State  181
Old Q Values:  [ 141.04354088 7494.00785755 -416.0953532  -180.6       ]
New Q values:  [ 141.04354088 4980.66411148 -416.0953532  -180.6       ]
Reward: -1  Episode Reward:  -277
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6612.20322818  -40.34168621 -552.3558197   -35.88578819]
------
Step:28, Action:North
State  261
Old Q Values:  [6612.20322818  -40.34168621 -552.3558197   -35.88578819]
New Q values:  [4138.48052471  -40.34168621 -552.3558197   -35.88578819]
Reward: -1  Episode Reward:  -278
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088 4980.66411148 -416.0953532  -180.6       ]
------
Step:29, Action:South
State  181
Old Q Values:  [ 141.04354088 4980.66411148 -416.0953532  -180.6       ]
New Q values:  [ 141.04354088 3233.209802   -416.0953532  -180.6       ]
Reward: -1  Episode Reward:  -279
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4138.48052471  -40.34168621 -552.3558197   -35.88578819]
------
Step:30, Action:North
State  261
Old Q Values:  [4138.48052471  -40.34168621 -552.3558197   -35.88578819]
New Q values:  [2624.75515049  -40.34168621 -552.3558197   -35.88578819]
Reward: -1  Episode Reward:  -280
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088 3233.209802   -416.0953532  -180.6       ]
------
Step:31, Action:South
State  183
Old Q Values:  [ 390.42113812  416.76029977 4079.54535198    0.        ]
New Q values:  [ 390.42113812  953.53066505 4079.54535198    0.        ]
Reward: -1  Episode Reward:  -281
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2624.75515049  -40.34168621 -552.3558197   -35.88578819]
------
Step:32, Action:North
State  261
Old Q Values:  [2624.75515049  -40.34168621 -552.3558197   -35.88578819]
New Q values:  [2019.2650008   -40.34168621 -552.3558197   -35.88578819]
Reward: -1  Episode Reward:  -282
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088 3233.209802   -416.0953532  -180.6       ]
------
Step:33, Action:South
State  181
Old Q Values:  [ 141.04354088 3233.209802   -416.0953532  -180.6       ]
New Q values:  [ 141.04354088 1898.46342104 -416.0953532  -180.6       ]
Reward: -1  Episode Reward:  -283
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2019.2650008   -40.34168621 -552.3558197   -35.88578819]
------
Step:34, Action:North
State  261
Old Q Values:  [2019.2650008   -40.34168621 -552.3558197   -35.88578819]
New Q values:  [1376.64502663  -40.34168621 -552.3558197   -35.88578819]
Reward: -1  Episode Reward:  -284
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088 1898.46342104 -416.0953532  -180.6       ]
------
Step:35, Action:South
State  181
Old Q Values:  [ 141.04354088 1898.46342104 -416.0953532  -180.6       ]
New Q values:  [ 141.04354088 1171.77887641 -416.0953532  -180.6       ]
Reward: -1  Episode Reward:  -285
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1376.64502663  -40.34168621 -552.3558197   -35.88578819]
------
Step:36, Action:North
State  261
Old Q Values:  [1376.64502663  -40.34168621 -552.3558197   -35.88578819]
New Q values:  [ 901.59167357  -40.34168621 -552.3558197   -35.88578819]
Reward: -1  Episode Reward:  -286
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088 1171.77887641 -416.0953532  -180.6       ]
------
Step:37, Action:South
State  181
Old Q Values:  [ 141.04354088 1171.77887641 -416.0953532  -180.6       ]
New Q values:  [ 141.04354088  738.58905263 -416.0953532  -180.6       ]
Reward: -1  Episode Reward:  -287
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 901.59167357  -40.34168621 -552.3558197   -35.88578819]
------
Step:38, Action:North
State  261
Old Q Values:  [ 901.59167357  -40.34168621 -552.3558197   -35.88578819]
New Q values:  [ 581.61338522  -40.34168621 -552.3558197   -35.88578819]
Reward: -1  Episode Reward:  -288
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088  738.58905263 -416.0953532  -180.6       ]
------
Step:39, Action:South
State  180
Old Q Values:  [-8180.68959572   675.35650772  6195.06469515 -4966.32149798]
New Q values:  [-8180.68959572  1214.0046542   6195.06469515 -4966.32149798]
Reward: -1  Episode Reward:  -289
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  633.45366223 -8695.4397473   3148.20683706 -2601.74710518]
------
Step:40, Action:East
State  261
Old Q Values:  [ 581.61338522  -40.34168621 -552.3558197   -35.88578819]
New Q values:  [ 581.61338522  -40.34168621 2522.99898175  -35.88578819]
Reward: -1  Episode Reward:  -290
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3847.11033025  9148.47103211]
------
Step:41, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2138.91021652 6136.42451315]
New Q values:  [  37.74111519 -168.92307549 2138.91021652 3210.86949979]
Reward: -1  Episode Reward:  -291
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 581.61338522  -40.34168621 2522.99898175  -35.88578819]
------
Step:42, Action:East
State  261
Old Q Values:  [ 581.61338522  -40.34168621 2522.99898175  -35.88578819]
New Q values:  [ 581.61338522  -40.34168621 1971.86044264  -35.88578819]
Reward: -1  Episode Reward:  -292
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2138.91021652 3210.86949979]
------
Step:43, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  4392.04632874    29.18616789]
New Q values:  [   16.82637525 -5807.06396197  4392.04632874   602.63259995]
Reward: -1  Episode Reward:  -293
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 581.61338522  -40.34168621 1971.86044264  -35.88578819]
------
Step:44, Action:East
State  261
Old Q Values:  [ 581.61338522  -40.34168621 1971.86044264  -35.88578819]
New Q values:  [ 581.61338522  -40.34168621 3532.68548669  -35.88578819]
Reward: -1  Episode Reward:  -294
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3847.11033025  9148.47103211]
------
Step:45, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3847.11033025  9148.47103211]
New Q values:  [-2527.46239811 -8521.23367799  3847.11033025  4718.59405885]
Reward: -1  Episode Reward:  -295
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 581.61338522  -40.34168621 3532.68548669  -35.88578819]
------
Step:46, Action:North
State  261
Old Q Values:  [ 581.61338522  -40.34168621 3532.68548669  -35.88578819]
New Q values:  [1455.90895968  -40.34168621 3532.68548669  -35.88578819]
Reward: -1  Episode Reward:  -296
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  953.53066505 4079.54535198    0.        ]
------
Step:47, Action:East
State  181
Old Q Values:  [ 141.04354088  738.58905263 -416.0953532  -180.6       ]
New Q values:  [ 141.04354088  738.58905263  789.50771688 -180.6       ]
Reward: -1  Episode Reward:  -297
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3188.48619385  -113.03832106  1099.96026581]
------
Step:48, Action:South
State  193
Old Q Values:  [-5922.26708831  3188.48619385  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831  2238.05532748  -113.03832106  1099.96026581]
Reward: -1  Episode Reward:  -298
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2138.91021652 3210.86949979]
------
Step:49, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  4392.04632874   602.63259995]
New Q values:  [   16.82637525 -5807.06396197  4392.04632874  1300.25868599]
Reward: -1  Episode Reward:  -299
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1455.90895968  -40.34168621 3532.68548669  -35.88578819]
------
Step:50, Action:East
State  260
Old Q Values:  [  633.45366223 -8695.4397473   3148.20683706 -2601.74710518]
New Q values:  [  633.45366223 -8695.4397473   2674.26095248 -2601.74710518]
Reward: -1  Episode Reward:  -300
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3847.11033025  4718.59405885]
------
Step:51, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  4392.04632874  1300.25868599]
New Q values:  [   16.82637525 -5807.06396197  4392.04632874  1579.3091204 ]
Reward: -1  Episode Reward:  -301
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1455.90895968  -40.34168621 3532.68548669  -35.88578819]
------
Step:52, Action:East
State  261
Old Q Values:  [1455.90895968  -40.34168621 3532.68548669  -35.88578819]
New Q values:  [1455.90895968  -40.34168621 2828.05241233  -35.88578819]
Reward: -1  Episode Reward:  -302
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3847.11033025  4718.59405885]
------
Step:53, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2138.91021652 3210.86949979]
New Q values:  [  37.74111519 -168.92307549 2138.91021652 2132.16352361]
Reward: -1  Episode Reward:  -303
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1455.90895968  -40.34168621 2828.05241233  -35.88578819]
------
Step:54, Action:East
State  261
Old Q Values:  [1455.90895968  -40.34168621 2828.05241233  -35.88578819]
New Q values:  [1455.90895968  -40.34168621 1772.29402989  -35.88578819]
Reward: -1  Episode Reward:  -304
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2138.91021652 2132.16352361]
------
Step:55, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2138.91021652 2132.16352361]
New Q values:  [  37.74111519 -168.92307549 2237.56058935 2132.16352361]
Reward: -1  Episode Reward:  -305
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  4517.07614154 -7525.7277781   4608.65500913]
------
Step:56, Action:West
State  288
Old Q Values:  [-5274.16868635  4517.07614154 -7525.7277781   4608.65500913]
New Q values:  [-5274.16868635  4517.07614154 -7525.7277781   2514.13018046]
Reward: -1  Episode Reward:  -306
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2237.56058935 2132.16352361]
------
Step:57, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2237.56058935 2132.16352361]
New Q values:  [  37.74111519 -168.92307549 2237.56058935 1383.95361841]
Reward: -1  Episode Reward:  -307
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1455.90895968  -40.34168621 1772.29402989  -35.88578819]
------
Step:58, Action:East
State  261
Old Q Values:  [1455.90895968  -40.34168621 1772.29402989  -35.88578819]
New Q values:  [1455.90895968  -40.34168621 1379.58578876  -35.88578819]
Reward: -1  Episode Reward:  -308
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2237.56058935 1383.95361841]
------
Step:59, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  4392.04632874  1579.3091204 ]
New Q values:  [   16.82637525 -5807.06396197  3111.34137396  1579.3091204 ]
Reward: -1  Episode Reward:  -309
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  4517.07614154 -7525.7277781   2514.13018046]
------
Step:60, Action:South
State  288
Old Q Values:  [-5274.16868635  4517.07614154 -7525.7277781   2514.13018046]
New Q values:  [-5274.16868635  2981.35329907 -7525.7277781   2514.13018046]
Reward: -301  Episode Reward:  -610
xxxxx
x .gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  2981.35329907 -7525.7277781   2514.13018046]
------
Step:61, Action:South
State  288
Old Q Values:  [-5274.16868635  2981.35329907 -7525.7277781   2514.13018046]
New Q values:  [-5274.16868635  1906.34730935 -7525.7277781   2514.13018046]
Reward: -301  Episode Reward:  -911
xxxxx
x .gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  1906.34730935 -7525.7277781   2514.13018046]
------
Step:62, Action:West
State  288
Old Q Values:  [-5274.16868635  1906.34730935 -7525.7277781   2514.13018046]
New Q values:  [-5274.16868635  1906.34730935 -7525.7277781   1676.32024899]
Reward: -1  Episode Reward:  -912
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2237.56058935 1383.95361841]
------
Step:63, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3847.11033025  4718.59405885]
New Q values:  [-2527.46239811 -8521.23367799  2110.14832491  4718.59405885]
Reward: -1  Episode Reward:  -913
xxxxx
x ..x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  1906.34730935 -7525.7277781   1676.32024899]
------
Step:64, Action:South
State  288
Old Q Values:  [-5274.16868635  1906.34730935 -7525.7277781   1676.32024899]
New Q values:  [-5274.16868635  1153.84311655 -7525.7277781   1676.32024899]
Reward: -301  Episode Reward:  -1214
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635  1153.84311655 -7525.7277781   1676.32024899]
------
Step:65, Action:South
State  288
Old Q Values:  [-5274.16868635  1153.84311655 -7525.7277781   1676.32024899]
New Q values:  [-5274.16868635   783.83332131 -7525.7277781   1676.32024899]
Reward: -301  Episode Reward:  -1515
xxxxx
x ..x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635   783.83332131 -7525.7277781   1676.32024899]
------
Step:66, Action:West
State  288
Old Q Values:  [-5274.16868635   783.83332131 -7525.7277781   1676.32024899]
New Q values:  [-5274.16868635   783.83332131 -7525.7277781   1341.1962764 ]
Reward: -1  Episode Reward:  -1516
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2237.56058935 1383.95361841]
------
Step:67, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2237.56058935 1383.95361841]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  1383.95361841]
Reward: -10001  Episode Reward:  -11517
xxxxx
x ..x
x   x
x  gx
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  633.45366223 -8695.4397473   2674.26095248 -2601.74710518]
------
Step:1, Action:East
State  261
Old Q Values:  [1455.90895968  -40.34168621 1379.58578876  -35.88578819]
New Q values:  [1455.90895968  -40.34168621 1490.63672769  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  3111.34137396  1579.3091204 ]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2110.14832491  4718.59405885]
New Q values:  [-2527.46239811 -8521.23367799  1251.81821288  4718.59405885]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635   783.83332131 -7525.7277781   1341.1962764 ]
------
Step:3, Action:West
State  288
Old Q Values:  [-5274.16868635   783.83332131 -7525.7277781   1341.1962764 ]
New Q values:  [-5274.16868635   783.83332131 -7525.7277781   1951.45672821]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1251.81821288  4718.59405885]
------
Step:4, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  1383.95361841]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  1000.17246567]
Reward: -1  Episode Reward:  16
xxxxx
x .gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1455.90895968  -40.34168621 1490.63672769  -35.88578819]
------
Step:5, Action:East
State  261
Old Q Values:  [1455.90895968  -40.34168621 1490.63672769  -35.88578819]
New Q values:  [1455.90895968  -40.34168621  895.70643078  -35.88578819]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1000.17246567]
------
Step:6, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  1000.17246567]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   836.24167417]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x...x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1455.90895968  -40.34168621  895.70643078  -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [1455.90895968  -40.34168621  895.70643078  -35.88578819]
New Q values:  [1811.62718947  -40.34168621  895.70643078  -35.88578819]
Reward: 9  Episode Reward:  23
xxxxx
x ..x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  953.53066505 4079.54535198    0.        ]
------
Step:8, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  5244.05902686     0.        ]
New Q values:  [    0.         -5969.29177534  5887.67551964     0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x ..x
x a.x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
------
Step:9, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.25484461e+03 9.00032827e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 7.25484461e+03 9.31284368e+03 2.91043938e+03]
Reward: 9  Episode Reward:  41
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19024.37456403  8228.24450733   483.97903422 -1455.65174173]
------
Step:10, Action:North
State  208
Old Q Values:  [19024.37456403  8228.24450733   483.97903422 -1455.65174173]
New Q values:  [27305.21575954  8228.24450733   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  50
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  7865.32005299  -180.00807518 65633.55311309]
------
Step:11, Action:West
State  130
Old Q Values:  [36041.91667283  7865.32005299  -180.00807518 65633.55311309]
New Q values:  [ 36041.91667283   7865.32005299   -180.00807518 125559.8316413 ]
Reward: 100009  Episode Reward:  100059
xxxxx
x a x
xg  x
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2162.71750838   352.58479928]
------
Step:1, Action:East
State  121
Old Q Values:  [    0.             0.         -9455.07588649   187.25031635]
New Q values:  [    0.             0.         -8868.36952603   187.25031635]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088  738.58905263  789.50771688 -180.6       ]
------
Step:1, Action:South
State  181
Old Q Values:  [ 141.04354088  738.58905263  789.50771688 -180.6       ]
New Q values:  [ 141.04354088  844.32377789  789.50771688 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1811.62718947  -40.34168621  895.70643078  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [1811.62718947  -40.34168621  895.70643078  -35.88578819]
New Q values:  [977.34800915 -40.34168621 895.70643078 -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088  844.32377789  789.50771688 -180.6       ]
------
Step:3, Action:South
State  183
Old Q Values:  [ 390.42113812  953.53066505 4079.54535198    0.        ]
New Q values:  [ 390.42113812  674.01666877 4079.54535198    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[977.34800915 -40.34168621 895.70643078 -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [977.34800915 -40.34168621 895.70643078 -35.88578819]
New Q values:  [1614.20280926  -40.34168621  895.70643078  -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 390.42113812  674.01666877 4079.54535198    0.        ]
------
Step:5, Action:East
State  181
Old Q Values:  [ 141.04354088  844.32377789  789.50771688 -180.6       ]
New Q values:  [ 141.04354088  844.32377789  986.61968499 -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2238.05532748  -113.03832106  1099.96026581]
------
Step:6, Action:South
State  193
Old Q Values:  [-5922.26708831  2238.05532748  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831  1151.49463324  -113.03832106  1099.96026581]
Reward: 9  Episode Reward:  14
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   836.24167417]
------
Step:7, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3111.34137396  1579.3091204 ]
New Q values:  [   16.82637525 -5807.06396197  3111.34137396  1115.38449094]
Reward: -1  Episode Reward:  13
xxxxx
x.g.x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1614.20280926  -40.34168621  895.70643078  -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [1614.20280926  -40.34168621  895.70643078  -35.88578819]
New Q values:  [941.0670292  -40.34168621 895.70643078 -35.88578819]
Reward: -1  Episode Reward:  12
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088  844.32377789  986.61968499 -180.6       ]
------
Step:9, Action:South
State  181
Old Q Values:  [ 141.04354088  844.32377789  986.61968499 -180.6       ]
New Q values:  [ 141.04354088  619.44961992  986.61968499 -180.6       ]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[941.0670292  -40.34168621 895.70643078 -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [941.0670292  -40.34168621 895.70643078 -35.88578819]
New Q values:  [671.81271718 -40.34168621 895.70643078 -35.88578819]
Reward: -1  Episode Reward:  10
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 141.04354088  619.44961992  986.61968499 -180.6       ]
------
Step:11, Action:East
State  181
Old Q Values:  [ 141.04354088  619.44961992  986.61968499 -180.6       ]
New Q values:  [ 141.04354088  619.44961992  739.49626397 -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
x..gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1151.49463324  -113.03832106  1099.96026581]
------
Step:12, Action:South
State  193
Old Q Values:  [-5922.26708831  1151.49463324  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831   710.87035555  -113.03832106  1099.96026581]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   836.24167417]
------
Step:13, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   836.24167417]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   602.6085989 ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[671.81271718 -40.34168621 895.70643078 -35.88578819]
------
Step:14, Action:East
State  261
Old Q Values:  [671.81271718 -40.34168621 895.70643078 -35.88578819]
New Q values:  [  671.81271718   -40.34168621 -4708.9150155    -35.88578819]
Reward: -10001  Episode Reward:  -9994
xxxxx
x...x
x  .x
x g.x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:1, Action:East
State  109
Old Q Values:  [-241.10880094  769.15738698   31.9495824  -180.6       ]
New Q values:  [ -241.10880094   769.15738698 -5311.13806214  -180.6       ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x g.x
x. .x
x...x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1174.95895244]
------
Step:1, Action:West
State  136
Old Q Values:  [-5281.21195651  3027.53609523 -6245.61866138    65.52854677]
New Q values:  [-5281.21195651  3027.53609523 -6245.61866138    87.78651362]
Reward: 9  Episode Reward:  9
xxxxx
x.agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   187.25031635]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2162.71750838   352.58479928]
New Q values:  [ -253.44886264 -1902.20915811  2162.71750838   457.59203361]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:3, Action:East
State  109
Old Q Values:  [ -241.10880094   769.15738698 -5311.13806214  -180.6       ]
New Q values:  [ -241.10880094   769.15738698 -2068.88012995  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   187.25031635]
------
Step:4, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2162.71750838   457.59203361]
New Q values:  [ -253.44886264 -1902.20915811  2162.71750838   493.59492734]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:5, Action:East
State  109
Old Q Values:  [ -241.10880094   769.15738698 -2068.88012995  -180.6       ]
New Q values:  [-241.10880094  769.15738698 -771.97695707 -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   187.25031635]
------
Step:6, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2162.71750838   493.59492734]
New Q values:  [ -253.44886264 -1902.20915811  2162.71750838   507.99608483]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:7, Action:East
State  109
Old Q Values:  [-241.10880094  769.15738698 -771.97695707 -180.6       ]
New Q values:  [-241.10880094  769.15738698 -253.21568792 -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
x agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   187.25031635]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2162.71750838   507.99608483]
New Q values:  [ -253.44886264 -1902.20915811  2162.71750838   513.75654783]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558   14.23378158 1119.89047676 -252.78192178]
New Q values:  [-252.35169558   14.23378158 1111.16340733 -252.78192178]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2212.69072208   311.87331119]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2212.69072208   311.87331119]
New Q values:  [ -281.736      -9545.4473624   2179.83959477   311.87331119]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4317.87768647 -180.6        1174.95895244]
------
Step:11, Action:South
State  138
Old Q Values:  [-139.45925583 4317.87768647 -180.6        1174.95895244]
New Q values:  [ -139.45925583 10259.41815861  -180.6         1174.95895244]
Reward: -1  Episode Reward:  9
xxxxx
x   x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.84428903e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
------
Step:12, Action:North
State  210
Old Q Values:  [2.84428903e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.44543816e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -139.45925583 10259.41815861  -180.6         1174.95895244]
------
Step:13, Action:South
State  138
Old Q Values:  [ -139.45925583 10259.41815861  -180.6         1174.95895244]
New Q values:  [-139.45925583 8439.48173133 -180.6        1174.95895244]
Reward: -1  Episode Reward:  7
xxxxx
x   x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.44543816e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
------
Step:14, Action:North
State  216
Old Q Values:  [ 1449.66632106  4146.92700389 -8489.43729461   531.09593838]
New Q values:  [ 3111.11104782  4146.92700389 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 8439.48173133 -180.6        1174.95895244]
------
Step:15, Action:South
State  138
Old Q Values:  [-139.45925583 8439.48173133 -180.6        1174.95895244]
New Q values:  [-139.45925583 4619.2707937  -180.6        1174.95895244]
Reward: -1  Episode Reward:  5
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3111.11104782  4146.92700389 -8489.43729461   531.09593838]
------
Step:16, Action:South
State  208
Old Q Values:  [27305.21575954  8228.24450733   483.97903422 -1455.65174173]
New Q values:  [27305.21575954  3882.13482139   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  14
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635   783.83332131 -7525.7277781   1951.45672821]
------
Step:17, Action:West
State  288
Old Q Values:  [-5274.16868635   783.83332131 -7525.7277781   1951.45672821]
New Q values:  [-5274.16868635   783.83332131 -7525.7277781    966.76527096]
Reward: 9  Episode Reward:  23
xxxxx
x  gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   602.6085989 ]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1251.81821288  4718.59405885]
New Q values:  [-2527.46239811 -8521.23367799  1251.81821288 13719.22742491]
Reward: 9  Episode Reward:  32
xxxxx
x g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39421.2993379   2256.66526474  6585.91095232  1875.31501677]
------
Step:19, Action:North
State  257
Old Q Values:  [39421.2993379   2256.66526474  6585.91095232  1875.31501677]
New Q values:  [39292.26537866  2256.66526474  6585.91095232  1875.31501677]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 37619.68428217 70022.57475256     0.        ]
------
Step:20, Action:North
State  183
Old Q Values:  [ 390.42113812  674.01666877 4079.54535198    0.        ]
New Q values:  [ 466.72656914  674.01666877 4079.54535198    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:21, Action:East
State  99
Old Q Values:  [     0.          54146.33549678 121314.428556        0.        ]
New Q values:  [    0.         54146.33549678 87826.18181846     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  43132.21661867 131003.36798688]
------
Step:22, Action:West
State  122
Old Q Values:  [ -281.736      -9545.4473624   2179.83959477   311.87331119]
New Q values:  [ -281.736      -9545.4473624   2179.83959477   275.43590235]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   504.28859293  -180.6       ]
------
Step:23, Action:East
State  99
Old Q Values:  [    0.         54146.33549678 87826.18181846     0.        ]
New Q values:  [    0.         54146.33549678 74430.88312345     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          4272.38349051  43132.21661867 131003.36798688]
------
Step:24, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2235.60701632    58.89272124]
New Q values:  [-9594.56523706 -8069.05606225  2235.60701632   253.70430459]
Reward: -1  Episode Reward:  36
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  769.15738698 -253.21568792 -180.6       ]
------
Step:25, Action:South
State  99
Old Q Values:  [    0.         54146.33549678 74430.88312345     0.        ]
New Q values:  [    0.         45176.27984221 74430.88312345     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 37619.68428217 70022.57475256     0.        ]
------
Step:26, Action:North
State  181
Old Q Values:  [ 141.04354088  619.44961992  739.49626397 -180.6       ]
New Q values:  [ 286.56463245  619.44961992  739.49626397 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  769.15738698 -253.21568792 -180.6       ]
------
Step:27, Action:South
State  99
Old Q Values:  [    0.         45176.27984221 74430.88312345     0.        ]
New Q values:  [    0.         41588.25758038 74430.88312345     0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 37619.68428217 70022.57475256     0.        ]
------
Step:28, Action:North
State  183
Old Q Values:  [ 466.72656914  674.01666877 4079.54535198    0.        ]
New Q values:  [ 497.24874155  674.01666877 4079.54535198    0.        ]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:29, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   504.28859293  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   855.06731561  -180.6       ]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2179.83959477   275.43590235]
------
Step:30, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2179.83959477   275.43590235]
New Q values:  [ -281.736      -9545.4473624   2257.11707602   275.43590235]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4619.2707937  -180.6        1174.95895244]
------
Step:31, Action:South
State  136
Old Q Values:  [-5281.21195651  3027.53609523 -6245.61866138    87.78651362]
New Q values:  [-5281.21195651  9401.97916595 -6245.61866138    87.78651362]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27305.21575954  3882.13482139   483.97903422 -1455.65174173]
------
Step:32, Action:North
State  208
Old Q Values:  [27305.21575954  3882.13482139   483.97903422 -1455.65174173]
New Q values:  [13742.0800536   3882.13482139   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  28
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  9401.97916595 -6245.61866138    87.78651362]
------
Step:33, Action:South
State  136
Old Q Values:  [-5281.21195651  9401.97916595 -6245.61866138    87.78651362]
New Q values:  [-5281.21195651  7882.81568246 -6245.61866138    87.78651362]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13742.0800536   3882.13482139   483.97903422 -1455.65174173]
------
Step:34, Action:South
State  208
Old Q Values:  [13742.0800536   3882.13482139   483.97903422 -1455.65174173]
New Q values:  [13742.0800536   1842.28350984   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  26
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635   783.83332131 -7525.7277781    966.76527096]
------
Step:35, Action:West
State  288
Old Q Values:  [-5274.16868635   783.83332131 -7525.7277781    966.76527096]
New Q values:  [-5274.16868635   783.83332131 -7525.7277781   4501.87433586]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1251.81821288 13719.22742491]
------
Step:36, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1251.81821288 13719.22742491]
New Q values:  [-2527.46239811 -8521.23367799  1251.81821288  5688.63478512]
Reward: -1  Episode Reward:  24
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  671.81271718   -40.34168621 -4708.9150155    -35.88578819]
------
Step:37, Action:North
State  260
Old Q Values:  [  633.45366223 -8695.4397473   2674.26095248 -2601.74710518]
New Q values:  [ 2111.30087344 -8695.4397473   2674.26095248 -2601.74710518]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-8180.68959572  1214.0046542   6195.06469515 -4966.32149798]
------
Step:38, Action:East
State  176
Old Q Values:  [76485.61294353  1327.79507613 11113.76513966     0.        ]
New Q values:  [76485.61294353  1327.79507613 67244.75915935     0.        ]
Reward: 100009  Episode Reward:  100032
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1251.81821288  5688.63478512]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3111.34137396  1115.38449094]
New Q values:  [   16.82637525 -5807.06396197  3111.34137396  1253.83208212]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2111.30087344 -8695.4397473   2674.26095248 -2601.74710518]
------
Step:2, Action:East
State  260
Old Q Values:  [ 2111.30087344 -8695.4397473   2674.26095248 -2601.74710518]
New Q values:  [ 2111.30087344 -8695.4397473   2775.69481653 -2601.74710518]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1251.81821288  5688.63478512]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1251.81821288  5688.63478512]
New Q values:  [-2527.46239811 -8521.23367799  1856.68958591  5688.63478512]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x. .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635   783.83332131 -7525.7277781   4501.87433586]
------
Step:4, Action:South
State  288
Old Q Values:  [-5274.16868635   783.83332131 -7525.7277781   4501.87433586]
New Q values:  [-5274.16868635 -4516.50437072 -7525.7277781   4501.87433586]
Reward: -10301  Episode Reward:  -10284
xxxxx
x...x
x. .x
x  gx
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   855.06731561  -180.6       ]
------
Step:1, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   855.06731561  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684  1024.56204905  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2257.11707602   275.43590235]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2257.11707602   275.43590235]
New Q values:  [ -281.736      -9545.4473624   2294.02806852   275.43590235]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4619.2707937  -180.6        1174.95895244]
------
Step:3, Action:South
State  136
Old Q Values:  [-5281.21195651  7882.81568246 -6245.61866138    87.78651362]
New Q values:  [-5281.21195651  4402.60437415 -6245.61866138    87.78651362]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3111.11104782  4146.92700389 -8489.43729461   531.09593838]
------
Step:4, Action:South
State  216
Old Q Values:  [ 3111.11104782  4146.92700389 -8489.43729461   531.09593838]
New Q values:  [ 3111.11104782  3014.73310231 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -4516.50437072 -7525.7277781   4501.87433586]
------
Step:5, Action:West
State  288
Old Q Values:  [-5274.16868635 -4516.50437072 -7525.7277781   4501.87433586]
New Q values:  [-5274.16868635 -4516.50437072 -7525.7277781   3512.74016988]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1856.68958591  5688.63478512]
------
Step:6, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   602.6085989 ]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   447.98725471]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  671.81271718   -40.34168621 -4708.9150155    -35.88578819]
------
Step:7, Action:North
State  257
Old Q Values:  [39292.26537866  2256.66526474  6585.91095232  1875.31501677]
New Q values:  [39234.65179496  2256.66526474  6585.91095232  1875.31501677]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xag x
x   x
xxxxx
Step:8, Action:West
State  181
Old Q Values:  [ 286.56463245  619.44961992  739.49626397 -180.6       ]
New Q values:  [286.56463245 619.44961992 739.49626397 -30.99112081]
Reward: -301  Episode Reward:  -248
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[286.56463245 619.44961992 739.49626397 -30.99112081]
------
Step:9, Action:East
State  177
Old Q Values:  [78394.48547832 37619.68428217 70022.57475256     0.        ]
New Q values:  [78394.48547832 37619.68428217 88344.41798077     0.        ]
Reward: 100009  Episode Reward:  99761
xxxxx
x  gx
x a x
x   x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:1, Action:East
State  107
Old Q Values:  [-252.35169558   14.23378158 1111.16340733 -252.78192178]
New Q values:  [-252.35169558   14.23378158 1098.68061545 -252.78192178]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2162.71750838   513.75654783]
------
Step:2, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2162.71750838   513.75654783]
New Q values:  [ -253.44886264 -1902.20915811  2256.26824146   513.75654783]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 4619.2707937  -180.6        1174.95895244]
------
Step:3, Action:South
State  138
Old Q Values:  [-139.45925583 4619.2707937  -180.6        1174.95895244]
New Q values:  [-139.45925583  -24.26766644 -180.6        1174.95895244]
Reward: -9991  Episode Reward:  -9973
xxxxx
x   x
x..gx
x.. x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  671.81271718   -40.34168621 -4708.9150155    -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [  671.81271718   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [ 1497.98869246   -40.34168621 -4708.9150155    -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 497.24874155  674.01666877 4079.54535198    0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  5887.67551964     0.        ]
New Q values:  [    0.         -5969.29177534  4969.94460373     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  8.69824799e+03  1.20371620e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  8.69824799e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  7.82101366e+03  1.20371620e+03]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.44543816e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  208
Old Q Values:  [13742.0800536   1842.28350984   483.97903422 -1455.65174173]
New Q values:  [43170.18151383  1842.28350984   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   7865.32005299   -180.00807518 125559.8316413 ]
------
Step:5, Action:West
State  130
Old Q Values:  [ 36041.91667283   7865.32005299   -180.00807518 125559.8316413 ]
New Q values:  [36041.91667283  7865.32005299  -180.00807518 89079.81083083]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
------
Step:6, Action:West
State  121
Old Q Values:  [    0.             0.         -8868.36952603   187.25031635]
New Q values:  [    0.             0.         -8868.36952603   311.04734264]
Reward: 9  Episode Reward:  54
xxxxx
xa gx
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  769.15738698 -253.21568792 -180.6       ]
------
Step:7, Action:South
State  109
Old Q Values:  [-241.10880094  769.15738698 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  596.36686023 -253.21568792 -180.6       ]
Reward: -1  Episode Reward:  53
xxxxx
x g x
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 681.98074137 964.34635147 940.95197235]
------
Step:8, Action:East
State  189
Old Q Values:  [  9.84673294 681.98074137 964.34635147 940.95197235]
New Q values:  [   9.84673294  681.98074137 1475.9137954   940.95197235]
Reward: -1  Episode Reward:  52
xxxxx
x  gx
x a x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:9, Action:South
State  193
Old Q Values:  [-5922.26708831   710.87035555  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831 60424.14431863  -113.03832106  1099.96026581]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
x  gx
x a x
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2294.02806852   275.43590235]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2256.26824146   513.75654783]
New Q values:  [ -253.44886264 -1902.20915811  1260.39498232   513.75654783]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  -24.26766644 -180.6        1174.95895244]
------
Step:2, Action:West
State  138
Old Q Values:  [-139.45925583  -24.26766644 -180.6        1174.95895244]
New Q values:  [-139.45925583  -24.26766644 -180.6        1157.59200153]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   2294.02806852   275.43590235]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   2294.02806852   275.43590235]
New Q values:  [ -281.736      -9545.4473624   1264.28882787   275.43590235]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583  -24.26766644 -180.6        1157.59200153]
------
Step:4, Action:West
State  138
Old Q Values:  [-139.45925583  -24.26766644 -180.6        1157.59200153]
New Q values:  [-139.45925583  -24.26766644 -180.6         841.72344897]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624   1264.28882787   275.43590235]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624   1264.28882787   275.43590235]
New Q values:  [ -281.736      -9545.4473624    757.63256584   275.43590235]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x.g.x
x. .x
xxxxx
Step:6, Action:East
State  138
Old Q Values:  [-139.45925583  -24.26766644 -180.6         841.72344897]
New Q values:  [-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  8.41723449e+02]
Reward: -301  Episode Reward:  -296
xxxxx
x. ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  8.41723449e+02]
------
Step:7, Action:West
State  136
Old Q Values:  [-5281.21195651  4402.60437415 -6245.61866138    87.78651362]
New Q values:  [-5281.21195651  4402.60437415 -6245.61866138   127.82880824]
Reward: -1  Episode Reward:  -297
xxxxx
x.agx
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:8, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2235.60701632   253.70430459]
New Q values:  [-9594.56523706 -8069.05606225  2235.60701632   285.79177991]
Reward: 9  Episode Reward:  -288
xxxxx
xag x
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  596.36686023 -253.21568792 -180.6       ]
------
Step:9, Action:South
State  109
Old Q Values:  [-241.10880094  596.36686023 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  465.79562328 -253.21568792 -180.6       ]
Reward: 9  Episode Reward:  -279
xxxxx
x  gx
xa..x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[286.56463245 619.44961992 739.49626397 -30.99112081]
------
Step:10, Action:East
State  181
Old Q Values:  [286.56463245 619.44961992 739.49626397 -30.99112081]
New Q values:  [286.56463245 619.44961992 509.08715778 -30.99112081]
Reward: 9  Episode Reward:  -270
xxxxx
x g x
x a.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   692.96217398   174.55451539     0.        ]
------
Step:11, Action:South
State  196
Old Q Values:  [-2469.90645144   692.96217398   174.55451539     0.        ]
New Q values:  [-2469.90645144  1209.98728178   174.55451539     0.        ]
Reward: -1  Episode Reward:  -271
xxxxx
xg  x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  3111.34137396  1253.83208212]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1856.68958591  5688.63478512]
New Q values:  [-2527.46239811 -8521.23367799  1801.89788533  5688.63478512]
Reward: 9  Episode Reward:  -262
xxxxx
x g x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -4516.50437072 -7525.7277781   3512.74016988]
------
Step:13, Action:West
State  288
Old Q Values:  [-5274.16868635 -4516.50437072 -7525.7277781   3512.74016988]
New Q values:  [-5274.16868635 -4516.50437072 -7525.7277781   1538.89224437]
Reward: -1  Episode Reward:  -263
xxxxx
x  gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   447.98725471]
------
Step:14, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3111.34137396  1253.83208212]
New Q values:  [   16.82637525 -5807.06396197  3111.34137396   956.32944059]
Reward: 9  Episode Reward:  -254
xxxxx
x g x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1497.98869246   -40.34168621 -4708.9150155    -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [ 1497.98869246   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  784.43036296   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  -255
xxxxx
x   x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[286.56463245 619.44961992 509.08715778 -30.99112081]
------
Step:16, Action:South
State  177
Old Q Values:  [78394.48547832 37619.68428217 88344.41798077     0.        ]
New Q values:  [78394.48547832 26817.66925136 88344.41798077     0.        ]
Reward: -1  Episode Reward:  -256
xxxxx
x   x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39234.65179496  2256.66526474  6585.91095232  1875.31501677]
------
Step:17, Action:North
State  261
Old Q Values:  [  784.43036296   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [ 1537.03575078   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  -257
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 497.24874155  674.01666877 4079.54535198    0.        ]
------
Step:18, Action:East
State  183
Old Q Values:  [ 497.24874155  674.01666877 4079.54535198    0.        ]
New Q values:  [ 497.24874155  674.01666877 5415.87004968    0.        ]
Reward: -1  Episode Reward:  -258
xxxxx
x   x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
------
Step:19, Action:East
State  195
Old Q Values:  [  38.85388605 2677.89620798 8791.36098619 1101.59744825]
New Q values:  [3.88538861e+01 2.67789621e+03 6.78582589e+04 1.10159745e+03]
Reward: 100009  Episode Reward:  99751
xxxxx
x   x
x  ax
x  gx
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   447.98725471]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1801.89788533  5688.63478512]
New Q values:  [-2527.46239811 -8521.23367799  1801.89788533  2741.96463928]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1537.03575078   -40.34168621 -4708.9150155    -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [ 1537.03575078   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  806.04918629   -40.34168621 -4708.9150155    -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[286.56463245 619.44961992 509.08715778 -30.99112081]
------
Step:3, Action:South
State  180
Old Q Values:  [-8180.68959572  1214.0046542   6195.06469515 -4966.32149798]
New Q values:  [-8180.68959572  1317.71030664  6195.06469515 -4966.32149798]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2111.30087344 -8695.4397473   2775.69481653 -2601.74710518]
------
Step:4, Action:East
State  260
Old Q Values:  [ 2111.30087344 -8695.4397473   2775.69481653 -2601.74710518]
New Q values:  [ 2111.30087344 -8695.4397473   1932.26731839 -2601.74710518]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1801.89788533  2741.96463928]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1801.89788533  2741.96463928]
New Q values:  [-2527.46239811 -8521.23367799  1181.82682744  2741.96463928]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -4516.50437072 -7525.7277781   1538.89224437]
------
Step:6, Action:South
State  288
Old Q Values:  [-5274.16868635 -4516.50437072 -7525.7277781   1538.89224437]
New Q values:  [-5274.16868635 -7525.53407498 -7525.7277781   1538.89224437]
Reward: -10301  Episode Reward:  -10286
xxxxx
x...x
x ..x
x  gx
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   447.98725471]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1181.82682744  2741.96463928]
New Q values:  [-2527.46239811 -8521.23367799  1181.82682744  1344.0006116 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  806.04918629   -40.34168621 -4708.9150155    -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [  806.04918629   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  513.65456049   -40.34168621 -4708.9150155    -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[286.56463245 619.44961992 509.08715778 -30.99112081]
------
Step:3, Action:South
State  181
Old Q Values:  [286.56463245 619.44961992 509.08715778 -30.99112081]
New Q values:  [286.56463245 401.27621611 509.08715778 -30.99112081]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  513.65456049   -40.34168621 -4708.9150155    -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [  513.65456049   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  357.58797153   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[286.56463245 401.27621611 509.08715778 -30.99112081]
------
Step:5, Action:East
State  181
Old Q Values:  [286.56463245 401.27621611 509.08715778 -30.99112081]
New Q values:  [286.56463245 401.27621611 572.03104765 -30.99112081]
Reward: 9  Episode Reward:  25
xxxxx
x.g.x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1209.98728178   174.55451539     0.        ]
------
Step:6, Action:South
State  192
Old Q Values:  [3.89777037e-01 7.25484461e+03 9.31284368e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.30453803e+03 9.31284368e+03 2.91043938e+03]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1181.82682744  1344.0006116 ]
------
Step:7, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3111.34137396   956.32944059]
New Q values:  [   16.82637525 -5807.06396197  3111.34137396   489.20816769]
Reward: -1  Episode Reward:  23
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  357.58797153   -40.34168621 -4708.9150155    -35.88578819]
------
Step:8, Action:North
State  260
Old Q Values:  [ 2111.30087344 -8695.4397473   1932.26731839 -2601.74710518]
New Q values:  [ 2702.43975792 -8695.4397473   1932.26731839 -2601.74710518]
Reward: -1  Episode Reward:  22
xxxxx
xg..x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-8180.68959572  1317.71030664  6195.06469515 -4966.32149798]
------
Step:9, Action:East
State  180
Old Q Values:  [-8180.68959572  1317.71030664  6195.06469515 -4966.32149798]
New Q values:  [-8180.68959572  1317.71030664  5271.27898154 -4966.32149798]
Reward: -1  Episode Reward:  21
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.30453803e+03 9.31284368e+03 2.91043938e+03]
------
Step:10, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.30453803e+03 9.31284368e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.30453803e+03 1.66815919e+04 2.91043938e+03]
Reward: 9  Episode Reward:  30
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[43170.18151383  1842.28350984   483.97903422 -1455.65174173]
------
Step:11, Action:North
State  208
Old Q Values:  [43170.18151383  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [49199.76812627  1842.28350984   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  39
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[  8775.70846068  24033.32294218  -8652.84       106420.98506913]
------
Step:12, Action:South
State  128
Old Q Values:  [  8775.70846068  24033.32294218  -8652.84       106420.98506913]
New Q values:  [  8775.70846068  24372.65961475  -8652.84       106420.98506913]
Reward: -1  Episode Reward:  38
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[49199.76812627  1842.28350984   483.97903422 -1455.65174173]
------
Step:13, Action:North
State  208
Old Q Values:  [49199.76812627  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [51605.60277125  1842.28350984   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  37
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[  8775.70846068  24372.65961475  -8652.84       106420.98506913]
------
Step:14, Action:South
State  128
Old Q Values:  [  8775.70846068  24372.65961475  -8652.84       106420.98506913]
New Q values:  [  8775.70846068  25230.14467728  -8652.84       106420.98506913]
Reward: -1  Episode Reward:  36
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[51605.60277125  1842.28350984   483.97903422 -1455.65174173]
------
Step:15, Action:North
State  208
Old Q Values:  [51605.60277125  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [52567.93662924  1842.28350984   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  35
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[  8775.70846068  25230.14467728  -8652.84       106420.98506913]
------
Step:16, Action:South
State  128
Old Q Values:  [  8775.70846068  25230.14467728  -8652.84       106420.98506913]
New Q values:  [  8775.70846068  25861.83885968  -8652.84       106420.98506913]
Reward: -1  Episode Reward:  34
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[52567.93662924  1842.28350984   483.97903422 -1455.65174173]
------
Step:17, Action:North
State  208
Old Q Values:  [52567.93662924  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [52952.87017244  1842.28350984   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  33
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[  8775.70846068  25861.83885968  -8652.84       106420.98506913]
------
Step:18, Action:South
State  128
Old Q Values:  [  8775.70846068  25861.83885968  -8652.84       106420.98506913]
New Q values:  [  8775.70846068  26229.9965956   -8652.84       106420.98506913]
Reward: -1  Episode Reward:  32
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[52952.87017244  1842.28350984   483.97903422 -1455.65174173]
------
Step:19, Action:North
State  208
Old Q Values:  [52952.87017244  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [47904.49131822  1842.28350984   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  31
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  7865.32005299  -180.00807518 89079.81083083]
------
Step:20, Action:West
State  128
Old Q Values:  [  8775.70846068  26229.9965956   -8652.84       106420.98506913]
New Q values:  [ 8775.70846068 26229.9965956  -8652.84       72269.26637965]
Reward: 9  Episode Reward:  40
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.         11059.61439394  6789.02994987 98984.90784   ]
------
Step:21, Action:South
State  114
Old Q Values:  [  -180.6          4272.38349051  43132.21661867 131003.36798688]
New Q values:  [  -180.6          6712.83097384  43132.21661867 131003.36798688]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.30453803e+03 1.66815919e+04 2.91043938e+03]
------
Step:22, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  7.82101366e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  7.46411993e+03  1.20371620e+03]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.44543816e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
------
Step:23, Action:North
State  208
Old Q Values:  [47904.49131822  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [45885.13977654  1842.28350984   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  37
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  7865.32005299  -180.00807518 89079.81083083]
------
Step:24, Action:West
State  130
Old Q Values:  [36041.91667283  7865.32005299  -180.00807518 89079.81083083]
New Q values:  [36041.91667283  7865.32005299  -180.00807518 74932.33472839]
Reward: -1  Episode Reward:  36
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:25, Action:West
State  112
Old Q Values:  [    0.         11059.61439394  6789.02994987 98984.90784   ]
New Q values:  [    0.         11059.61439394  6789.02994987 99599.363136  ]
Reward: 100009  Episode Reward:  100045
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1181.82682744  1344.0006116 ]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1181.82682744  1344.0006116 ]
New Q values:  [-2527.46239811 -8521.23367799   939.79840429  1344.0006116 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-5274.16868635 -7525.53407498 -7525.7277781   1538.89224437]
------
Step:2, Action:North
State  288
Old Q Values:  [-5274.16868635 -7525.53407498 -7525.7277781   1538.89224437]
New Q values:  [ 2232.04699334 -7525.53407498 -7525.7277781   1538.89224437]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.44543816e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [1.44543816e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.82668530e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  7865.32005299  -180.00807518 74932.33472839]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  8.41723449e+02]
New Q values:  [-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  7.20207874e+02]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1260.39498232   513.75654783]
------
Step:5, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1260.39498232   513.75654783]
New Q values:  [ -253.44886264 -1902.20915811   719.62035521   513.75654783]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  7.20207874e+02]
------
Step:6, Action:West
State  130
Old Q Values:  [36041.91667283  7865.32005299  -180.00807518 74932.33472839]
New Q values:  [36041.91667283  7865.32005299  -180.00807518 69273.34428742]
Reward: -1  Episode Reward:  34
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   719.62035521   513.75654783]
New Q values:  [ -253.44886264 -1902.20915811   719.62035521   540.50680377]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.23378158 1098.68061545 -252.78192178]
------
Step:8, Action:East
State  105
Old Q Values:  [ -180.6            6.72320144 -1242.00860919     0.        ]
New Q values:  [-180.6           6.72320144 -404.08924089    0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   719.62035521   540.50680377]
New Q values:  [ -253.44886264 -1902.20915811   719.62035521   545.20690614]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.23378158 1098.68061545 -252.78192178]
------
Step:10, Action:East
State  105
Old Q Values:  [-180.6           6.72320144 -404.08924089    0.        ]
New Q values:  [-180.6           6.72320144  -68.92149356    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:11, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2235.60701632   285.79177991]
New Q values:  [-9594.56523706 -8069.05606225  2235.60701632   115.7336724 ]
Reward: -1  Episode Reward:  39
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6           6.72320144  -68.92149356    0.        ]
------
Step:12, Action:South
State  99
Old Q Values:  [    0.         41588.25758038 74430.88312345     0.        ]
New Q values:  [    0.         43144.02842638 74430.88312345     0.        ]
Reward: 9  Episode Reward:  48
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 26817.66925136 88344.41798077     0.        ]
------
Step:13, Action:North
State  183
Old Q Values:  [ 497.24874155  674.01666877 5415.87004968    0.        ]
New Q values:  [ 509.45761052  674.01666877 5415.87004968    0.        ]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007 1037.19371299 -120.29354603]
------
Step:14, Action:East
State  111
Old Q Values:  [-177.44732869   79.64232007 1037.19371299 -120.29354603]
New Q values:  [-177.44732869   79.64232007  630.16359176 -120.29354603]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   719.62035521   545.20690614]
------
Step:15, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   719.62035521   545.20690614]
New Q values:  [ -253.44886264 -1902.20915811   503.31050437   545.20690614]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  7.20207874e+02]
------
Step:16, Action:West
State  136
Old Q Values:  [-5281.21195651  4402.60437415 -6245.61866138   127.82880824]
New Q values:  [-5281.21195651  4402.60437415 -6245.61866138   143.84572609]
Reward: -1  Episode Reward:  44
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:17, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2235.60701632   115.7336724 ]
New Q values:  [-9594.56523706 -8069.05606225  2235.60701632   185.43215594]
Reward: -1  Episode Reward:  43
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  465.79562328 -253.21568792 -180.6       ]
------
Step:18, Action:South
State  109
Old Q Values:  [-241.10880094  465.79562328 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  357.32756361 -253.21568792 -180.6       ]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[286.56463245 401.27621611 572.03104765 -30.99112081]
------
Step:19, Action:East
State  177
Old Q Values:  [78394.48547832 26817.66925136 88344.41798077     0.        ]
New Q values:  [ 78394.48547832  26817.66925136 113470.4104879       0.        ]
Reward: 100009  Episode Reward:  100051
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007  630.16359176 -120.29354603]
------
Step:1, Action:East
State  107
Old Q Values:  [-252.35169558   14.23378158 1098.68061545 -252.78192178]
New Q values:  [-252.35169558   14.23378158  672.16201593 -252.78192178]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    757.63256584   275.43590235]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2235.60701632   185.43215594]
New Q values:  [-9594.56523706 -8069.05606225  2220.42411878   185.43215594]
Reward: 9  Episode Reward:  18
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4402.60437415 -6245.61866138   143.84572609]
------
Step:3, Action:South
State  136
Old Q Values:  [-5281.21195651  4402.60437415 -6245.61866138   143.84572609]
New Q values:  [-5281.21195651  2693.77506401 -6245.61866138   143.84572609]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3111.11104782  3014.73310231 -8489.43729461   531.09593838]
------
Step:4, Action:North
State  216
Old Q Values:  [ 3111.11104782  3014.73310231 -8489.43729461   531.09593838]
New Q values:  [ 1459.90678141  3014.73310231 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  7.20207874e+02]
------
Step:5, Action:West
State  136
Old Q Values:  [-5281.21195651  2693.77506401 -6245.61866138   143.84572609]
New Q values:  [-5281.21195651  2693.77506401 -6245.61866138   723.06552607]
Reward: -1  Episode Reward:  15
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2220.42411878   185.43215594]
------
Step:6, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2220.42411878   185.43215594]
New Q values:  [-9594.56523706 -8069.05606225  1695.70216671   185.43215594]
Reward: -1  Episode Reward:  14
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2693.77506401 -6245.61866138   723.06552607]
------
Step:7, Action:South
State  136
Old Q Values:  [-5281.21195651  2693.77506401 -6245.61866138   723.06552607]
New Q values:  [-5281.21195651  1981.3299563  -6245.61866138   723.06552607]
Reward: -1  Episode Reward:  13
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1459.90678141  3014.73310231 -8489.43729461   531.09593838]
------
Step:8, Action:South
State  216
Old Q Values:  [ 1459.90678141  3014.73310231 -8489.43729461   531.09593838]
New Q values:  [ 1459.90678141  1880.90733893 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  22
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2232.04699334 -7525.53407498 -7525.7277781   1538.89224437]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2232.04699334 -7525.53407498 -7525.7277781   1538.89224437]
New Q values:  [ 1456.49099902 -7525.53407498 -7525.7277781   1538.89224437]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1459.90678141  1880.90733893 -8489.43729461   531.09593838]
------
Step:10, Action:South
State  210
Old Q Values:  [2.82668530e+04 8.67557607e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.82668530e+04 3.93129810e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1456.49099902 -7525.53407498 -7525.7277781   1538.89224437]
------
Step:11, Action:North
State  288
Old Q Values:  [ 1456.49099902 -7525.53407498 -7525.7277781   1538.89224437]
New Q values:  [ 9062.05231232 -7525.53407498 -7525.7277781   1538.89224437]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.82668530e+04 3.93129810e+03 2.24233123e+03 3.52184257e+00]
------
Step:12, Action:North
State  210
Old Q Values:  [2.82668530e+04 3.93129810e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.15222036e+04 3.93129810e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  18
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  7.20207874e+02]
------
Step:13, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  7.20207874e+02]
New Q values:  [-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  4.51045222e+02]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   503.31050437   545.20690614]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   503.31050437   545.20690614]
New Q values:  [ -253.44886264 -1902.20915811   503.31050437   419.13136724]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.23378158  672.16201593 -252.78192178]
------
Step:15, Action:East
State  107
Old Q Values:  [-252.35169558   14.23378158  672.16201593 -252.78192178]
New Q values:  [-252.35169558   14.23378158  419.25795768 -252.78192178]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   503.31050437   419.13136724]
------
Step:16, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    757.63256584   275.43590235]
New Q values:  [ -281.736      -9545.4473624    437.7665928    275.43590235]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  4.51045222e+02]
------
Step:17, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  4.51045222e+02]
New Q values:  [-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  3.30811240e+02]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   503.31050437   419.13136724]
------
Step:18, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   503.31050437   419.13136724]
New Q values:  [ -253.44886264 -1902.20915811   299.96757373   419.13136724]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  3.30811240e+02]
------
Step:19, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -2.42676664e+01 -3.22965309e-01  3.30811240e+02]
New Q values:  [-139.45925583  -24.26766644   -0.32296531  263.05447381]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    437.7665928    275.43590235]
------
Step:20, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1695.70216671   185.43215594]
New Q values:  [-9594.56523706 -8069.05606225  1272.07985357   185.43215594]
Reward: -1  Episode Reward:  10
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  1981.3299563  -6245.61866138   723.06552607]
------
Step:21, Action:South
State  136
Old Q Values:  [-5281.21195651  1981.3299563  -6245.61866138   723.06552607]
New Q values:  [-5281.21195651 14557.47391548 -6245.61866138   723.06552607]
Reward: -1  Episode Reward:  9
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[45885.13977654  1842.28350984   483.97903422 -1455.65174173]
------
Step:22, Action:North
State  208
Old Q Values:  [45885.13977654  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [22720.69808526  1842.28350984   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  8
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651 14557.47391548 -6245.61866138   723.06552607]
------
Step:23, Action:South
State  136
Old Q Values:  [-5281.21195651 14557.47391548 -6245.61866138   723.06552607]
New Q values:  [-5281.21195651 12638.59899177 -6245.61866138   723.06552607]
Reward: -1  Episode Reward:  7
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22720.69808526  1842.28350984   483.97903422 -1455.65174173]
------
Step:24, Action:North
State  208
Old Q Values:  [22720.69808526  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [12879.25893163  1842.28350984   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  6
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651 12638.59899177 -6245.61866138   723.06552607]
------
Step:25, Action:South
State  138
Old Q Values:  [-139.45925583  -24.26766644   -0.32296531  263.05447381]
New Q values:  [-1.39459256e+02  5.53965135e+02 -3.22965309e-01  2.63054474e+02]
Reward: -1  Episode Reward:  5
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1459.90678141  1880.90733893 -8489.43729461   531.09593838]
------
Step:26, Action:South
State  208
Old Q Values:  [12879.25893163  1842.28350984   483.97903422 -1455.65174173]
New Q values:  [12879.25893163  3454.92909763   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  4
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9062.05231232 -7525.53407498 -7525.7277781   1538.89224437]
------
Step:27, Action:West
State  288
Old Q Values:  [ 9062.05231232 -7525.53407498 -7525.7277781   1538.89224437]
New Q values:  [ 9062.05231232 -7525.53407498 -7525.7277781    755.35307416]
Reward: 9  Episode Reward:  13
xxxxx
x   x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   447.98725471]
------
Step:28, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   939.79840429  1344.0006116 ]
New Q values:  [-2527.46239811 -8521.23367799   939.79840429 12313.39578313]
Reward: 9  Episode Reward:  22
xxxxx
x   x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39234.65179496  2256.66526474  6585.91095232  1875.31501677]
------
Step:29, Action:North
State  260
Old Q Values:  [ 2702.43975792 -8695.4397473   1932.26731839 -2601.74710518]
New Q values:  [ 2577.35928429 -8695.4397473   1932.26731839 -2601.74710518]
Reward: 9  Episode Reward:  31
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:30, Action:East
State  176
Old Q Values:  [76485.61294353  1327.79507613 67244.75915935     0.        ]
New Q values:  [76485.61294353  1327.79507613 91907.78124138     0.        ]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
xga x
x   x
xxxxx
Episode # 600
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2577.35928429 -8695.4397473   1932.26731839 -2601.74710518]
------
Step:1, Action:North
State  261
Old Q Values:  [  357.58797153   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  320.04450291   -40.34168621 -4708.9150155    -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[286.56463245 401.27621611 572.03104765 -30.99112081]
------
Step:2, Action:East
State  181
Old Q Values:  [286.56463245 401.27621611 572.03104765 -30.99112081]
New Q values:  [  286.56463245   401.27621611 -5402.79139641   -30.99112081]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x g.x
x ..x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9062.05231232 -7525.53407498 -7525.7277781    755.35307416]
------
Step:1, Action:North
State  288
Old Q Values:  [ 9062.05231232 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [ 7493.99860442 -7525.53407498 -7525.7277781    755.35307416]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12879.25893163  3454.92909763   483.97903422 -1455.65174173]
------
Step:2, Action:North
State  208
Old Q Values:  [12879.25893163  3454.92909763   483.97903422 -1455.65174173]
New Q values:  [ 8948.68327018  3454.92909763   483.97903422 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651 12638.59899177 -6245.61866138   723.06552607]
------
Step:3, Action:South
State  130
Old Q Values:  [36041.91667283  7865.32005299  -180.00807518 69273.34428742]
New Q values:  [36041.91667283  5830.13300225  -180.00807518 69273.34428742]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8948.68327018  3454.92909763   483.97903422 -1455.65174173]
------
Step:4, Action:North
State  208
Old Q Values:  [ 8948.68327018  3454.92909763   483.97903422 -1455.65174173]
New Q values:  [24360.8765943   3454.92909763   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  5830.13300225  -180.00807518 69273.34428742]
------
Step:5, Action:West
State  138
Old Q Values:  [-1.39459256e+02  5.53965135e+02 -3.22965309e-01  2.63054474e+02]
New Q values:  [-1.39459256e+02  5.53965135e+02 -3.22965309e-01  2.41951767e+02]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    437.7665928    275.43590235]
------
Step:6, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   299.96757373   419.13136724]
New Q values:  [ -253.44886264 -1902.20915811   285.57657002   419.13136724]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  5.53965135e+02 -3.22965309e-01  2.41951767e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [-1.39459256e+02  5.53965135e+02 -3.22965309e-01  2.41951767e+02]
New Q values:  [-1.39459256e+02  5.53965135e+02 -3.22965309e-01  2.27510685e+02]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    437.7665928    275.43590235]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -9545.4473624    437.7665928    275.43590235]
New Q values:  [ -281.736      -9545.4473624    340.69617765   275.43590235]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  5.53965135e+02 -3.22965309e-01  2.27510685e+02]
------
Step:9, Action:South
State  138
Old Q Values:  [-1.39459256e+02  5.53965135e+02 -3.22965309e-01  2.27510685e+02]
New Q values:  [-1.39459256e+02  3.67764713e+03 -3.22965309e-01  2.27510685e+02]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.15222036e+04 3.93129810e+03 2.24233123e+03 3.52184257e+00]
------
Step:10, Action:North
State  208
Old Q Values:  [24360.8765943   3454.92909763   483.97903422 -1455.65174173]
New Q values:  [10847.04477606  3454.92909763   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  3.67764713e+03 -3.22965309e-01  2.27510685e+02]
------
Step:11, Action:South
State  138
Old Q Values:  [-1.39459256e+02  3.67764713e+03 -3.22965309e-01  2.27510685e+02]
New Q values:  [-1.39459256e+02  4.92711992e+03 -3.22965309e-01  2.27510685e+02]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.15222036e+04 3.93129810e+03 2.24233123e+03 3.52184257e+00]
------
Step:12, Action:North
State  210
Old Q Values:  [1.15222036e+04 3.93129810e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [6.08641741e+03 3.93129810e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  4.92711992e+03 -3.22965309e-01  2.27510685e+02]
------
Step:13, Action:South
State  138
Old Q Values:  [-1.39459256e+02  4.92711992e+03 -3.22965309e-01  2.27510685e+02]
New Q values:  [-1.39459256e+02  3.79617319e+03 -3.22965309e-01  2.27510685e+02]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6.08641741e+03 3.93129810e+03 2.24233123e+03 3.52184257e+00]
------
Step:14, Action:North
State  210
Old Q Values:  [6.08641741e+03 3.93129810e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [3.57281892e+03 3.93129810e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  3.79617319e+03 -3.22965309e-01  2.27510685e+02]
------
Step:15, Action:South
State  138
Old Q Values:  [-1.39459256e+02  3.79617319e+03 -3.22965309e-01  2.27510685e+02]
New Q values:  [-1.39459256e+02  2.69725871e+03 -3.22965309e-01  2.27510685e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.57281892e+03 3.93129810e+03 2.24233123e+03 3.52184257e+00]
------
Step:16, Action:South
State  210
Old Q Values:  [3.57281892e+03 3.93129810e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [3.57281892e+03 3.82011882e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7493.99860442 -7525.53407498 -7525.7277781    755.35307416]
------
Step:17, Action:North
State  288
Old Q Values:  [ 7493.99860442 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [ 4143.03508814 -7525.53407498 -7525.7277781    755.35307416]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.57281892e+03 3.82011882e+03 2.24233123e+03 3.52184257e+00]
------
Step:18, Action:North
State  210
Old Q Values:  [3.57281892e+03 3.82011882e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.23770518e+03 3.82011882e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.69725871e+03 -3.22965309e-01  2.27510685e+02]
------
Step:19, Action:South
State  138
Old Q Values:  [-1.39459256e+02  2.69725871e+03 -3.22965309e-01  2.27510685e+02]
New Q values:  [-1.39459256e+02  2.22433913e+03 -3.22965309e-01  2.27510685e+02]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.23770518e+03 3.82011882e+03 2.24233123e+03 3.52184257e+00]
------
Step:20, Action:South
State  208
Old Q Values:  [10847.04477606  3454.92909763   483.97903422 -1455.65174173]
New Q values:  [10847.04477606  2624.2821655    483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4143.03508814 -7525.53407498 -7525.7277781    755.35307416]
------
Step:21, Action:North
State  288
Old Q Values:  [ 4143.03508814 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [ 4910.72746808 -7525.53407498 -7525.7277781    755.35307416]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10847.04477606  2624.2821655    483.97903422 -1455.65174173]
------
Step:22, Action:North
State  208
Old Q Values:  [10847.04477606  2624.2821655    483.97903422 -1455.65174173]
New Q values:  [ 5005.51964918  2624.2821655    483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.22433913e+03 -3.22965309e-01  2.27510685e+02]
------
Step:23, Action:West
State  136
Old Q Values:  [-5281.21195651 12638.59899177 -6245.61866138   723.06552607]
New Q values:  [-5281.21195651 12638.59899177 -6245.61866138   381.94041322]
Reward: -1  Episode Reward:  7
xxxxx
x.agx
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:24, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   285.57657002   419.13136724]
New Q values:  [ -253.44886264 -1902.20915811   285.57657002   298.8299342 ]
Reward: 9  Episode Reward:  16
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.23378158  419.25795768 -252.78192178]
------
Step:25, Action:East
State  105
Old Q Values:  [-180.6           6.72320144  -68.92149356    0.        ]
New Q values:  [-180.6           6.72320144   65.14560537    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x agx
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:26, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   285.57657002   298.8299342 ]
New Q values:  [ -253.44886264 -1902.20915811   285.57657002   244.70936098]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558   14.23378158  419.25795768 -252.78192178]
------
Step:27, Action:East
State  107
Old Q Values:  [-252.35169558   14.23378158  419.25795768 -252.78192178]
New Q values:  [-252.35169558   14.23378158  269.31203637 -252.78192178]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -9545.4473624    340.69617765   275.43590235]
------
Step:28, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   285.57657002   244.70936098]
New Q values:  [ -253.44886264 -1902.20915811   780.93236677   244.70936098]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.22433913e+03 -3.22965309e-01  2.27510685e+02]
------
Step:29, Action:West
State  136
Old Q Values:  [-5281.21195651 12638.59899177 -6245.61866138   381.94041322]
New Q values:  [-5281.21195651 12638.59899177 -6245.61866138   245.49036808]
Reward: -1  Episode Reward:  11
xxxxx
x agx
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:30, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1272.07985357   185.43215594]
New Q values:  [-9594.56523706 -8069.05606225  1272.07985357    93.11654399]
Reward: -1  Episode Reward:  10
xxxxx
xag x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6           6.72320144   65.14560537    0.        ]
------
Step:31, Action:South
State  107
Old Q Values:  [-252.35169558   14.23378158  269.31203637 -252.78192178]
New Q values:  [-252.35169558 1208.01858659  269.31203637 -252.78192178]
Reward: 9  Episode Reward:  19
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   9.40190913    0.         3989.75024651 -178.98      ]
------
Step:32, Action:North
State  181
Old Q Values:  [  286.56463245   401.27621611 -5402.79139641   -30.99112081]
New Q values:  [  476.43142896   401.27621611 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  18
xxxxx
xa  x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1208.01858659  269.31203637 -252.78192178]
------
Step:33, Action:South
State  105
Old Q Values:  [-180.6           6.72320144   65.14560537    0.        ]
New Q values:  [-180.6         145.01870926   65.14560537    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x  gx
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  476.43142896   401.27621611 -5402.79139641   -30.99112081]
------
Step:34, Action:North
State  181
Old Q Values:  [  476.43142896   401.27621611 -5402.79139641   -30.99112081]
New Q values:  [  552.37814756   401.27621611 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1208.01858659  269.31203637 -252.78192178]
------
Step:35, Action:South
State  107
Old Q Values:  [-252.35169558 1208.01858659  269.31203637 -252.78192178]
New Q values:  [-252.35169558 1679.53250859  269.31203637 -252.78192178]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   9.40190913    0.         3989.75024651 -178.98      ]
------
Step:36, Action:North
State  183
Old Q Values:  [ 509.45761052  674.01666877 5415.87004968    0.        ]
New Q values:  [ 707.04279678  674.01666877 5415.87004968    0.        ]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1679.53250859  269.31203637 -252.78192178]
------
Step:37, Action:South
State  107
Old Q Values:  [-252.35169558 1679.53250859  269.31203637 -252.78192178]
New Q values:  [-252.35169558 2295.97401834  269.31203637 -252.78192178]
Reward: -1  Episode Reward:  13
xxxxx
x   x
xa. x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 707.04279678  674.01666877 5415.87004968    0.        ]
------
Step:38, Action:East
State  185
Old Q Values:  [   9.40190913    0.         3989.75024651 -178.98      ]
New Q values:  [   9.40190913    0.         2692.07535342 -178.98      ]
Reward: 9  Episode Reward:  22
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:39, Action:South
State  200
Old Q Values:  [  62.8218634  5839.22569338  636.07481225  408.67479662]
New Q values:  [  62.8218634  6029.10901229  636.07481225  408.67479662]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   939.79840429 12313.39578313]
------
Step:40, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   939.79840429 12313.39578313]
New Q values:  [-2527.46239811 -8521.23367799   939.79840429 87788.93225129]
Reward: 100009  Episode Reward:  100030
xxxxx
x   x
xg  x
xa  x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  552.37814756   401.27621611 -5402.79139641   -30.99112081]
------
Step:1, Action:North
State  180
Old Q Values:  [-8180.68959572  1317.71030664  5271.27898154 -4966.32149798]
New Q values:  [-2959.50722357  1317.71030664  5271.27898154 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684  1024.56204905  -180.6       ]
------
Step:2, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684  1024.56204905  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   548.35720121  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x a.x
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         443.7746053  374.96879939 116.83031103]
------
Step:3, Action:South
State  122
Old Q Values:  [ -281.736      -9545.4473624    340.69617765   275.43590235]
New Q values:  [ -281.736      -3455.78276043   340.69617765   275.43590235]
Reward: -1  Episode Reward:  17
xxxxx
x  .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1209.98728178   174.55451539     0.        ]
------
Step:4, Action:South
State  196
Old Q Values:  [-2469.90645144  1209.98728178   174.55451539     0.        ]
New Q values:  [-2469.90645144  1422.7973249    174.55451539     0.        ]
Reward: 9  Episode Reward:  26
xxxxx
xg .x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  3111.34137396   489.20816769]
------
Step:5, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3111.34137396   489.20816769]
New Q values:  [   16.82637525 -5807.06396197  2723.15479001   489.20816769]
Reward: 9  Episode Reward:  35
xxxxx
x g.x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4910.72746808 -7525.53407498 -7525.7277781    755.35307416]
------
Step:6, Action:North
State  288
Old Q Values:  [ 4910.72746808 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [ 2533.96318891 -7525.53407498 -7525.7277781    755.35307416]
Reward: 9  Episode Reward:  44
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1459.90678141  1880.90733893 -8489.43729461   531.09593838]
------
Step:7, Action:South
State  208
Old Q Values:  [ 5005.51964918  2624.2821655    483.97903422 -1455.65174173]
New Q values:  [ 5005.51964918  1809.30182287   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  43
xxxxx
x g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2533.96318891 -7525.53407498 -7525.7277781    755.35307416]
------
Step:8, Action:North
State  288
Old Q Values:  [ 2533.96318891 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [ 2514.64117032 -7525.53407498 -7525.7277781    755.35307416]
Reward: -1  Episode Reward:  42
xxxxx
xg .x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5005.51964918  1809.30182287   483.97903422 -1455.65174173]
------
Step:9, Action:North
State  216
Old Q Values:  [ 1459.90678141  1880.90733893 -8489.43729461   531.09593838]
New Q values:  [ 4380.9424101   1880.90733893 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  51
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651 12638.59899177 -6245.61866138   245.49036808]
------
Step:10, Action:South
State  136
Old Q Values:  [-5281.21195651 12638.59899177 -6245.61866138   245.49036808]
New Q values:  [-5281.21195651  6369.12231974 -6245.61866138   245.49036808]
Reward: -1  Episode Reward:  50
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4380.9424101   1880.90733893 -8489.43729461   531.09593838]
------
Step:11, Action:North
State  216
Old Q Values:  [ 4380.9424101   1880.90733893 -8489.43729461   531.09593838]
New Q values:  [ 2419.0787028   1880.90733893 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  49
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.22433913e+03 -3.22965309e-01  2.27510685e+02]
------
Step:12, Action:South
State  130
Old Q Values:  [36041.91667283  5830.13300225  -180.00807518 69273.34428742]
New Q values:  [36041.91667283  3477.48884728  -180.00807518 69273.34428742]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.23770518e+03 3.82011882e+03 2.24233123e+03 3.52184257e+00]
------
Step:13, Action:South
State  218
Old Q Values:  [4400.40159537  237.92474934    0.          429.03841886]
New Q values:  [4400.40159537  848.96225083    0.          429.03841886]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2514.64117032 -7525.53407498 -7525.7277781    755.35307416]
------
Step:14, Action:North
State  288
Old Q Values:  [ 2514.64117032 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [ 1730.98007897 -7525.53407498 -7525.7277781    755.35307416]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2419.0787028   1880.90733893 -8489.43729461   531.09593838]
------
Step:15, Action:North
State  216
Old Q Values:  [ 2419.0787028   1880.90733893 -8489.43729461   531.09593838]
New Q values:  [ 1634.33321988  1880.90733893 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.22433913e+03 -3.22965309e-01  2.27510685e+02]
------
Step:16, Action:West
State  138
Old Q Values:  [-1.39459256e+02  2.22433913e+03 -3.22965309e-01  2.27510685e+02]
New Q values:  [-1.39459256e+02  2.22433913e+03 -3.22965309e-01  1.92613127e+02]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   340.69617765   275.43590235]
------
Step:17, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   340.69617765   275.43590235]
New Q values:  [ -281.736      -3455.78276043   802.98020982   275.43590235]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.22433913e+03 -3.22965309e-01  1.92613127e+02]
------
Step:18, Action:South
State  136
Old Q Values:  [-5281.21195651  6369.12231974 -6245.61866138   245.49036808]
New Q values:  [-5281.21195651  3111.32112957 -6245.61866138   245.49036808]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1634.33321988  1880.90733893 -8489.43729461   531.09593838]
------
Step:19, Action:South
State  216
Old Q Values:  [ 1634.33321988  1880.90733893 -8489.43729461   531.09593838]
New Q values:  [ 1634.33321988  1271.05695926 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1730.98007897 -7525.53407498 -7525.7277781    755.35307416]
------
Step:20, Action:North
State  288
Old Q Values:  [ 1730.98007897 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [ 1182.09199755 -7525.53407498 -7525.7277781    755.35307416]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1634.33321988  1271.05695926 -8489.43729461   531.09593838]
------
Step:21, Action:South
State  216
Old Q Values:  [ 1634.33321988  1271.05695926 -8489.43729461   531.09593838]
New Q values:  [ 1634.33321988   862.45038297 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1182.09199755 -7525.53407498 -7525.7277781    755.35307416]
------
Step:22, Action:North
State  288
Old Q Values:  [ 1182.09199755 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [  962.53676498 -7525.53407498 -7525.7277781    755.35307416]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1634.33321988   862.45038297 -8489.43729461   531.09593838]
------
Step:23, Action:South
State  216
Old Q Values:  [ 1634.33321988   862.45038297 -8489.43729461   531.09593838]
New Q values:  [ 1634.33321988   633.14118268 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  962.53676498 -7525.53407498 -7525.7277781    755.35307416]
------
Step:24, Action:West
State  288
Old Q Values:  [  962.53676498 -7525.53407498 -7525.7277781    755.35307416]
New Q values:  [  962.53676498 -7525.53407498 -7525.7277781  26638.22090505]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   939.79840429 87788.93225129]
------
Step:25, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   939.79840429 87788.93225129]
New Q values:  [ -2527.46239811  -8521.23367799    939.79840429 106891.368439  ]
Reward: 100009  Episode Reward:  100045
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.23770518e+03 3.82011882e+03 2.24233123e+03 3.52184257e+00]
------
Step:1, Action:South
State  210
Old Q Values:  [2.23770518e+03 3.82011882e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.23770518e+03 3.52491380e+03 2.24233123e+03 3.52184257e+00]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x.. x
x. gx
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799    939.79840429 106891.368439  ]
------
Step:1, Action:East
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799    939.79840429 106891.368439  ]
New Q values:  [ -2527.46239811  -8521.23367799   8372.78563323 106891.368439  ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  962.53676498 -7525.53407498 -7525.7277781  26638.22090505]
------
Step:2, Action:West
State  288
Old Q Values:  [  962.53676498 -7525.53407498 -7525.7277781  26638.22090505]
New Q values:  [  962.53676498 -7525.53407498 -7525.7277781  42722.09889372]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799   8372.78563323 106891.368439  ]
------
Step:3, Action:West
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799   8372.78563323 106891.368439  ]
New Q values:  [-2527.46239811 -8521.23367799  8372.78563323 43529.15516089]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2577.35928429 -8695.4397473   1932.26731839 -2601.74710518]
------
Step:4, Action:East
State  260
Old Q Values:  [ 2577.35928429 -8695.4397473   1932.26731839 -2601.74710518]
New Q values:  [ 2577.35928429 -8695.4397473  13831.05347562 -2601.74710518]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8372.78563323 43529.15516089]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8372.78563323 43529.15516089]
New Q values:  [-2527.46239811 -8521.23367799 16165.14392141 43529.15516089]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  962.53676498 -7525.53407498 -7525.7277781  42722.09889372]
------
Step:6, Action:North
State  288
Old Q Values:  [  962.53676498 -7525.53407498 -7525.7277781  42722.09889372]
New Q values:  [ 1447.888846   -7525.53407498 -7525.7277781  42722.09889372]
Reward: 9  Episode Reward:  14
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.23770518e+03 3.52491380e+03 2.24233123e+03 3.52184257e+00]
------
Step:7, Action:South
State  208
Old Q Values:  [ 5005.51964918  1809.30182287   483.97903422 -1455.65174173]
New Q values:  [ 5005.51964918 13539.75039726   483.97903422 -1455.65174173]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1447.888846   -7525.53407498 -7525.7277781  42722.09889372]
------
Step:8, Action:West
State  288
Old Q Values:  [ 1447.888846   -7525.53407498 -7525.7277781  42722.09889372]
New Q values:  [ 1447.888846   -7525.53407498 -7525.7277781  30146.98610575]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 16165.14392141 43529.15516089]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 16165.14392141 43529.15516089]
New Q values:  [-2527.46239811 -8521.23367799 15509.55340029 43529.15516089]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1447.888846   -7525.53407498 -7525.7277781  30146.98610575]
------
Step:10, Action:North
State  288
Old Q Values:  [ 1447.888846   -7525.53407498 -7525.7277781  30146.98610575]
New Q values:  [ 1636.0296784  -7525.53407498 -7525.7277781  30146.98610575]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.23770518e+03 3.52491380e+03 2.24233123e+03 3.52184257e+00]
------
Step:11, Action:South
State  210
Old Q Values:  [2.23770518e+03 3.52491380e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.23770518e+03 1.04534614e+04 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1636.0296784  -7525.53407498 -7525.7277781  30146.98610575]
------
Step:12, Action:North
State  288
Old Q Values:  [ 1636.0296784  -7525.53407498 -7525.7277781  30146.98610575]
New Q values:  [ 3789.85027688 -7525.53407498 -7525.7277781  30146.98610575]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.23770518e+03 1.04534614e+04 2.24233123e+03 3.52184257e+00]
------
Step:13, Action:East
State  208
Old Q Values:  [ 5005.51964918 13539.75039726   483.97903422 -1455.65174173]
New Q values:  [ 5005.51964918 13539.75039726 -1925.08326713 -1455.65174173]
Reward: -10301  Episode Reward:  -10293
xxxxx
x...x
x..gx
x   x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007  630.16359176 -120.29354603]
------
Step:1, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   548.35720121  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   465.63694343  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   802.98020982   275.43590235]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   802.98020982   275.43590235]
New Q values:  [ -281.736      -3455.78276043   993.89382268   275.43590235]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.22433913e+03 -3.22965309e-01  1.92613127e+02]
------
Step:3, Action:South
State  138
Old Q Values:  [-1.39459256e+02  2.22433913e+03 -3.22965309e-01  1.92613127e+02]
New Q values:  [-1.39459256e+02  1.38543562e+03 -3.22965309e-01  1.92613127e+02]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1634.33321988   633.14118268 -8489.43729461   531.09593838]
------
Step:4, Action:North
State  208
Old Q Values:  [ 5005.51964918 13539.75039726 -1925.08326713 -1455.65174173]
New Q values:  [ 2935.00419854 13539.75039726 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  26
xxxxx
x gax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  3111.32112957 -6245.61866138   245.49036808]
------
Step:5, Action:South
State  138
Old Q Values:  [-1.39459256e+02  1.38543562e+03 -3.22965309e-01  1.92613127e+02]
New Q values:  [-1.39459256e+02  1.04387421e+03 -3.22965309e-01  1.92613127e+02]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1634.33321988   633.14118268 -8489.43729461   531.09593838]
------
Step:6, Action:North
State  208
Old Q Values:  [ 2935.00419854 13539.75039726 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332 13539.75039726 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  1.04387421e+03 -3.22965309e-01  1.92613127e+02]
------
Step:7, Action:West
State  136
Old Q Values:  [-5281.21195651  3111.32112957 -6245.61866138   245.49036808]
New Q values:  [-5281.21195651  3111.32112957 -6245.61866138   190.91035002]
Reward: -1  Episode Reward:  23
xxxxx
x agx
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:8, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1272.07985357    93.11654399]
New Q values:  [-9594.56523706 -8069.05606225  1272.07985357    80.15223037]
Reward: -1  Episode Reward:  22
xxxxx
xag x
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         145.01870926   65.14560537    0.        ]
------
Step:9, Action:South
State  107
Old Q Values:  [-252.35169558 2295.97401834  269.31203637 -252.78192178]
New Q values:  [-252.35169558 1731.41221336  269.31203637 -252.78192178]
Reward: 9  Episode Reward:  31
xxxxx
x   x
xag x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   9.40190913    0.         2692.07535342 -178.98      ]
------
Step:10, Action:North
State  183
Old Q Values:  [ 707.04279678  674.01666877 5415.87004968    0.        ]
New Q values:  [ 471.26619624  674.01666877 5415.87004968    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007  630.16359176 -120.29354603]
------
Step:11, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   465.63694343  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   483.82292418  -180.6       ]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x . x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   993.89382268   275.43590235]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   993.89382268   275.43590235]
New Q values:  [ -281.736      -3455.78276043   710.11979298   275.43590235]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  1.04387421e+03 -3.22965309e-01  1.92613127e+02]
------
Step:13, Action:South
State  138
Old Q Values:  [-1.39459256e+02  1.04387421e+03 -3.22965309e-01  1.92613127e+02]
New Q values:  [-1.39459256e+02  3.55298809e+03 -3.22965309e-01  1.92613127e+02]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x .ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.23770518e+03 1.04534614e+04 2.24233123e+03 3.52184257e+00]
------
Step:14, Action:South
State  210
Old Q Values:  [2.23770518e+03 1.04534614e+04 2.24233123e+03 3.52184257e+00]
New Q values:  [2.23770518e+03 1.32308804e+04 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3789.85027688 -7525.53407498 -7525.7277781  30146.98610575]
------
Step:15, Action:North
State  288
Old Q Values:  [ 3789.85027688 -7525.53407498 -7525.7277781  30146.98610575]
New Q values:  [ 5484.60422248 -7525.53407498 -7525.7277781  30146.98610575]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.23770518e+03 1.32308804e+04 2.24233123e+03 3.52184257e+00]
------
Step:16, Action:South
State  210
Old Q Values:  [2.23770518e+03 1.32308804e+04 2.24233123e+03 3.52184257e+00]
New Q values:  [2.23770518e+03 1.43358480e+04 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5484.60422248 -7525.53407498 -7525.7277781  30146.98610575]
------
Step:17, Action:North
State  288
Old Q Values:  [ 5484.60422248 -7525.53407498 -7525.7277781  30146.98610575]
New Q values:  [ 2683.54165495 -7525.53407498 -7525.7277781  30146.98610575]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1634.33321988   633.14118268 -8489.43729461   531.09593838]
------
Step:18, Action:North
State  210
Old Q Values:  [2.23770518e+03 1.43358480e+04 2.24233123e+03 3.52184257e+00]
New Q values:  [1.96037850e+03 1.43358480e+04 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  3.55298809e+03 -3.22965309e-01  1.92613127e+02]
------
Step:19, Action:South
State  138
Old Q Values:  [-1.39459256e+02  3.55298809e+03 -3.22965309e-01  1.92613127e+02]
New Q values:  [-1.39459256e+02  5.72134963e+03 -3.22965309e-01  1.92613127e+02]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.96037850e+03 1.43358480e+04 2.24233123e+03 3.52184257e+00]
------
Step:20, Action:South
State  210
Old Q Values:  [1.96037850e+03 1.43358480e+04 2.24233123e+03 3.52184257e+00]
New Q values:  [1.96037850e+03 1.47778350e+04 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2683.54165495 -7525.53407498 -7525.7277781  30146.98610575]
------
Step:21, Action:North
State  288
Old Q Values:  [ 2683.54165495 -7525.53407498 -7525.7277781  30146.98610575]
New Q values:  [ 5506.16716918 -7525.53407498 -7525.7277781  30146.98610575]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.96037850e+03 1.47778350e+04 2.24233123e+03 3.52184257e+00]
------
Step:22, Action:South
State  208
Old Q Values:  [ 1486.56394332 13539.75039726 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332 14459.39599063 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5506.16716918 -7525.53407498 -7525.7277781  30146.98610575]
------
Step:23, Action:West
State  288
Old Q Values:  [ 5506.16716918 -7525.53407498 -7525.7277781  30146.98610575]
New Q values:  [ 5506.16716918 -7525.53407498 -7525.7277781  25116.94099057]
Reward: -1  Episode Reward:  27
xxxxx
xg  x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 15509.55340029 43529.15516089]
------
Step:24, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 15509.55340029 43529.15516089]
New Q values:  [-2527.46239811 -8521.23367799 15509.55340029 17513.07541523]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  320.04450291   -40.34168621 -4708.9150155    -35.88578819]
------
Step:25, Action:North
State  261
Old Q Values:  [  320.04450291   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  293.13124543   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  552.37814756   401.27621611 -5402.79139641   -30.99112081]
------
Step:26, Action:North
State  181
Old Q Values:  [  552.37814756   401.27621611 -5402.79139641   -30.99112081]
New Q values:  [  327.54952811   401.27621611 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  34
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  357.32756361 -253.21568792 -180.6       ]
------
Step:27, Action:South
State  109
Old Q Values:  [-241.10880094  357.32756361 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  262.71389028 -253.21568792 -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  327.54952811   401.27621611 -5402.79139641   -30.99112081]
------
Step:28, Action:South
State  181
Old Q Values:  [  327.54952811   401.27621611 -5402.79139641   -30.99112081]
New Q values:  [  327.54952811   247.84986007 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  293.13124543   -40.34168621 -4708.9150155    -35.88578819]
------
Step:29, Action:North
State  261
Old Q Values:  [  293.13124543   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [ 1741.41351308   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  674.01666877 5415.87004968    0.        ]
------
Step:30, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 72867.52471477     0.        ]
New Q values:  [    0.          4614.46100011 91391.64586576     0.        ]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  262.71389028 -253.21568792 -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [-241.10880094  262.71389028 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  208.75041454 -253.21568792 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  327.54952811   247.84986007 -5402.79139641   -30.99112081]
------
Step:2, Action:North
State  181
Old Q Values:  [  327.54952811   247.84986007 -5402.79139641   -30.99112081]
New Q values:  [  193.04493561   247.84986007 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  8
xxxxx
xa.gx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  208.75041454 -253.21568792 -180.6       ]
------
Step:3, Action:South
State  109
Old Q Values:  [-241.10880094  208.75041454 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  157.25512384 -253.21568792 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  193.04493561   247.84986007 -5402.79139641   -30.99112081]
------
Step:4, Action:South
State  181
Old Q Values:  [  193.04493561   247.84986007 -5402.79139641   -30.99112081]
New Q values:  [  193.04493561   626.96399795 -5402.79139641   -30.99112081]
Reward: 9  Episode Reward:  16
xxxxx
x . x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1741.41351308   -40.34168621 -4708.9150155    -35.88578819]
------
Step:5, Action:North
State  260
Old Q Values:  [ 2577.35928429 -8695.4397473  13831.05347562 -2601.74710518]
New Q values:  [-3388.27259182 -8695.4397473  13831.05347562 -2601.74710518]
Reward: -10001  Episode Reward:  -9985
xxxxx
x . x
xg..x
x ..x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  157.25512384 -253.21568792 -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [-241.10880094  157.25512384 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  256.39124892 -253.21568792 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  193.04493561   626.96399795 -5402.79139641   -30.99112081]
------
Step:2, Action:South
State  181
Old Q Values:  [  193.04493561   626.96399795 -5402.79139641   -30.99112081]
New Q values:  [  193.04493561   778.6096531  -5402.79139641   -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1741.41351308   -40.34168621 -4708.9150155    -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [ 1741.41351308   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  929.54830116   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  17
xxxxx
x g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  193.04493561   778.6096531  -5402.79139641   -30.99112081]
------
Step:4, Action:South
State  181
Old Q Values:  [  193.04493561   778.6096531  -5402.79139641   -30.99112081]
New Q values:  [  193.04493561   589.70835159 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  929.54830116   -40.34168621 -4708.9150155    -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [  929.54830116   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  548.13182594   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  15
xxxxx
x g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  193.04493561   589.70835159 -5402.79139641   -30.99112081]
------
Step:6, Action:South
State  180
Old Q Values:  [-2959.50722357  1317.71030664  5271.27898154 -4966.32149798]
New Q values:  [-2959.50722357  4675.80016534  5271.27898154 -4966.32149798]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3388.27259182 -8695.4397473  13831.05347562 -2601.74710518]
------
Step:7, Action:East
State  260
Old Q Values:  [-3388.27259182 -8695.4397473  13831.05347562 -2601.74710518]
New Q values:  [-3388.27259182 -8695.4397473  10791.74401482 -2601.74710518]
Reward: 9  Episode Reward:  23
xxxxx
x . x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 15509.55340029 17513.07541523]
------
Step:8, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2723.15479001   489.20816769]
New Q values:  [   16.82637525 -5807.06396197  2723.15479001   359.52281486]
Reward: -1  Episode Reward:  22
xxxxx
x . x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  548.13182594   -40.34168621 -4708.9150155    -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [  548.13182594   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  395.56523585   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  21
xxxxx
x g x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  193.04493561   589.70835159 -5402.79139641   -30.99112081]
------
Step:10, Action:South
State  181
Old Q Values:  [  193.04493561   589.70835159 -5402.79139641   -30.99112081]
New Q values:  [  193.04493561   353.95291139 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  20
xxxxx
x . x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  395.56523585   -40.34168621 -4708.9150155    -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [  395.56523585   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  263.81196776   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  19
xxxxx
x . x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  193.04493561   353.95291139 -5402.79139641   -30.99112081]
------
Step:12, Action:South
State  181
Old Q Values:  [  193.04493561   353.95291139 -5402.79139641   -30.99112081]
New Q values:  [  193.04493561   220.12475488 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  18
xxxxx
x .gx
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  263.81196776   -40.34168621 -4708.9150155    -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [  263.81196776   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [  170.96221357   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  193.04493561   220.12475488 -5402.79139641   -30.99112081]
------
Step:14, Action:South
State  181
Old Q Values:  [  193.04493561   220.12475488 -5402.79139641   -30.99112081]
New Q values:  [  193.04493561   138.73856602 -5402.79139641   -30.99112081]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  170.96221357   -40.34168621 -4708.9150155    -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [  170.96221357   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [ 1692.54590033   -40.34168621 -4708.9150155    -35.88578819]
Reward: -1  Episode Reward:  15
xxxxx
x . x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  674.01666877 5415.87004968    0.        ]
------
Step:16, Action:East
State  181
Old Q Values:  [  193.04493561   138.73856602 -5402.79139641   -30.99112081]
New Q values:  [  193.04493561   138.73856602 -3151.23898092   -30.99112081]
Reward: -9991  Episode Reward:  -9976
xxxxx
x . x
x g.x
x  .x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 15509.55340029 17513.07541523]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 15509.55340029 17513.07541523]
New Q values:  [-2527.46239811 -8521.23367799 13744.30365729 17513.07541523]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5506.16716918 -7525.53407498 -7525.7277781  25116.94099057]
------
Step:2, Action:West
State  288
Old Q Values:  [ 5506.16716918 -7525.53407498 -7525.7277781  25116.94099057]
New Q values:  [ 5506.16716918 -7525.53407498 -7525.7277781  15300.0990208 ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 13744.30365729 17513.07541523]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 13744.30365729 17513.07541523]
New Q values:  [-2527.46239811 -8521.23367799 13744.30365729  7512.39393619]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1692.54590033   -40.34168621 -4708.9150155    -35.88578819]
------
Step:4, Action:North
State  260
Old Q Values:  [-3388.27259182 -8695.4397473  10791.74401482 -2601.74710518]
New Q values:  [  141.07434439 -8695.4397473  10791.74401482 -2601.74710518]
Reward: 9  Episode Reward:  16
xxxxx
x...x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 471.26619624  674.01666877 5415.87004968    0.        ]
New Q values:  [ 471.26619624  674.01666877 4410.98399972    0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  7.46411993e+03  1.20371620e+03]
------
Step:6, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.30453803e+03 1.66815919e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.30453803e+03 1.10158556e+04 2.91043938e+03]
Reward: 9  Episode Reward:  34
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332 14459.39599063 -1925.08326713 -1455.65174173]
------
Step:7, Action:South
State  210
Old Q Values:  [1.96037850e+03 1.47778350e+04 2.24233123e+03 3.52184257e+00]
New Q values:  [1.96037850e+03 1.05005637e+04 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5506.16716918 -7525.53407498 -7525.7277781  15300.0990208 ]
------
Step:8, Action:North
State  288
Old Q Values:  [ 5506.16716918 -7525.53407498 -7525.7277781  15300.0990208 ]
New Q values:  [ 5352.03598243 -7525.53407498 -7525.7277781  15300.0990208 ]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.96037850e+03 1.05005637e+04 2.24233123e+03 3.52184257e+00]
------
Step:9, Action:South
State  210
Old Q Values:  [1.96037850e+03 1.05005637e+04 2.24233123e+03 3.52184257e+00]
New Q values:  [1.96037850e+03 8.78965519e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5352.03598243 -7525.53407498 -7525.7277781  15300.0990208 ]
------
Step:10, Action:North
State  288
Old Q Values:  [ 5352.03598243 -7525.53407498 -7525.7277781  15300.0990208 ]
New Q values:  [ 6478.03319016 -7525.53407498 -7525.7277781  15300.0990208 ]
Reward: -1  Episode Reward:  30
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332 14459.39599063 -1925.08326713 -1455.65174173]
------
Step:11, Action:South
State  208
Old Q Values:  [ 1486.56394332 14459.39599063 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332 10373.18810249 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  29
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6478.03319016 -7525.53407498 -7525.7277781  15300.0990208 ]
------
Step:12, Action:West
State  288
Old Q Values:  [ 6478.03319016 -7525.53407498 -7525.7277781  15300.0990208 ]
New Q values:  [ 6478.03319016 -7525.53407498 -7525.7277781   6253.83578473]
Reward: -1  Episode Reward:  28
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   447.98725471]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 13744.30365729  7512.39393619]
New Q values:  [-2527.46239811 -8521.23367799 13744.30365729 14774.75311296]
Reward: -1  Episode Reward:  27
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39234.65179496  2256.66526474  6585.91095232  1875.31501677]
------
Step:14, Action:North
State  260
Old Q Values:  [  141.07434439 -8695.4397473  10791.74401482 -2601.74710518]
New Q values:  [ 1637.21343222 -8695.4397473  10791.74401482 -2601.74710518]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2959.50722357  4675.80016534  5271.27898154 -4966.32149798]
------
Step:15, Action:East
State  177
Old Q Values:  [ 78394.48547832  26817.66925136 113470.4104879       0.        ]
New Q values:  [78394.48547832 26817.66925136 48692.32086537     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x.g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.30453803e+03 1.10158556e+04 2.91043938e+03]
------
Step:16, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.30453803e+03 1.10158556e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.30453803e+03 7.51769866e+03 2.91043938e+03]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332 10373.18810249 -1925.08326713 -1455.65174173]
------
Step:17, Action:South
State  208
Old Q Values:  [ 1486.56394332 10373.18810249 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332  6092.08519804 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6478.03319016 -7525.53407498 -7525.7277781   6253.83578473]
------
Step:18, Action:West
State  288
Old Q Values:  [ 6478.03319016 -7525.53407498 -7525.7277781   6253.83578473]
New Q values:  [ 6478.03319016 -7525.53407498 -7525.7277781   2635.33049031]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   447.98725471]
------
Step:19, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   447.98725471]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   686.35867199]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1692.54590033   -40.34168621 -4708.9150155    -35.88578819]
------
Step:20, Action:North
State  257
Old Q Values:  [39234.65179496  2256.66526474  6585.91095232  1875.31501677]
New Q values:  [39211.60636148  2256.66526474  6585.91095232  1875.31501677]
Reward: -1  Episode Reward:  20
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 26817.66925136 48692.32086537     0.        ]
------
Step:21, Action:North
State  181
Old Q Values:  [  193.04493561   138.73856602 -3151.23898092   -30.99112081]
New Q values:  [   87.06729753   138.73856602 -3151.23898092   -30.99112081]
Reward: 9  Episode Reward:  29
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         14.83107761  0.          0.        ]
------
Step:22, Action:South
State  101
Old Q Values:  [ 0.         14.83107761  0.          0.        ]
New Q values:  [ 0.         46.95400085  0.          0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753   138.73856602 -3151.23898092   -30.99112081]
------
Step:23, Action:South
State  181
Old Q Values:  [   87.06729753   138.73856602 -3151.23898092   -30.99112081]
New Q values:  [   87.06729753   562.65919651 -3151.23898092   -30.99112081]
Reward: -1  Episode Reward:  27
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1692.54590033   -40.34168621 -4708.9150155    -35.88578819]
------
Step:24, Action:North
State  260
Old Q Values:  [ 1637.21343222 -8695.4397473  10791.74401482 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473  10791.74401482 -2601.74710518]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2959.50722357  4675.80016534  5271.27898154 -4966.32149798]
------
Step:25, Action:East
State  180
Old Q Values:  [-2959.50722357  4675.80016534  5271.27898154 -4966.32149798]
New Q values:  [-2959.50722357  4675.80016534  4363.22118993 -4966.32149798]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.30453803e+03 7.51769866e+03 2.91043938e+03]
------
Step:26, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.30453803e+03 7.51769866e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.30453803e+03 4.83410502e+03 2.91043938e+03]
Reward: -1  Episode Reward:  24
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332  6092.08519804 -1925.08326713 -1455.65174173]
------
Step:27, Action:South
State  208
Old Q Values:  [ 1486.56394332  6092.08519804 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332  4379.64403627 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6478.03319016 -7525.53407498 -7525.7277781   2635.33049031]
------
Step:28, Action:North
State  288
Old Q Values:  [ 6478.03319016 -7525.53407498 -7525.7277781   2635.33049031]
New Q values:  [ 5227.50983384 -7525.53407498 -7525.7277781   2635.33049031]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.96037850e+03 8.78965519e+03 2.24233123e+03 3.52184257e+00]
------
Step:29, Action:South
State  210
Old Q Values:  [1.96037850e+03 8.78965519e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.96037850e+03 5.08351503e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5227.50983384 -7525.53407498 -7525.7277781   2635.33049031]
------
Step:30, Action:North
State  288
Old Q Values:  [ 5227.50983384 -7525.53407498 -7525.7277781   2635.33049031]
New Q values:  [ 3615.45844169 -7525.53407498 -7525.7277781   2635.33049031]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.96037850e+03 5.08351503e+03 2.24233123e+03 3.52184257e+00]
------
Step:31, Action:South
State  208
Old Q Values:  [ 1486.56394332  4379.64403627 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332  2835.89514701 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3615.45844169 -7525.53407498 -7525.7277781   2635.33049031]
------
Step:32, Action:North
State  288
Old Q Values:  [ 3615.45844169 -7525.53407498 -7525.7277781   2635.33049031]
New Q values:  [ 2970.63788483 -7525.53407498 -7525.7277781   2635.33049031]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.96037850e+03 5.08351503e+03 2.24233123e+03 3.52184257e+00]
------
Step:33, Action:South
State  210
Old Q Values:  [1.96037850e+03 5.08351503e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1960.3784995  2923.99737632 2242.33123263    3.52184257]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2970.63788483 -7525.53407498 -7525.7277781   2635.33049031]
------
Step:34, Action:North
State  288
Old Q Values:  [ 2970.63788483 -7525.53407498 -7525.7277781   2635.33049031]
New Q values:  [ 2038.42369804 -7525.53407498 -7525.7277781   2635.33049031]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332  2835.89514701 -1925.08326713 -1455.65174173]
------
Step:35, Action:South
State  208
Old Q Values:  [ 1486.56394332  2835.89514701 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332  1924.3572059  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  15
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2038.42369804 -7525.53407498 -7525.7277781   2635.33049031]
------
Step:36, Action:West
State  288
Old Q Values:  [ 2038.42369804 -7525.53407498 -7525.7277781   2635.33049031]
New Q values:  [ 2038.42369804 -7525.53407498 -7525.7277781   5485.95813001]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 13744.30365729 14774.75311296]
------
Step:37, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 13744.30365729 14774.75311296]
New Q values:  [-2527.46239811 -8521.23367799 13744.30365729  9146.82444963]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2235.66906735 -8695.4397473  10791.74401482 -2601.74710518]
------
Step:38, Action:East
State  261
Old Q Values:  [ 1692.54590033   -40.34168621 -4708.9150155    -35.88578819]
New Q values:  [1692.54590033  -40.34168621 2239.12509098  -35.88578819]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 13744.30365729  9146.82444963]
------
Step:39, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 13744.30365729  9146.82444963]
New Q values:  [-2527.46239811 -8521.23367799  7142.90890192  9146.82444963]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2038.42369804 -7525.53407498 -7525.7277781   5485.95813001]
------
Step:40, Action:West
State  288
Old Q Values:  [ 2038.42369804 -7525.53407498 -7525.7277781   5485.95813001]
New Q values:  [ 2038.42369804 -7525.53407498 -7525.7277781   4937.83058689]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7142.90890192  9146.82444963]
------
Step:41, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7142.90890192  9146.82444963]
New Q values:  [-2527.46239811 -8521.23367799  4337.91273684  9146.82444963]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2038.42369804 -7525.53407498 -7525.7277781   4937.83058689]
------
Step:42, Action:North
State  288
Old Q Values:  [ 2038.42369804 -7525.53407498 -7525.7277781   4937.83058689]
New Q values:  [ 1392.07664098 -7525.53407498 -7525.7277781   4937.83058689]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332  1924.3572059  -1925.08326713 -1455.65174173]
------
Step:43, Action:South
State  208
Old Q Values:  [ 1486.56394332  1924.3572059  -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332  2250.49205843 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1392.07664098 -7525.53407498 -7525.7277781   4937.83058689]
------
Step:44, Action:West
State  288
Old Q Values:  [ 1392.07664098 -7525.53407498 -7525.7277781   4937.83058689]
New Q values:  [ 1392.07664098 -7525.53407498 -7525.7277781   4718.57956965]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4337.91273684  9146.82444963]
------
Step:45, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4337.91273684  9146.82444963]
New Q values:  [-2527.46239811 -8521.23367799  4337.91273684  6895.6529843 ]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2235.66906735 -8695.4397473  10791.74401482 -2601.74710518]
------
Step:46, Action:East
State  261
Old Q Values:  [1692.54590033  -40.34168621 2239.12509098  -35.88578819]
New Q values:  [1692.54590033  -40.34168621 2963.74593168  -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4337.91273684  6895.6529843 ]
------
Step:47, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2723.15479001   359.52281486]
New Q values:  [   16.82637525 -5807.06396197  2723.15479001  1032.33290545]
Reward: -1  Episode Reward:  3
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1692.54590033  -40.34168621 2963.74593168  -35.88578819]
------
Step:48, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473  10791.74401482 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473   6384.79350122 -2601.74710518]
Reward: -1  Episode Reward:  2
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4337.91273684  6895.6529843 ]
------
Step:49, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2723.15479001  1032.33290545]
New Q values:  [   16.82637525 -5807.06396197  2723.15479001  1301.45694168]
Reward: -1  Episode Reward:  1
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1692.54590033  -40.34168621 2963.74593168  -35.88578819]
------
Step:50, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473   6384.79350122 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473   4622.01329578 -2601.74710518]
Reward: -1  Episode Reward:  0
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4337.91273684  6895.6529843 ]
------
Step:51, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4337.91273684  6895.6529843 ]
New Q values:  [-2527.46239811 -8521.23367799  4337.91273684  4144.26518245]
Reward: -1  Episode Reward:  -1
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2235.66906735 -8695.4397473   4622.01329578 -2601.74710518]
------
Step:52, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473   4622.01329578 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473   3149.57913936 -2601.74710518]
Reward: -1  Episode Reward:  -2
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4337.91273684  4144.26518245]
------
Step:53, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2723.15479001  1301.45694168]
New Q values:  [   16.82637525 -5807.06396197  2504.2357869   1301.45694168]
Reward: -1  Episode Reward:  -3
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1392.07664098 -7525.53407498 -7525.7277781   4718.57956965]
------
Step:54, Action:West
State  288
Old Q Values:  [ 1392.07664098 -7525.53407498 -7525.7277781   4718.57956965]
New Q values:  [ 1392.07664098 -7525.53407498 -7525.7277781   3188.20564891]
Reward: -1  Episode Reward:  -4
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4337.91273684  4144.26518245]
------
Step:55, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4337.91273684  4144.26518245]
New Q values:  [-2527.46239811 -8521.23367799  2691.02678941  4144.26518245]
Reward: -1  Episode Reward:  -5
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1392.07664098 -7525.53407498 -7525.7277781   3188.20564891]
------
Step:56, Action:West
State  288
Old Q Values:  [ 1392.07664098 -7525.53407498 -7525.7277781   3188.20564891]
New Q values:  [ 1392.07664098 -7525.53407498 -7525.7277781   2517.9618143 ]
Reward: -1  Episode Reward:  -6
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2691.02678941  4144.26518245]
------
Step:57, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2504.2357869   1301.45694168]
New Q values:  [   16.82637525 -5807.06396197  2504.2357869   1409.10655618]
Reward: -1  Episode Reward:  -7
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1692.54590033  -40.34168621 2963.74593168  -35.88578819]
------
Step:58, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473   3149.57913936 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473   2502.51121048 -2601.74710518]
Reward: -1  Episode Reward:  -8
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2691.02678941  4144.26518245]
------
Step:59, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2504.2357869   1409.10655618]
New Q values:  [   16.82637525 -5807.06396197  2504.2357869   1452.16640198]
Reward: -1  Episode Reward:  -9
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1692.54590033  -40.34168621 2963.74593168  -35.88578819]
------
Step:60, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473   2502.51121048 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473   2243.68403893 -2601.74710518]
Reward: -1  Episode Reward:  -10
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2691.02678941  4144.26518245]
------
Step:61, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2504.2357869   1452.16640198]
New Q values:  [   16.82637525 -5807.06396197  2504.2357869   1469.3903403 ]
Reward: -1  Episode Reward:  -11
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1692.54590033  -40.34168621 2963.74593168  -35.88578819]
------
Step:62, Action:East
State  261
Old Q Values:  [1692.54590033  -40.34168621 2963.74593168  -35.88578819]
New Q values:  [1692.54590033  -40.34168621 2428.17792741  -35.88578819]
Reward: -1  Episode Reward:  -12
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2691.02678941  4144.26518245]
------
Step:63, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2691.02678941  4144.26518245]
New Q values:  [-2527.46239811 -8521.23367799  2691.02678941  2385.5594512 ]
Reward: -1  Episode Reward:  -13
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1692.54590033  -40.34168621 2428.17792741  -35.88578819]
------
Step:64, Action:North
State  261
Old Q Values:  [1692.54590033  -40.34168621 2428.17792741  -35.88578819]
New Q values:  [ 845.21611909  -40.34168621 2428.17792741  -35.88578819]
Reward: -1  Episode Reward:  -14
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753   562.65919651 -3151.23898092   -30.99112081]
------
Step:65, Action:South
State  183
Old Q Values:  [ 471.26619624  674.01666877 4410.98399972    0.        ]
New Q values:  [ 471.26619624  997.46004573 4410.98399972    0.        ]
Reward: -1  Episode Reward:  -15
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 845.21611909  -40.34168621 2428.17792741  -35.88578819]
------
Step:66, Action:North
State  261
Old Q Values:  [ 845.21611909  -40.34168621 2428.17792741  -35.88578819]
New Q values:  [ 506.28420659  -40.34168621 2428.17792741  -35.88578819]
Reward: -1  Episode Reward:  -16
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753   562.65919651 -3151.23898092   -30.99112081]
------
Step:67, Action:South
State  183
Old Q Values:  [ 471.26619624  997.46004573 4410.98399972    0.        ]
New Q values:  [ 471.26619624 1126.83739651 4410.98399972    0.        ]
Reward: -1  Episode Reward:  -17
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 506.28420659  -40.34168621 2428.17792741  -35.88578819]
------
Step:68, Action:North
State  261
Old Q Values:  [ 506.28420659  -40.34168621 2428.17792741  -35.88578819]
New Q values:  [ 370.71144159  -40.34168621 2428.17792741  -35.88578819]
Reward: -1  Episode Reward:  -18
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753   562.65919651 -3151.23898092   -30.99112081]
------
Step:69, Action:South
State  180
Old Q Values:  [-2959.50722357  4675.80016534  4363.22118993 -4966.32149798]
New Q values:  [-2959.50722357  2542.82527782  4363.22118993 -4966.32149798]
Reward: -1  Episode Reward:  -19
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2235.66906735 -8695.4397473   2243.68403893 -2601.74710518]
------
Step:70, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473   2243.68403893 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473   1704.18165239 -2601.74710518]
Reward: -1  Episode Reward:  -20
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2691.02678941  2385.5594512 ]
------
Step:71, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2691.02678941  2385.5594512 ]
New Q values:  [-2527.46239811 -8521.23367799  1831.19926005  2385.5594512 ]
Reward: -1  Episode Reward:  -21
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1392.07664098 -7525.53407498 -7525.7277781   2517.9618143 ]
------
Step:72, Action:West
State  288
Old Q Values:  [ 1392.07664098 -7525.53407498 -7525.7277781   2517.9618143 ]
New Q values:  [ 1392.07664098 -7525.53407498 -7525.7277781   1722.25256108]
Reward: -1  Episode Reward:  -22
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1831.19926005  2385.5594512 ]
------
Step:73, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   686.35867199]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  1002.39684702]
Reward: -1  Episode Reward:  -23
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 370.71144159  -40.34168621 2428.17792741  -35.88578819]
------
Step:74, Action:East
State  261
Old Q Values:  [ 370.71144159  -40.34168621 2428.17792741  -35.88578819]
New Q values:  [ 370.71144159  -40.34168621 1271.39022507  -35.88578819]
Reward: -1  Episode Reward:  -24
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1002.39684702]
------
Step:75, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2504.2357869   1469.3903403 ]
New Q values:  [   16.82637525 -5807.06396197  2504.2357869    968.57320364]
Reward: -1  Episode Reward:  -25
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 370.71144159  -40.34168621 1271.39022507  -35.88578819]
------
Step:76, Action:East
State  261
Old Q Values:  [ 370.71144159  -40.34168621 1271.39022507  -35.88578819]
New Q values:  [ 370.71144159  -40.34168621 1223.62392539  -35.88578819]
Reward: -1  Episode Reward:  -26
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1831.19926005  2385.5594512 ]
------
Step:77, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1831.19926005  2385.5594512 ]
New Q values:  [-2527.46239811 -8521.23367799  1831.19926005  1624.32450069]
Reward: -1  Episode Reward:  -27
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2235.66906735 -8695.4397473   1704.18165239 -2601.74710518]
------
Step:78, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473   1704.18165239 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473   1230.43243897 -2601.74710518]
Reward: -1  Episode Reward:  -28
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1831.19926005  1624.32450069]
------
Step:79, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1831.19926005  1624.32450069]
New Q values:  [-2527.46239811 -8521.23367799  1248.55547235  1624.32450069]
Reward: -1  Episode Reward:  -29
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1392.07664098 -7525.53407498 -7525.7277781   1722.25256108]
------
Step:80, Action:West
State  288
Old Q Values:  [ 1392.07664098 -7525.53407498 -7525.7277781   1722.25256108]
New Q values:  [ 1392.07664098 -7525.53407498 -7525.7277781   1175.59837464]
Reward: -1  Episode Reward:  -30
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1248.55547235  1624.32450069]
------
Step:81, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2504.2357869    968.57320364]
New Q values:  [   16.82637525 -5807.06396197  2504.2357869    753.91645907]
Reward: -1  Episode Reward:  -31
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 370.71144159  -40.34168621 1223.62392539  -35.88578819]
------
Step:82, Action:East
State  261
Old Q Values:  [ 370.71144159  -40.34168621 1223.62392539  -35.88578819]
New Q values:  [370.71144159 -40.34168621 976.14692036 -35.88578819]
Reward: -1  Episode Reward:  -32
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1248.55547235  1624.32450069]
------
Step:83, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1248.55547235  1624.32450069]
New Q values:  [-2527.46239811 -8521.23367799  1248.55547235  1319.83052048]
Reward: -1  Episode Reward:  -33
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2235.66906735 -8695.4397473   1230.43243897 -2601.74710518]
------
Step:84, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473   1230.43243897 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473    887.52213173 -2601.74710518]
Reward: -1  Episode Reward:  -34
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1248.55547235  1319.83052048]
------
Step:85, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1248.55547235  1319.83052048]
New Q values:  [-2527.46239811 -8521.23367799  1248.55547235  1198.0329284 ]
Reward: -1  Episode Reward:  -35
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2235.66906735 -8695.4397473    887.52213173 -2601.74710518]
------
Step:86, Action:East
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473    887.52213173 -2601.74710518]
New Q values:  [ 2235.66906735 -8695.4397473    728.9754944  -2601.74710518]
Reward: -1  Episode Reward:  -36
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1248.55547235  1198.0329284 ]
------
Step:87, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1248.55547235  1198.0329284 ]
New Q values:  [-2527.46239811 -8521.23367799   916.44518123  1198.0329284 ]
Reward: -1  Episode Reward:  -37
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1392.07664098 -7525.53407498 -7525.7277781   1175.59837464]
------
Step:88, Action:North
State  288
Old Q Values:  [ 1392.07664098 -7525.53407498 -7525.7277781   1175.59837464]
New Q values:  [ 1231.37827392 -7525.53407498 -7525.7277781   1175.59837464]
Reward: -1  Episode Reward:  -38
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332  2250.49205843 -1925.08326713 -1455.65174173]
------
Step:89, Action:South
State  208
Old Q Values:  [ 1486.56394332  2250.49205843 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332  1269.01030555 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -39
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1231.37827392 -7525.53407498 -7525.7277781   1175.59837464]
------
Step:90, Action:North
State  288
Old Q Values:  [ 1231.37827392 -7525.53407498 -7525.7277781   1175.59837464]
New Q values:  [  937.92049257 -7525.53407498 -7525.7277781   1175.59837464]
Reward: -1  Episode Reward:  -40
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332  1269.01030555 -1925.08326713 -1455.65174173]
------
Step:91, Action:South
State  208
Old Q Values:  [ 1486.56394332  1269.01030555 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332   859.68363461 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -41
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  937.92049257 -7525.53407498 -7525.7277781   1175.59837464]
------
Step:92, Action:West
State  288
Old Q Values:  [  937.92049257 -7525.53407498 -7525.7277781   1175.59837464]
New Q values:  [  937.92049257 -7525.53407498 -7525.7277781    829.04922837]
Reward: -1  Episode Reward:  -42
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   916.44518123  1198.0329284 ]
------
Step:93, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2504.2357869    753.91645907]
New Q values:  [   16.82637525 -5807.06396197  2504.2357869    593.81065974]
Reward: -1  Episode Reward:  -43
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[370.71144159 -40.34168621 976.14692036 -35.88578819]
------
Step:94, Action:East
State  261
Old Q Values:  [370.71144159 -40.34168621 976.14692036 -35.88578819]
New Q values:  [370.71144159 -40.34168621 749.26864666 -35.88578819]
Reward: -1  Episode Reward:  -44
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   916.44518123  1198.0329284 ]
------
Step:95, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   916.44518123  1198.0329284 ]
New Q values:  [-2527.46239811 -8521.23367799   916.44518123   703.39376536]
Reward: -1  Episode Reward:  -45
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[370.71144159 -40.34168621 749.26864666 -35.88578819]
------
Step:96, Action:North
State  261
Old Q Values:  [370.71144159 -40.34168621 749.26864666 -35.88578819]
New Q values:  [316.48233559 -40.34168621 749.26864666 -35.88578819]
Reward: -1  Episode Reward:  -46
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753   562.65919651 -3151.23898092   -30.99112081]
------
Step:97, Action:South
State  183
Old Q Values:  [ 471.26619624 1126.83739651 4410.98399972    0.        ]
New Q values:  [ 471.26619624  674.9155526  4410.98399972    0.        ]
Reward: -1  Episode Reward:  -47
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[316.48233559 -40.34168621 749.26864666 -35.88578819]
------
Step:98, Action:North
State  261
Old Q Values:  [316.48233559 -40.34168621 749.26864666 -35.88578819]
New Q values:  [1449.28813415  -40.34168621  749.26864666  -35.88578819]
Reward: -1  Episode Reward:  -48
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  674.9155526  4410.98399972    0.        ]
------
Step:99, Action:East
State  183
Old Q Values:  [ 471.26619624  674.9155526  4410.98399972    0.        ]
New Q values:  [  471.26619624   674.9155526  22121.2712586      0.        ]
Reward: -1  Episode Reward:  -49
xxxxx
x ..x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[3.88538861e+01 2.67789621e+03 6.78582589e+04 1.10159745e+03]
------
Step:100, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  7.46411993e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  3.86224719e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -50
xxxxx
x ..x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1960.3784995  2923.99737632 2242.33123263    3.52184257]
------
Step:101, Action:South
State  208
Old Q Values:  [ 1486.56394332   859.68363461 -1925.08326713 -1455.65174173]
New Q values:  [ 1486.56394332   624.64960161 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -51
xxxxx
x ..x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  937.92049257 -7525.53407498 -7525.7277781    829.04922837]
------
Step:102, Action:North
State  288
Old Q Values:  [  937.92049257 -7525.53407498 -7525.7277781    829.04922837]
New Q values:  [  820.53738002 -7525.53407498 -7525.7277781    829.04922837]
Reward: -1  Episode Reward:  -52
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1486.56394332   624.64960161 -1925.08326713 -1455.65174173]
------
Step:103, Action:North
State  208
Old Q Values:  [ 1486.56394332   624.64960161 -1925.08326713 -1455.65174173]
New Q values:  [21382.02886356   624.64960161 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  -43
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  3477.48884728  -180.00807518 69273.34428742]
------
Step:104, Action:West
State  130
Old Q Values:  [36041.91667283  3477.48884728  -180.00807518 69273.34428742]
New Q values:  [ 36041.91667283   3477.48884728   -180.00807518 126565.21588927]
Reward: 100009  Episode Reward:  99966
xxxxx
x a x
x  gx
x   x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.30453803e+03 4.83410502e+03 2.91043938e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.30453803e+03 4.83410502e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.30453803e+03 8.35365067e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21382.02886356   624.64960161 -1925.08326713 -1455.65174173]
------
Step:2, Action:North
State  210
Old Q Values:  [1960.3784995  2923.99737632 2242.33123263    3.52184257]
New Q values:  [3.87591162e+04 2.92399738e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   3477.48884728   -180.00807518 126565.21588927]
------
Step:3, Action:West
State  138
Old Q Values:  [-1.39459256e+02  5.72134963e+03 -3.22965309e-01  1.92613127e+02]
New Q values:  [-1.39459256e+02  5.72134963e+03 -3.22965309e-01  3.16724961e+02]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   780.93236677   244.70936098]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   780.93236677   244.70936098]
New Q values:  [ -253.44886264 -1902.20915811  2028.17783586   244.70936098]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  5.72134963e+03 -3.22965309e-01  3.16724961e+02]
------
Step:5, Action:West
State  136
Old Q Values:  [-5281.21195651  3111.32112957 -6245.61866138   190.91035002]
New Q values:  [-5281.21195651  3111.32112957 -6245.61866138   169.0783428 ]
Reward: -1  Episode Reward:  25
xxxxx
x.agx
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:6, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1272.07985357    80.15223037]
New Q values:  [-9594.56523706 -8069.05606225  1272.07985357   114.37826683]
Reward: 9  Episode Reward:  34
xxxxx
xag x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  256.39124892 -253.21568792 -180.6       ]
------
Step:7, Action:South
State  109
Old Q Values:  [-241.10880094  256.39124892 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  544.73063819 -253.21568792 -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  681.98074137 1475.9137954   940.95197235]
------
Step:8, Action:East
State  189
Old Q Values:  [   9.84673294  681.98074137 1475.9137954   940.95197235]
New Q values:  [   9.84673294  681.98074137 1680.54077297  940.95197235]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:9, Action:South
State  200
Old Q Values:  [  62.8218634  6029.10901229  636.07481225  408.67479662]
New Q values:  [  62.8218634  2691.97715929  636.07481225  408.67479662]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   916.44518123   703.39376536]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   916.44518123   703.39376536]
New Q values:  [-2527.46239811 -8521.23367799   620.69284101   703.39376536]
Reward: 9  Episode Reward:  50
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  820.53738002 -7525.53407498 -7525.7277781    829.04922837]
------
Step:11, Action:West
State  288
Old Q Values:  [  820.53738002 -7525.53407498 -7525.7277781    829.04922837]
New Q values:  [  820.53738002 -7525.53407498 -7525.7277781    542.03782096]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   620.69284101   703.39376536]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   620.69284101   703.39376536]
New Q values:  [-2527.46239811 -8521.23367799   620.69284101 72050.23941459]
Reward: 100009  Episode Reward:  100058
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1449.28813415  -40.34168621  749.26864666  -35.88578819]
------
Step:1, Action:North
State  260
Old Q Values:  [ 2235.66906735 -8695.4397473    728.9754944  -2601.74710518]
New Q values:  [ 2390.65100806 -8695.4397473    728.9754944  -2601.74710518]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [  471.26619624   674.9155526  22121.2712586      0.        ]
New Q values:  [  471.26619624   674.9155526  10012.58265925     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  3.86224719e+03  1.20371620e+03]
------
Step:3, Action:East
State  195
Old Q Values:  [3.88538861e+01 2.67789621e+03 6.78582589e+04 1.10159745e+03]
New Q values:  [   38.85388605  2677.89620798 38776.43839492  1101.59744825]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.87591162e+04 2.92399738e+03 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  210
Old Q Values:  [3.87591162e+04 2.92399738e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [5.34786112e+04 2.92399738e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   3477.48884728   -180.00807518 126565.21588927]
------
Step:5, Action:West
State  130
Old Q Values:  [ 36041.91667283   3477.48884728   -180.00807518 126565.21588927]
New Q values:  [36041.91667283  3477.48884728  -180.00807518 89932.49675177]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:6, Action:West
State  126
Old Q Values:  [  0.         443.7746053  374.96879939 116.83031103]
New Q values:  [  0.         443.7746053  374.96879939 241.18120194]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007  630.16359176 -120.29354603]
------
Step:7, Action:East
State  99
Old Q Values:  [    0.         43144.02842638 74430.88312345     0.        ]
New Q values:  [    0.         43144.02842638 68622.23142368     0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
------
Step:8, Action:West
State  126
Old Q Values:  [  0.         443.7746053  374.96879939 241.18120194]
New Q values:  [  0.         443.7746053  374.96879939 284.9215583 ]
Reward: -1  Episode Reward:  52
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007  630.16359176 -120.29354603]
------
Step:9, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   483.82292418  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   326.06155126  -180.6       ]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         443.7746053  374.96879939 284.9215583 ]
------
Step:10, Action:South
State  126
Old Q Values:  [  0.         443.7746053  374.96879939 284.9215583 ]
New Q values:  [  0.         331.64678262 374.96879939 284.9215583 ]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638 515.78980165   0.        ]
------
Step:11, Action:East
State  204
Old Q Values:  [   0.         1115.71533251  814.02974331  399.75525955]
New Q values:  [   0.         1115.71533251  815.31186329  399.75525955]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1634.33321988   633.14118268 -8489.43729461   531.09593838]
------
Step:12, Action:North
State  216
Old Q Values:  [ 1634.33321988   633.14118268 -8489.43729461   531.09593838]
New Q values:  [ 2369.5381771    633.14118268 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  5.72134963e+03 -3.22965309e-01  3.16724961e+02]
------
Step:13, Action:South
State  138
Old Q Values:  [-1.39459256e+02  5.72134963e+03 -3.22965309e-01  3.16724961e+02]
New Q values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  3.16724961e+02]
Reward: -10001  Episode Reward:  -9953
xxxxx
x   x
x  gx
x  .x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  820.53738002 -7525.53407498 -7525.7277781    542.03782096]
------
Step:1, Action:North
State  288
Old Q Values:  [  820.53738002 -7525.53407498 -7525.7277781    542.03782096]
New Q values:  [ 6748.22361108 -7525.53407498 -7525.7277781    542.03782096]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21382.02886356   624.64960161 -1925.08326713 -1455.65174173]
------
Step:2, Action:North
State  208
Old Q Values:  [21382.02886356   624.64960161 -1925.08326713 -1455.65174173]
New Q values:  [ 3491.60788429   624.64960161 -1925.08326713 -1455.65174173]
Reward: -9991  Episode Reward:  -9982
xxxxx
x..gx
x.  x
x.. x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3491.60788429   624.64960161 -1925.08326713 -1455.65174173]
------
Step:1, Action:North
State  216
Old Q Values:  [ 2369.5381771    633.14118268 -8489.43729461   531.09593838]
New Q values:  [ 1048.23275911   633.14118268 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  3.16724961e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  3.16724961e+02]
New Q values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  7.34543335e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2028.17783586   244.70936098]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2028.17783586   244.70936098]
New Q values:  [ -253.44886264 -1902.20915811  1031.03413488   244.70936098]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  7.34543335e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  7.34543335e+02]
New Q values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  5.06253272e+02]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   710.11979298   275.43590235]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   710.11979298   275.43590235]
New Q values:  [ -281.736      -3455.78276043   435.32389877   275.43590235]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  5.06253272e+02]
------
Step:6, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  5.06253272e+02]
New Q values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  3.32498478e+02]
Reward: -1  Episode Reward:  4
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   435.32389877   275.43590235]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   435.32389877   275.43590235]
New Q values:  [ -281.736      -3455.78276043   273.27910303   275.43590235]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  3.32498478e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  3.32498478e+02]
New Q values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.15030162e+02]
Reward: -1  Episode Reward:  2
xxxxx
x.a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   273.27910303   275.43590235]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   273.27910303   275.43590235]
New Q values:  [ -281.736      -3455.78276043   273.27910303   634.99802495]
Reward: 9  Episode Reward:  11
xxxxx
xa  x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1731.41221336  269.31203637 -252.78192178]
------
Step:10, Action:South
State  109
Old Q Values:  [-241.10880094  544.73063819 -253.21568792 -180.6       ]
New Q values:  [-241.10880094  727.45448717 -253.21568792 -180.6       ]
Reward: 9  Episode Reward:  20
xxxxx
x g x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  681.98074137 1680.54077297  940.95197235]
------
Step:11, Action:East
State  188
Old Q Values:  [-6523.78898263   712.69143835  1026.09328485     0.        ]
New Q values:  [-6523.78898263   712.69143835  1223.43046172     0.        ]
Reward: 9  Episode Reward:  29
xxxxx
xg  x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2691.97715929  636.07481225  408.67479662]
------
Step:12, Action:South
State  200
Old Q Values:  [  62.8218634  2691.97715929  636.07481225  408.67479662]
New Q values:  [   62.8218634  22697.26268809   636.07481225   408.67479662]
Reward: 9  Episode Reward:  38
xxxxx
x g x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   620.69284101 72050.23941459]
------
Step:13, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2504.2357869    593.81065974]
New Q values:  [   16.82637525 -5807.06396197  2504.2357869    677.71070414]
Reward: 9  Episode Reward:  47
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1449.28813415  -40.34168621  749.26864666  -35.88578819]
------
Step:14, Action:North
State  260
Old Q Values:  [ 2390.65100806 -8695.4397473    728.9754944  -2601.74710518]
New Q values:  [-4677.31045826 -8695.4397473    728.9754944  -2601.74710518]
Reward: -10001  Episode Reward:  -9954
xxxxx
x   x
xg  x
x  .x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1449.28813415  -40.34168621  749.26864666  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [1449.28813415  -40.34168621  749.26864666  -35.88578819]
New Q values:  [753.91301261 -40.34168621 749.26864666 -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753   562.65919651 -3151.23898092   -30.99112081]
------
Step:2, Action:South
State  183
Old Q Values:  [  471.26619624   674.9155526  10012.58265925     0.        ]
New Q values:  [  471.26619624   495.54012483 10012.58265925     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[753.91301261 -40.34168621 749.26864666 -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [753.91301261 -40.34168621 749.26864666 -35.88578819]
New Q values:  [469.762964   -40.34168621 749.26864666 -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753   562.65919651 -3151.23898092   -30.99112081]
------
Step:4, Action:South
State  183
Old Q Values:  [  471.26619624   495.54012483 10012.58265925     0.        ]
New Q values:  [  471.26619624   422.39664393 10012.58265925     0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 749.26864666 -35.88578819]
------
Step:5, Action:East
State  261
Old Q Values:  [469.762964   -40.34168621 749.26864666 -35.88578819]
New Q values:  [469.762964   -40.34168621 605.82651277 -35.88578819]
Reward: 9  Episode Reward:  15
xxxxx
x...x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1002.39684702]
------
Step:6, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  1002.39684702]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   582.10669264]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 605.82651277 -35.88578819]
------
Step:7, Action:East
State  261
Old Q Values:  [469.762964   -40.34168621 605.82651277 -35.88578819]
New Q values:  [  469.762964     -40.34168621 15856.80242948   -35.88578819]
Reward: -10001  Episode Reward:  -9987
xxxxx
x...x
x ..x
x g x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869   79.64232007  630.16359176 -120.29354603]
------
Step:1, Action:East
State  107
Old Q Values:  [-252.35169558 1731.41221336  269.31203637 -252.78192178]
New Q values:  [-252.35169558 1731.41221336  303.62422203 -252.78192178]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   273.27910303   634.99802495]
------
Step:2, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1272.07985357   114.37826683]
New Q values:  [-9594.56523706 -8069.05606225  1272.07985357   263.38765288]
Reward: -1  Episode Reward:  8
xxxxx
xag.x
x...x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  727.45448717 -253.21568792 -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869   79.64232007  630.16359176 -120.29354603]
New Q values:  [-177.44732869  206.05468698  630.16359176 -120.29354603]
Reward: 9  Episode Reward:  17
xxxxx
x  .x
xag.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753   562.65919651 -3151.23898092   -30.99112081]
------
Step:4, Action:South
State  181
Old Q Values:  [   87.06729753   562.65919651 -3151.23898092   -30.99112081]
New Q values:  [   87.06729753  4987.50440745 -3151.23898092   -30.99112081]
Reward: 9  Episode Reward:  26
xxxxx
x g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  469.762964     -40.34168621 15856.80242948   -35.88578819]
------
Step:5, Action:East
State  260
Old Q Values:  [-4677.31045826 -8695.4397473    728.9754944  -2601.74710518]
New Q values:  [-4677.31045826 -8695.4397473   1042.26093383 -2601.74710518]
Reward: -1  Episode Reward:  25
xxxxx
xg .x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2504.2357869    677.71070414]
------
Step:6, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2504.2357869    677.71070414]
New Q values:  [   16.82637525 -5807.06396197  3031.56139808   677.71070414]
Reward: 9  Episode Reward:  34
xxxxx
x g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6748.22361108 -7525.53407498 -7525.7277781    542.03782096]
------
Step:7, Action:North
State  288
Old Q Values:  [ 6748.22361108 -7525.53407498 -7525.7277781    542.03782096]
New Q values:  [ 3752.17180972 -7525.53407498 -7525.7277781    542.03782096]
Reward: 9  Episode Reward:  43
xxxxx
xg .x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3491.60788429   624.64960161 -1925.08326713 -1455.65174173]
------
Step:8, Action:North
State  208
Old Q Values:  [ 3491.60788429   624.64960161 -1925.08326713 -1455.65174173]
New Q values:  [ 2335.43949259   624.64960161 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  52
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  3111.32112957 -6245.61866138   169.0783428 ]
------
Step:9, Action:South
State  130
Old Q Values:  [36041.91667283  3477.48884728  -180.00807518 89932.49675177]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 89932.49675177]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2335.43949259   624.64960161 -1925.08326713 -1455.65174173]
------
Step:10, Action:North
State  208
Old Q Values:  [ 2335.43949259   624.64960161 -1925.08326713 -1455.65174173]
New Q values:  [  998.08484566   624.64960161 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.15030162e+02]
------
Step:11, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 89932.49675177]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 75273.40909677]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:12, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1272.07985357   263.38765288]
New Q values:  [-9594.56523706 -8069.05606225  1272.07985357   322.9914073 ]
Reward: -1  Episode Reward:  48
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  727.45448717 -253.21568792 -180.6       ]
------
Step:13, Action:South
State  109
Old Q Values:  [-241.10880094  727.45448717 -253.21568792 -180.6       ]
New Q values:  [-241.10880094 1786.6331171  -253.21568792 -180.6       ]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   87.06729753  4987.50440745 -3151.23898092   -30.99112081]
------
Step:14, Action:South
State  181
Old Q Values:  [   87.06729753  4987.50440745 -3151.23898092   -30.99112081]
New Q values:  [   87.06729753  6751.44249183 -3151.23898092   -30.99112081]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  469.762964     -40.34168621 15856.80242948   -35.88578819]
------
Step:15, Action:East
State  257
Old Q Values:  [39211.60636148  2256.66526474  6585.91095232  1875.31501677]
New Q values:  [39211.60636148  2256.66526474 24248.83620531  1875.31501677]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   620.69284101 72050.23941459]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   620.69284101 72050.23941459]
New Q values:  [-2527.46239811 -8521.23367799   620.69284101 29132.17404598]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4677.31045826 -8695.4397473   1042.26093383 -2601.74710518]
------
Step:17, Action:East
State  257
Old Q Values:  [39211.60636148  2256.66526474 24248.83620531  1875.31501677]
New Q values:  [39211.60636148  2256.66526474 18438.58669592  1875.31501677]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   620.69284101 29132.17404598]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   620.69284101 29132.17404598]
New Q values:  [-2527.46239811 -8521.23367799   620.69284101 11964.94789854]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4677.31045826 -8695.4397473   1042.26093383 -2601.74710518]
------
Step:19, Action:East
State  260
Old Q Values:  [-4677.31045826 -8695.4397473   1042.26093383 -2601.74710518]
New Q values:  [-4677.31045826 -8695.4397473   4005.78874309 -2601.74710518]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   620.69284101 11964.94789854]
------
Step:20, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   620.69284101 11964.94789854]
New Q values:  [-2527.46239811 -8521.23367799   620.69284101  5987.11578234]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4677.31045826 -8695.4397473   4005.78874309 -2601.74710518]
------
Step:21, Action:East
State  260
Old Q Values:  [-4677.31045826 -8695.4397473   4005.78874309 -2601.74710518]
New Q values:  [-4677.31045826 -8695.4397473   3397.85023194 -2601.74710518]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   620.69284101  5987.11578234]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   620.69284101  5987.11578234]
New Q values:  [-2527.46239811 -8521.23367799  1373.32867932  5987.11578234]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3752.17180972 -7525.53407498 -7525.7277781    542.03782096]
------
Step:23, Action:North
State  288
Old Q Values:  [ 3752.17180972 -7525.53407498 -7525.7277781    542.03782096]
New Q values:  [17543.85209391 -7525.53407498 -7525.7277781    542.03782096]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.34786112e+04 2.92399738e+03 2.24233123e+03 3.52184257e+00]
------
Step:24, Action:North
State  208
Old Q Values:  [  998.08484566   624.64960161 -1925.08326713 -1455.65174173]
New Q values:  [  463.14298688   624.64960161 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.15030162e+02]
------
Step:25, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 75273.40909677]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 69409.77403477]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:26, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   273.27910303   634.99802495]
New Q values:  [ -281.736      -3455.78276043   273.27910303   442.44828751]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  206.05468698  630.16359176 -120.29354603]
------
Step:27, Action:East
State  111
Old Q Values:  [-177.44732869  206.05468698  630.16359176 -120.29354603]
New Q values:  [-177.44732869  206.05468698  560.77567717 -120.29354603]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1031.03413488   244.70936098]
------
Step:28, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   273.27910303   442.44828751]
New Q values:  [ -281.736      -3455.78276043   173.22068983   442.44828751]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.15030162e+02]
------
Step:29, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.15030162e+02]
New Q values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.18146551e+02]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   173.22068983   442.44828751]
------
Step:30, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   173.22068983   442.44828751]
New Q values:  [ -281.736      -3455.78276043   173.22068983   274.19778038]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   326.06155126  -180.6       ]
------
Step:31, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   326.06155126  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   212.08395462  -180.6       ]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   173.22068983   274.19778038]
------
Step:32, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   173.22068983   274.19778038]
New Q values:  [ -281.736      -3455.78276043   173.22068983   277.3118153 ]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  206.05468698  560.77567717 -120.29354603]
------
Step:33, Action:East
State  111
Old Q Values:  [-177.44732869  206.05468698  560.77567717 -120.29354603]
New Q values:  [-177.44732869  206.05468698  533.02051133 -120.29354603]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1031.03413488   244.70936098]
------
Step:34, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1031.03413488   244.70936098]
New Q values:  [ -253.44886264 -1902.20915811   477.25761928   244.70936098]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.18146551e+02]
------
Step:35, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.18146551e+02]
New Q values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.29835906e+02]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   477.25761928   244.70936098]
------
Step:36, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   477.25761928   244.70936098]
New Q values:  [ -253.44886264 -1902.20915811   259.25381957   244.70936098]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.29835906e+02]
------
Step:37, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  2.29835906e+02]
New Q values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  1.69110508e+02]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   259.25381957   244.70936098]
------
Step:38, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   259.25381957   244.70936098]
New Q values:  [ -253.44886264 -1902.20915811   153.83468034   244.70936098]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  1.69110508e+02]
------
Step:39, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 69409.77403477]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 67064.32000997]
Reward: -1  Episode Reward:  21
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:40, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   173.22068983   277.3118153 ]
New Q values:  [ -281.736      -3455.78276043   173.22068983   173.94991251]
Reward: -1  Episode Reward:  20
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   212.08395462  -180.6       ]
------
Step:41, Action:East
State  99
Old Q Values:  [    0.         43144.02842638 68622.23142368     0.        ]
New Q values:  [    0.         43144.02842638 66749.30296554     0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:42, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1272.07985357   322.9914073 ]
New Q values:  [-9594.56523706 -8069.05606225  1272.07985357   664.58649805]
Reward: -1  Episode Reward:  18
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1786.6331171  -253.21568792 -180.6       ]
------
Step:43, Action:South
State  99
Old Q Values:  [    0.         43144.02842638 66749.30296554     0.        ]
New Q values:  [    0.         40775.35701405 66749.30296554     0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 26817.66925136 48692.32086537     0.        ]
------
Step:44, Action:North
State  180
Old Q Values:  [-2959.50722357  2542.82527782  4363.22118993 -4966.32149798]
New Q values:  [-1120.77770304  2542.82527782  4363.22118993 -4966.32149798]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   212.08395462  -180.6       ]
------
Step:45, Action:East
State  108
Old Q Values:  [-8463.16477134   752.96092799   344.90773917     0.        ]
New Q values:  [-8463.16477134   752.96092799   518.98705174     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1272.07985357   664.58649805]
------
Step:46, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1272.07985357   664.58649805]
New Q values:  [-9594.56523706 -8069.05606225  1441.6282803    664.58649805]
Reward: -1  Episode Reward:  14
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  3111.32112957 -6245.61866138   169.0783428 ]
------
Step:47, Action:South
State  136
Old Q Values:  [-5281.21195651  3111.32112957 -6245.61866138   169.0783428 ]
New Q values:  [-5281.21195651  1431.32333231 -6245.61866138   169.0783428 ]
Reward: -1  Episode Reward:  13
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  463.14298688   624.64960161 -1925.08326713 -1455.65174173]
------
Step:48, Action:South
State  208
Old Q Values:  [  463.14298688   624.64960161 -1925.08326713 -1455.65174173]
New Q values:  [  463.14298688  5512.41546882 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  12
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17543.85209391 -7525.53407498 -7525.7277781    542.03782096]
------
Step:49, Action:North
State  288
Old Q Values:  [17543.85209391 -7525.53407498 -7525.7277781    542.03782096]
New Q values:  [23060.52420759 -7525.53407498 -7525.7277781    542.03782096]
Reward: -1  Episode Reward:  11
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.34786112e+04 2.92399738e+03 2.24233123e+03 3.52184257e+00]
------
Step:50, Action:North
State  208
Old Q Values:  [  463.14298688  5512.41546882 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  5512.41546882 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  1.69110508e+02]
------
Step:51, Action:West
State  136
Old Q Values:  [-5281.21195651  1431.32333231 -6245.61866138   169.0783428 ]
New Q values:  [-5281.21195651  1431.32333231 -6245.61866138   499.51982121]
Reward: -1  Episode Reward:  9
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1441.6282803    664.58649805]
------
Step:52, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   173.22068983   173.94991251]
New Q values:  [ -281.736      -3455.78276043   119.42142844   173.94991251]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  1.69110508e+02]
------
Step:53, Action:West
State  136
Old Q Values:  [-5281.21195651  1431.32333231 -6245.61866138   499.51982121]
New Q values:  [-5281.21195651  1431.32333231 -6245.61866138   631.69641257]
Reward: -1  Episode Reward:  7
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1441.6282803    664.58649805]
------
Step:54, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   119.42142844   173.94991251]
New Q values:  [ -281.736      -3455.78276043    97.90172388   173.94991251]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  1.69110508e+02]
------
Step:55, Action:West
State  136
Old Q Values:  [-5281.21195651  1431.32333231 -6245.61866138   631.69641257]
New Q values:  [-5281.21195651  1431.32333231 -6245.61866138   684.56704912]
Reward: -1  Episode Reward:  5
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1441.6282803    664.58649805]
------
Step:56, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1441.6282803    664.58649805]
New Q values:  [-9594.56523706 -8069.05606225  1005.44831181   664.58649805]
Reward: -1  Episode Reward:  4
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  1431.32333231 -6245.61866138   684.56704912]
------
Step:57, Action:South
State  136
Old Q Values:  [-5281.21195651  1431.32333231 -6245.61866138   684.56704912]
New Q values:  [-5281.21195651  2225.65397357 -6245.61866138   684.56704912]
Reward: -1  Episode Reward:  3
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726  5512.41546882 -1925.08326713 -1455.65174173]
------
Step:58, Action:South
State  208
Old Q Values:  [  235.39034726  5512.41546882 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  9122.5234498  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  2
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23060.52420759 -7525.53407498 -7525.7277781    542.03782096]
------
Step:59, Action:North
State  288
Old Q Values:  [23060.52420759 -7525.53407498 -7525.7277781    542.03782096]
New Q values:  [11960.36671798 -7525.53407498 -7525.7277781    542.03782096]
Reward: -1  Episode Reward:  1
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726  9122.5234498  -1925.08326713 -1455.65174173]
------
Step:60, Action:South
State  208
Old Q Values:  [  235.39034726  9122.5234498  -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  7236.51939531 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11960.36671798 -7525.53407498 -7525.7277781    542.03782096]
------
Step:61, Action:West
State  288
Old Q Values:  [11960.36671798 -7525.53407498 -7525.7277781    542.03782096]
New Q values:  [11960.36671798 -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  -1
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   582.10669264]
------
Step:62, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   582.10669264]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  4989.2834059 ]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  469.762964     -40.34168621 15856.80242948   -35.88578819]
------
Step:63, Action:East
State  257
Old Q Values:  [39211.60636148  2256.66526474 18438.58669592  1875.31501677]
New Q values:  [39211.60636148  2256.66526474  9170.96941307  1875.31501677]
Reward: -1  Episode Reward:  -3
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1373.32867932  5987.11578234]
------
Step:64, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1373.32867932  5987.11578234]
New Q values:  [-2527.46239811 -8521.23367799  1373.32867932  7151.28704178]
Reward: -1  Episode Reward:  -4
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  469.762964     -40.34168621 15856.80242948   -35.88578819]
------
Step:65, Action:North
State  257
Old Q Values:  [39211.60636148  2256.66526474  9170.96941307  1875.31501677]
New Q values:  [39202.38818809  2256.66526474  9170.96941307  1875.31501677]
Reward: -1  Episode Reward:  -5
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 26817.66925136 48692.32086537     0.        ]
------
Step:66, Action:North
State  181
Old Q Values:  [   87.06729753  6751.44249183 -3151.23898092   -30.99112081]
New Q values:  [  570.21685414  6751.44249183 -3151.23898092   -30.99112081]
Reward: -1  Episode Reward:  -6
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1786.6331171  -253.21568792 -180.6       ]
------
Step:67, Action:South
State  109
Old Q Values:  [-241.10880094 1786.6331171  -253.21568792 -180.6       ]
New Q values:  [-241.10880094 2739.48599439 -253.21568792 -180.6       ]
Reward: -1  Episode Reward:  -7
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  570.21685414  6751.44249183 -3151.23898092   -30.99112081]
------
Step:68, Action:South
State  181
Old Q Values:  [  570.21685414  6751.44249183 -3151.23898092   -30.99112081]
New Q values:  [  570.21685414  7457.01772558 -3151.23898092   -30.99112081]
Reward: -1  Episode Reward:  -8
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  469.762964     -40.34168621 15856.80242948   -35.88578819]
------
Step:69, Action:East
State  261
Old Q Values:  [  469.762964     -40.34168621 15856.80242948   -35.88578819]
New Q values:  [ 469.762964    -40.34168621 7838.90599356  -35.88578819]
Reward: -1  Episode Reward:  -9
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  4989.2834059 ]
------
Step:70, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1373.32867932  7151.28704178]
New Q values:  [-2527.46239811 -8521.23367799  1373.32867932  5211.58661478]
Reward: -1  Episode Reward:  -10
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 469.762964    -40.34168621 7838.90599356  -35.88578819]
------
Step:71, Action:East
State  257
Old Q Values:  [39202.38818809  2256.66526474  9170.96941307  1875.31501677]
New Q values:  [39202.38818809  2256.66526474  5231.26374966  1875.31501677]
Reward: -1  Episode Reward:  -11
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1373.32867932  5211.58661478]
------
Step:72, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  4989.2834059 ]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  4346.78516043]
Reward: -1  Episode Reward:  -12
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 469.762964    -40.34168621 7838.90599356  -35.88578819]
------
Step:73, Action:East
State  261
Old Q Values:  [ 469.762964    -40.34168621 7838.90599356  -35.88578819]
New Q values:  [ 469.762964    -40.34168621 4438.99794555  -35.88578819]
Reward: -1  Episode Reward:  -13
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  4346.78516043]
------
Step:74, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  4346.78516043]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  3069.81344784]
Reward: -1  Episode Reward:  -14
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 469.762964    -40.34168621 4438.99794555  -35.88578819]
------
Step:75, Action:East
State  261
Old Q Values:  [ 469.762964    -40.34168621 4438.99794555  -35.88578819]
New Q values:  [ 469.762964    -40.34168621 2695.94321257  -35.88578819]
Reward: -1  Episode Reward:  -15
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  3069.81344784]
------
Step:76, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1373.32867932  5211.58661478]
New Q values:  [-2527.46239811 -8521.23367799  1373.32867932  2892.81760968]
Reward: -1  Episode Reward:  -16
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 469.762964    -40.34168621 2695.94321257  -35.88578819]
------
Step:77, Action:North
State  260
Old Q Values:  [-4677.31045826 -8695.4397473   3397.85023194 -2601.74710518]
New Q values:  [ -380.54080219 -8695.4397473   3397.85023194 -2601.74710518]
Reward: -1  Episode Reward:  -17
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:78, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 91391.64586576     0.        ]
New Q values:  [    0.          4614.46100011 97720.73250211     0.        ]
Reward: 100009  Episode Reward:  99992
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -380.54080219 -8695.4397473   3397.85023194 -2601.74710518]
------
Step:1, Action:East
State  261
Old Q Values:  [ 469.762964    -40.34168621 2695.94321257  -35.88578819]
New Q values:  [ 469.762964    -40.34168621 1993.24570445  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  3031.56139808   677.71070414]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1373.32867932  2892.81760968]
New Q values:  [-2527.46239811 -8521.23367799  4142.84148712  2892.81760968]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11960.36671798 -7525.53407498 -7525.7277781    390.84713617]
------
Step:3, Action:North
State  288
Old Q Values:  [11960.36671798 -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [ 6960.50250579 -7525.53407498 -7525.7277781    390.84713617]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726  7236.51939531 -1925.08326713 -1455.65174173]
------
Step:4, Action:South
State  208
Old Q Values:  [  235.39034726  7236.51939531 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  4982.15850986 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6960.50250579 -7525.53407498 -7525.7277781    390.84713617]
------
Step:5, Action:North
State  288
Old Q Values:  [ 6960.50250579 -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [ 4278.24855527 -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  25
xxxxx
x..gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726  4982.15850986 -1925.08326713 -1455.65174173]
------
Step:6, Action:South
State  208
Old Q Values:  [  235.39034726  4982.15850986 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  3275.73797053 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  24
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4278.24855527 -7525.53407498 -7525.7277781    390.84713617]
------
Step:7, Action:North
State  288
Old Q Values:  [ 4278.24855527 -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [ 2693.42081327 -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726  3275.73797053 -1925.08326713 -1455.65174173]
------
Step:8, Action:South
State  208
Old Q Values:  [  235.39034726  3275.73797053 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  2117.72143219 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2693.42081327 -7525.53407498 -7525.7277781    390.84713617]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2693.42081327 -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [ 1712.08475496 -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726  2117.72143219 -1925.08326713 -1455.65174173]
------
Step:10, Action:South
State  208
Old Q Values:  [  235.39034726  2117.72143219 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  1360.11399937 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1712.08475496 -7525.53407498 -7525.7277781    390.84713617]
------
Step:11, Action:North
State  288
Old Q Values:  [ 1712.08475496 -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [ 1092.2681018  -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  19
xxxxx
xg..x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726  1360.11399937 -1925.08326713 -1455.65174173]
------
Step:12, Action:South
State  208
Old Q Values:  [  235.39034726  1360.11399937 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726   871.12603028 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  18
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1092.2681018  -7525.53407498 -7525.7277781    390.84713617]
------
Step:13, Action:North
State  288
Old Q Values:  [ 1092.2681018  -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [  697.6450498  -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726   871.12603028 -1925.08326713 -1455.65174173]
------
Step:14, Action:South
State  208
Old Q Values:  [  235.39034726   871.12603028 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726   557.14392705 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  697.6450498  -7525.53407498 -7525.7277781    390.84713617]
------
Step:15, Action:North
State  288
Old Q Values:  [  697.6450498  -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [  445.60119804 -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726   557.14392705 -1925.08326713 -1455.65174173]
------
Step:16, Action:South
State  208
Old Q Values:  [  235.39034726   557.14392705 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726   355.93793023 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  14
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  445.60119804 -7525.53407498 -7525.7277781    390.84713617]
------
Step:17, Action:North
State  288
Old Q Values:  [  445.60119804 -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [  284.42185829 -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726   355.93793023 -1925.08326713 -1455.65174173]
------
Step:18, Action:South
State  210
Old Q Values:  [5.34786112e+04 2.92399738e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [5.34786112e+04 1.28625309e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  284.42185829 -7525.53407498 -7525.7277781    390.84713617]
------
Step:19, Action:North
State  288
Old Q Values:  [  284.42185829 -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [  219.95012238 -7525.53407498 -7525.7277781    390.84713617]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726   355.93793023 -1925.08326713 -1455.65174173]
------
Step:20, Action:South
State  208
Old Q Values:  [  235.39034726   355.93793023 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726   259.02931295 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  219.95012238 -7525.53407498 -7525.7277781    390.84713617]
------
Step:21, Action:West
State  288
Old Q Values:  [  219.95012238 -7525.53407498 -7525.7277781    390.84713617]
New Q values:  [  219.95012238 -7525.53407498 -7525.7277781   1076.68288882]
Reward: -1  Episode Reward:  9
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  3069.81344784]
------
Step:22, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  3069.81344784]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  1825.29909047]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 469.762964    -40.34168621 1993.24570445  -35.88578819]
------
Step:23, Action:East
State  261
Old Q Values:  [ 469.762964    -40.34168621 1993.24570445  -35.88578819]
New Q values:  [ 469.762964    -40.34168621 2039.55072792  -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4142.84148712  2892.81760968]
------
Step:24, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4142.84148712  2892.81760968]
New Q values:  [-2527.46239811 -8521.23367799  1979.54146149  2892.81760968]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  219.95012238 -7525.53407498 -7525.7277781   1076.68288882]
------
Step:25, Action:West
State  288
Old Q Values:  [  219.95012238 -7525.53407498 -7525.7277781   1076.68288882]
New Q values:  [  219.95012238 -7525.53407498 -7525.7277781   1297.91843843]
Reward: -1  Episode Reward:  5
xxxxx
xg..x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1979.54146149  2892.81760968]
------
Step:26, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1979.54146149  2892.81760968]
New Q values:  [-2527.46239811 -8521.23367799  1979.54146149  1768.39226225]
Reward: -1  Episode Reward:  4
xxxxx
x.g.x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 469.762964    -40.34168621 2039.55072792  -35.88578819]
------
Step:27, Action:East
State  260
Old Q Values:  [ -380.54080219 -8695.4397473   3397.85023194 -2601.74710518]
New Q values:  [ -380.54080219 -8695.4397473   1952.40253122 -2601.74710518]
Reward: -1  Episode Reward:  3
xxxxx
xg..x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1979.54146149  1768.39226225]
------
Step:28, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1979.54146149  1768.39226225]
New Q values:  [-2527.46239811 -8521.23367799  1180.59211613  1768.39226225]
Reward: -1  Episode Reward:  2
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  219.95012238 -7525.53407498 -7525.7277781   1297.91843843]
------
Step:29, Action:West
State  288
Old Q Values:  [  219.95012238 -7525.53407498 -7525.7277781   1297.91843843]
New Q values:  [  219.95012238 -7525.53407498 -7525.7277781   1049.08505405]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1180.59211613  1768.39226225]
------
Step:30, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  1825.29909047]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  1341.38485456]
Reward: -1  Episode Reward:  0
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 469.762964    -40.34168621 2039.55072792  -35.88578819]
------
Step:31, Action:East
State  261
Old Q Values:  [ 469.762964    -40.34168621 2039.55072792  -35.88578819]
New Q values:  [ 469.762964    -40.34168621 1217.63574754  -35.88578819]
Reward: -1  Episode Reward:  -1
xxxxx
x...x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1341.38485456]
------
Step:32, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  1341.38485456]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   901.24466609]
Reward: -1  Episode Reward:  -2
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 469.762964    -40.34168621 1217.63574754  -35.88578819]
------
Step:33, Action:East
State  261
Old Q Values:  [ 469.762964    -40.34168621 1217.63574754  -35.88578819]
New Q values:  [469.762964   -40.34168621 756.82769884 -35.88578819]
Reward: -1  Episode Reward:  -3
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   901.24466609]
------
Step:34, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1180.59211613  1768.39226225]
New Q values:  [-2527.46239811 -8521.23367799  1180.59211613   933.80521455]
Reward: -1  Episode Reward:  -4
xxxxx
x.g.x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 756.82769884 -35.88578819]
------
Step:35, Action:East
State  261
Old Q Values:  [469.762964   -40.34168621 756.82769884 -35.88578819]
New Q values:  [469.762964   -40.34168621 572.50447936 -35.88578819]
Reward: -1  Episode Reward:  -5
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   901.24466609]
------
Step:36, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   901.24466609]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   531.64921024]
Reward: -1  Episode Reward:  -6
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 572.50447936 -35.88578819]
------
Step:37, Action:East
State  261
Old Q Values:  [469.762964   -40.34168621 572.50447936 -35.88578819]
New Q values:  [469.762964   -40.34168621 582.57942658 -35.88578819]
Reward: -1  Episode Reward:  -7
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1180.59211613   933.80521455]
------
Step:38, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1180.59211613   933.80521455]
New Q values:  [-2527.46239811 -8521.23367799   786.36236267   933.80521455]
Reward: -1  Episode Reward:  -8
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  219.95012238 -7525.53407498 -7525.7277781   1049.08505405]
------
Step:39, Action:West
State  288
Old Q Values:  [  219.95012238 -7525.53407498 -7525.7277781   1049.08505405]
New Q values:  [  219.95012238 -7525.53407498 -7525.7277781    699.17558598]
Reward: -1  Episode Reward:  -9
xxxxx
x...x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   786.36236267   933.80521455]
------
Step:40, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   786.36236267   933.80521455]
New Q values:  [-2527.46239811 -8521.23367799   523.69762086   933.80521455]
Reward: -1  Episode Reward:  -10
xxxxx
x...x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  219.95012238 -7525.53407498 -7525.7277781    699.17558598]
------
Step:41, Action:North
State  288
Old Q Values:  [  219.95012238 -7525.53407498 -7525.7277781    699.17558598]
New Q values:  [  165.08884284 -7525.53407498 -7525.7277781    699.17558598]
Reward: -1  Episode Reward:  -11
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726   259.02931295 -1925.08326713 -1455.65174173]
------
Step:42, Action:South
State  208
Old Q Values:  [  235.39034726   259.02931295 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726   312.76440097 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -12
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  165.08884284 -7525.53407498 -7525.7277781    699.17558598]
------
Step:43, Action:West
State  288
Old Q Values:  [  165.08884284 -7525.53407498 -7525.7277781    699.17558598]
New Q values:  [  165.08884284 -7525.53407498 -7525.7277781    559.21179876]
Reward: -1  Episode Reward:  -13
xxxxx
x...x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   523.69762086   933.80521455]
------
Step:44, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   523.69762086   933.80521455]
New Q values:  [-2527.46239811 -8521.23367799   376.64258797   933.80521455]
Reward: -1  Episode Reward:  -14
xxxxx
x...x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  165.08884284 -7525.53407498 -7525.7277781    559.21179876]
------
Step:45, Action:North
State  288
Old Q Values:  [  165.08884284 -7525.53407498 -7525.7277781    559.21179876]
New Q values:  [16109.01890716 -7525.53407498 -7525.7277781    559.21179876]
Reward: -1  Episode Reward:  -15
xxxxx
x...x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.34786112e+04 1.28625309e+03 2.24233123e+03 3.52184257e+00]
------
Step:46, Action:North
State  210
Old Q Values:  [5.34786112e+04 1.28625309e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [4.15161405e+04 1.28625309e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  -6
xxxxx
x..ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 67064.32000997]
------
Step:47, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 67064.32000997]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 65681.60617829]
Reward: 9  Episode Reward:  3
xxxxx
x.a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
------
Step:48, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   153.83468034   244.70936098]
New Q values:  [ -253.44886264 -1902.20915811   153.83468034   263.18989779]
Reward: 9  Episode Reward:  12
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  206.05468698  533.02051133 -120.29354603]
------
Step:49, Action:East
State  99
Old Q Values:  [    0.         40775.35701405 66749.30296554     0.        ]
New Q values:  [    0.         40775.35701405 66000.13158228     0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:50, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043    97.90172388   173.94991251]
New Q values:  [ -281.736      -3455.78276043    97.90172388   228.8861184 ]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  206.05468698  533.02051133 -120.29354603]
------
Step:51, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   212.08395462  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684   152.89941737  -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043    97.90172388   228.8861184 ]
------
Step:52, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043    97.90172388   228.8861184 ]
New Q values:  [ -281.736      -3455.78276043    97.90172388   136.82427257]
Reward: -1  Episode Reward:  8
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   152.89941737  -180.6       ]
------
Step:53, Action:East
State  108
Old Q Values:  [-8463.16477134   752.96092799   518.98705174     0.        ]
New Q values:  [-8463.16477134   752.96092799   508.62931424     0.        ]
Reward: -1  Episode Reward:  7
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1005.44831181   664.58649805]
------
Step:54, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043    97.90172388   136.82427257]
New Q values:  [ -281.736      -3455.78276043    89.29384206   136.82427257]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  1.69110508e+02]
------
Step:55, Action:West
State  136
Old Q Values:  [-5281.21195651  2225.65397357 -6245.61866138   684.56704912]
New Q values:  [-5281.21195651  2225.65397357 -6245.61866138   574.86131319]
Reward: -1  Episode Reward:  5
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1005.44831181   664.58649805]
------
Step:56, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1005.44831181   664.58649805]
New Q values:  [-9594.56523706 -8069.05606225  1069.2755168    664.58649805]
Reward: -1  Episode Reward:  4
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2225.65397357 -6245.61866138   574.86131319]
------
Step:57, Action:South
State  136
Old Q Values:  [-5281.21195651  2225.65397357 -6245.61866138   574.86131319]
New Q values:  [-5281.21195651   983.49090972 -6245.61866138   574.86131319]
Reward: -1  Episode Reward:  3
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726   312.76440097 -1925.08326713 -1455.65174173]
------
Step:58, Action:South
State  208
Old Q Values:  [  235.39034726   312.76440097 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  4957.21143254 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  2
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16109.01890716 -7525.53407498 -7525.7277781    559.21179876]
------
Step:59, Action:North
State  288
Old Q Values:  [16109.01890716 -7525.53407498 -7525.7277781    559.21179876]
New Q values:  [ 7930.17099262 -7525.53407498 -7525.7277781    559.21179876]
Reward: -1  Episode Reward:  1
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  235.39034726  4957.21143254 -1925.08326713 -1455.65174173]
------
Step:60, Action:South
State  208
Old Q Values:  [  235.39034726  4957.21143254 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  4361.3358708  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7930.17099262 -7525.53407498 -7525.7277781    559.21179876]
------
Step:61, Action:West
State  288
Old Q Values:  [ 7930.17099262 -7525.53407498 -7525.7277781    559.21179876]
New Q values:  [ 7930.17099262 -7525.53407498 -7525.7277781    382.57948258]
Reward: -1  Episode Reward:  -1
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   531.64921024]
------
Step:62, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   531.64921024]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   386.83351207]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 582.57942658 -35.88578819]
------
Step:63, Action:East
State  261
Old Q Values:  [469.762964   -40.34168621 582.57942658 -35.88578819]
New Q values:  [469.762964   -40.34168621 348.48182425 -35.88578819]
Reward: -1  Episode Reward:  -3
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   386.83351207]
------
Step:64, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   376.64258797   933.80521455]
New Q values:  [-2527.46239811 -8521.23367799   376.64258797   513.85097502]
Reward: -1  Episode Reward:  -4
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 348.48182425 -35.88578819]
------
Step:65, Action:North
State  257
Old Q Values:  [39202.38818809  2256.66526474  5231.26374966  1875.31501677]
New Q values:  [39198.70091873  2256.66526474  5231.26374966  1875.31501677]
Reward: -1  Episode Reward:  -5
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 26817.66925136 48692.32086537     0.        ]
------
Step:66, Action:North
State  181
Old Q Values:  [  570.21685414  7457.01772558 -3151.23898092   -30.99112081]
New Q values:  [  387.39289506  7457.01772558 -3151.23898092   -30.99112081]
Reward: -1  Episode Reward:  -6
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  206.05468698  533.02051133 -120.29354603]
------
Step:67, Action:East
State  111
Old Q Values:  [-177.44732869  206.05468698  533.02051133 -120.29354603]
New Q values:  [-177.44732869  206.05468698  291.56517387 -120.29354603]
Reward: -1  Episode Reward:  -7
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   153.83468034   263.18989779]
------
Step:68, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043    89.29384206   136.82427257]
New Q values:  [ -281.736      -3455.78276043    89.29384206   141.59926119]
Reward: -1  Episode Reward:  -8
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  206.05468698  291.56517387 -120.29354603]
------
Step:69, Action:East
State  99
Old Q Values:  [    0.         40775.35701405 66000.13158228     0.        ]
New Q values:  [    0.         40775.35701405 65700.46302897     0.        ]
Reward: -1  Episode Reward:  -9
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:70, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   153.83468034   263.18989779]
New Q values:  [ -253.44886264 -1902.20915811   153.83468034   192.14551128]
Reward: -1  Episode Reward:  -10
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  206.05468698  291.56517387 -120.29354603]
------
Step:71, Action:East
State  109
Old Q Values:  [-241.10880094 2739.48599439 -253.21568792 -180.6       ]
New Q values:  [-241.10880094 2739.48599439   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  -11
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:72, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1069.2755168    664.58649805]
New Q values:  [-9594.56523706 -8069.05606225  1069.2755168   1087.08039754]
Reward: -1  Episode Reward:  -12
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 2739.48599439   -8.57207238 -180.6       ]
------
Step:73, Action:South
State  108
Old Q Values:  [-8463.16477134   752.96092799   508.62931424     0.        ]
New Q values:  [-8463.16477134  1609.55072817   508.62931424     0.        ]
Reward: -1  Episode Reward:  -13
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1120.77770304  2542.82527782  4363.22118993 -4966.32149798]
------
Step:74, Action:East
State  176
Old Q Values:  [76485.61294353  1327.79507613 91907.78124138     0.        ]
New Q values:  [76485.61294353  1327.79507613 99274.60769697     0.        ]
Reward: 100009  Episode Reward:  99996
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
Step:1, Action:East
State  136
Old Q Values:  [-5281.21195651   983.49090972 -6245.61866138   574.86131319]
New Q values:  [-5281.21195651   983.49090972 -2383.80019164   574.86131319]
Reward: -301  Episode Reward:  -301
xxxxx
xg ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   983.49090972 -2383.80019164   574.86131319]
------
Step:2, Action:South
State  138
Old Q Values:  [-1.39459256e+02 -3.00119869e+03 -3.22965309e-01  1.69110508e+02]
New Q values:  [-1.39459256e+02 -8.80609650e+02 -3.22965309e-01  1.69110508e+02]
Reward: 9  Episode Reward:  -292
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1048.23275911   633.14118268 -8489.43729461   531.09593838]
------
Step:3, Action:North
State  210
Old Q Values:  [4.15161405e+04 1.28625309e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.66565894e+04 1.28625309e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -293
xxxxx
x. ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -8.80609650e+02 -3.22965309e-01  1.69110508e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -8.80609650e+02 -3.22965309e-01  1.69110508e+02]
New Q values:  [-1.39459256e+02 -8.80609650e+02 -3.22965309e-01  1.09523982e+02]
Reward: -1  Episode Reward:  -294
xxxxx
x.a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043    89.29384206   141.59926119]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043    89.29384206   141.59926119]
New Q values:  [ -281.736      -3455.78276043    89.29384206   107.90952969]
Reward: 9  Episode Reward:  -285
xxxxx
xa  x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3078.62531684   152.89941737  -180.6       ]
------
Step:6, Action:East
State  110
Old Q Values:  [ -239.29051573 -3078.62531684   152.89941737  -180.6       ]
New Q values:  [ -239.29051573 -3078.62531684    92.93262585  -180.6       ]
Reward: -1  Episode Reward:  -286
xxxxx
x a x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043    89.29384206   107.90952969]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043    89.29384206   107.90952969]
New Q values:  [ -281.736      -3455.78276043    89.29384206   561.98747588]
Reward: -1  Episode Reward:  -287
xxxxx
xa  x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1731.41221336  303.62422203 -252.78192178]
------
Step:8, Action:South
State  110
Old Q Values:  [ -239.29051573 -3078.62531684    92.93262585  -180.6       ]
New Q values:  [ -239.29051573 -6859.02098822    92.93262585  -180.6       ]
Reward: -9991  Episode Reward:  -10278
xxxxx
x   x
xg. x
x...x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.30453803e+03 8.35365067e+03 2.91043938e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [   62.8218634  22697.26268809   636.07481225   408.67479662]
New Q values:  [   62.8218634  22697.26268809   574.29975264   408.67479662]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1048.23275911   633.14118268 -8489.43729461   531.09593838]
------
Step:2, Action:North
State  216
Old Q Values:  [ 1048.23275911   633.14118268 -8489.43729461   531.09593838]
New Q values:  [  719.74037656   633.14118268 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   983.49090972 -2383.80019164   574.86131319]
------
Step:3, Action:South
State  138
Old Q Values:  [-1.39459256e+02 -8.80609650e+02 -3.22965309e-01  1.09523982e+02]
New Q values:  [-139.45925583 -136.92174709   -0.32296531  109.5239817 ]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  719.74037656   633.14118268 -8489.43729461   531.09593838]
------
Step:4, Action:North
State  216
Old Q Values:  [  719.74037656   633.14118268 -8489.43729461   531.09593838]
New Q values:  [  582.34342354   633.14118268 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  16
xxxxx
xg.ax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   983.49090972 -2383.80019164   574.86131319]
------
Step:5, Action:South
State  136
Old Q Values:  [-5281.21195651   983.49090972 -2383.80019164   574.86131319]
New Q values:  [-5281.21195651   582.73871869 -2383.80019164   574.86131319]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  582.34342354   633.14118268 -8489.43729461   531.09593838]
------
Step:6, Action:South
State  208
Old Q Values:  [  235.39034726  4361.3358708  -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  4128.98564611 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  24
xxxxx
xg. x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7930.17099262 -7525.53407498 -7525.7277781    382.57948258]
------
Step:7, Action:North
State  288
Old Q Values:  [ 7930.17099262 -7525.53407498 -7525.7277781    382.57948258]
New Q values:  [ 3361.41075185 -7525.53407498 -7525.7277781    382.57948258]
Reward: -1  Episode Reward:  23
xxxxx
x g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  582.34342354   633.14118268 -8489.43729461   531.09593838]
------
Step:8, Action:South
State  208
Old Q Values:  [  235.39034726  4128.98564611 -1925.08326713 -1455.65174173]
New Q values:  [  235.39034726  2659.417484   -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  22
xxxxx
x . x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3361.41075185 -7525.53407498 -7525.7277781    382.57948258]
------
Step:9, Action:North
State  288
Old Q Values:  [ 3361.41075185 -7525.53407498 -7525.7277781    382.57948258]
New Q values:  [ 6340.94110606 -7525.53407498 -7525.7277781    382.57948258]
Reward: -1  Episode Reward:  21
xxxxx
x . x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.66565894e+04 1.28625309e+03 2.24233123e+03 3.52184257e+00]
------
Step:10, Action:North
State  208
Old Q Values:  [  235.39034726  2659.417484   -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341  2659.417484   -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  20
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  109.5239817 ]
------
Step:11, Action:West
State  136
Old Q Values:  [-5281.21195651   582.73871869 -2383.80019164   574.86131319]
New Q values:  [-5281.21195651   582.73871869 -2383.80019164 -5438.53135546]
Reward: -9991  Episode Reward:  -9971
xxxxx
x g x
x.  x
x.. x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  109.5239817 ]
------
Step:1, Action:West
State  138
Old Q Values:  [-139.45925583 -136.92174709   -0.32296531  109.5239817 ]
New Q values:  [-139.45925583 -136.92174709   -0.32296531  106.85324606]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   153.83468034   192.14551128]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   153.83468034   192.14551128]
New Q values:  [ -253.44886264 -1902.20915811   153.83468034   601.68186852]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1731.41221336  303.62422203 -252.78192178]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869  206.05468698  291.56517387 -120.29354603]
New Q values:  [-177.44732869  591.98410668  291.56517387 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  681.98074137 1680.54077297  940.95197235]
------
Step:4, Action:East
State  189
Old Q Values:  [   9.84673294  681.98074137 1680.54077297  940.95197235]
New Q values:  [   9.84673294  681.98074137 1762.391564    940.95197235]
Reward: -1  Episode Reward:  26
xxxxx
x  gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:5, Action:South
State  196
Old Q Values:  [-2469.90645144  1422.7973249    174.55451539     0.        ]
New Q values:  [-2469.90645144  1483.98734938   174.55451539     0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x g x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  3031.56139808   677.71070414]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   376.64258797   513.85097502]
New Q values:  [-2527.46239811 -8521.23367799  2058.33936701   513.85097502]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6340.94110606 -7525.53407498 -7525.7277781    382.57948258]
------
Step:7, Action:North
State  288
Old Q Values:  [ 6340.94110606 -7525.53407498 -7525.7277781    382.57948258]
New Q values:  [-3268.28120277 -7525.53407498 -7525.7277781    382.57948258]
Reward: -9991  Episode Reward:  -9947
xxxxx
x   x
x  gx
x.  x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   153.83468034   601.68186852]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043    89.29384206   561.98747588]
New Q values:  [ -281.736      -3455.78276043    89.29384206   749.61865436]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1731.41221336  303.62422203 -252.78192178]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  591.98410668  291.56517387 -120.29354603]
New Q values:  [-177.44732869 2479.29896035  291.56517387 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xag.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  387.39289506  7457.01772558 -3151.23898092   -30.99112081]
------
Step:3, Action:South
State  180
Old Q Values:  [-1120.77770304  2542.82527782  4363.22118993 -4966.32149798]
New Q values:  [-1120.77770304  1608.25087049  4363.22118993 -4966.32149798]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
xg..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -380.54080219 -8695.4397473   1952.40253122 -2601.74710518]
------
Step:4, Action:East
State  260
Old Q Values:  [ -380.54080219 -8695.4397473   1952.40253122 -2601.74710518]
New Q values:  [ -380.54080219 -8695.4397473   1695.82943191 -2601.74710518]
Reward: 9  Episode Reward:  36
xxxxx
xg .x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  3031.56139808   677.71070414]
------
Step:5, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3031.56139808   677.71070414]
New Q values:  [   16.82637525 -5807.06396197  1326.79840401   677.71070414]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3268.28120277 -7525.53407498 -7525.7277781    382.57948258]
------
Step:6, Action:West
State  288
Old Q Values:  [-3268.28120277 -7525.53407498 -7525.7277781    382.57948258]
New Q values:  [-3268.28120277 -7525.53407498 -7525.7277781    550.47131423]
Reward: -1  Episode Reward:  34
xxxxx
xg .x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1326.79840401   677.71070414]
------
Step:7, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1326.79840401   677.71070414]
New Q values:  [   16.82637525 -5807.06396197   695.26075587   677.71070414]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3268.28120277 -7525.53407498 -7525.7277781    550.47131423]
------
Step:8, Action:West
State  288
Old Q Values:  [-3268.28120277 -7525.53407498 -7525.7277781    550.47131423]
New Q values:  [-3268.28120277 -7525.53407498 -7525.7277781    428.16675245]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   695.26075587   677.71070414]
------
Step:9, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197   695.26075587   677.71070414]
New Q values:  [   16.82637525 -5807.06396197   405.95432809   677.71070414]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3268.28120277 -7525.53407498 -7525.7277781    428.16675245]
------
Step:10, Action:West
State  288
Old Q Values:  [-3268.28120277 -7525.53407498 -7525.7277781    428.16675245]
New Q values:  [-3268.28120277 -7525.53407498 -7525.7277781    373.97991222]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   405.95432809   677.71070414]
------
Step:11, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   405.95432809   677.71070414]
New Q values:  [   16.82637525 -5807.06396197   405.95432809   411.41317086]
Reward: -1  Episode Reward:  29
xxxxx
x  .x
x ..x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 348.48182425 -35.88578819]
------
Step:12, Action:North
State  260
Old Q Values:  [ -380.54080219 -8695.4397473   1695.82943191 -2601.74710518]
New Q values:  [ 1338.16706024 -8695.4397473   1695.82943191 -2601.74710518]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:13, Action:East
State  180
Old Q Values:  [-1120.77770304  1608.25087049  4363.22118993 -4966.32149798]
New Q values:  [-1120.77770304  1608.25087049  2195.88468079 -4966.32149798]
Reward: 9  Episode Reward:  37
xxxxx
x  .x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1483.98734938   174.55451539     0.        ]
------
Step:14, Action:South
State  196
Old Q Values:  [-2469.90645144  1483.98734938   174.55451539     0.        ]
New Q values:  [-2469.90645144   716.41889101   174.55451539     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x  .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   405.95432809   411.41317086]
------
Step:15, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   405.95432809   411.41317086]
New Q values:  [   16.82637525 -5807.06396197   405.95432809   304.89415754]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 348.48182425 -35.88578819]
------
Step:16, Action:North
State  260
Old Q Values:  [ 1338.16706024 -8695.4397473   1695.82943191 -2601.74710518]
New Q values:  [ 1193.43222833 -8695.4397473   1695.82943191 -2601.74710518]
Reward: -1  Episode Reward:  34
xxxxx
xg .x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1120.77770304  1608.25087049  2195.88468079 -4966.32149798]
------
Step:17, Action:East
State  180
Old Q Values:  [-1120.77770304  1608.25087049  2195.88468079 -4966.32149798]
New Q values:  [-1120.77770304  1608.25087049  1092.67953962 -4966.32149798]
Reward: -1  Episode Reward:  33
xxxxx
xg .x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   716.41889101   174.55451539     0.        ]
------
Step:18, Action:South
State  196
Old Q Values:  [-2469.90645144   716.41889101   174.55451539     0.        ]
New Q values:  [-2469.90645144   407.75385483   174.55451539     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   405.95432809   304.89415754]
------
Step:19, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197   405.95432809   304.89415754]
New Q values:  [   16.82637525 -5807.06396197   273.9757049    304.89415754]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x  .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3268.28120277 -7525.53407498 -7525.7277781    373.97991222]
------
Step:20, Action:West
State  288
Old Q Values:  [-3268.28120277 -7525.53407498 -7525.7277781    373.97991222]
New Q values:  [-3268.28120277 -7525.53407498 -7525.7277781    240.46021215]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   273.9757049    304.89415754]
------
Step:21, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   273.9757049    304.89415754]
New Q values:  [   16.82637525 -5807.06396197   273.9757049    630.10649259]
Reward: -1  Episode Reward:  29
xxxxx
xg .x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1193.43222833 -8695.4397473   1695.82943191 -2601.74710518]
------
Step:22, Action:East
State  261
Old Q Values:  [469.762964   -40.34168621 348.48182425 -35.88578819]
New Q values:  [469.762964   -40.34168621 327.82467748 -35.88578819]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   273.9757049    630.10649259]
------
Step:23, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   273.9757049    630.10649259]
New Q values:  [   16.82637525 -5807.06396197   273.9757049    760.19142661]
Reward: -1  Episode Reward:  27
xxxxx
xg .x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1193.43222833 -8695.4397473   1695.82943191 -2601.74710518]
------
Step:24, Action:East
State  260
Old Q Values:  [ 1193.43222833 -8695.4397473   1695.82943191 -2601.74710518]
New Q values:  [ 1193.43222833 -8695.4397473    905.78920075 -2601.74710518]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   273.9757049    760.19142661]
------
Step:25, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   273.9757049    760.19142661]
New Q values:  [   16.82637525 -5807.06396197   273.9757049    444.40545984]
Reward: -1  Episode Reward:  25
xxxxx
x  .x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 327.82467748 -35.88578819]
------
Step:26, Action:North
State  260
Old Q Values:  [ 1193.43222833 -8695.4397473    905.78920075 -2601.74710518]
New Q values:  [-5040.75184752 -8695.4397473    905.78920075 -2601.74710518]
Reward: -10001  Episode Reward:  -9976
xxxxx
x  .x
xg .x
x   x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  2659.417484   -1925.08326713 -1455.65174173]
------
Step:1, Action:South
State  208
Old Q Values:  [  126.41333341  2659.417484   -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341  1141.30505725 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3268.28120277 -7525.53407498 -7525.7277781    240.46021215]
------
Step:2, Action:West
State  288
Old Q Values:  [-3268.28120277 -7525.53407498 -7525.7277781    240.46021215]
New Q values:  [-3268.28120277 -7525.53407498 -7525.7277781    217.63413848]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   386.83351207]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2058.33936701   513.85097502]
New Q values:  [-2527.46239811 -8521.23367799  2058.33936701 11970.55066563]
Reward: 9  Episode Reward:  27
xxxxx
x.g.x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39198.70091873  2256.66526474  5231.26374966  1875.31501677]
------
Step:4, Action:North
State  257
Old Q Values:  [39198.70091873  2256.66526474  5231.26374966  1875.31501677]
New Q values:  [39203.22601099  2256.66526474  5231.26374966  1875.31501677]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 26817.66925136 48692.32086537     0.        ]
------
Step:5, Action:North
State  181
Old Q Values:  [  387.39289506  7457.01772558 -3151.23898092   -30.99112081]
New Q values:  [  174.44335828  7457.01772558 -3151.23898092   -30.99112081]
Reward: 9  Episode Reward:  45
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         46.95400085  0.          0.        ]
------
Step:6, Action:South
State  100
Old Q Values:  [ 0.00000000e+00  1.55726009e+03 -6.00000000e-01  0.00000000e+00]
New Q values:  [ 0.0000000e+00  1.1047793e+03 -6.0000000e-01  0.0000000e+00]
Reward: -1  Episode Reward:  44
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1120.77770304  1608.25087049  1092.67953962 -4966.32149798]
------
Step:7, Action:South
State  180
Old Q Values:  [-1120.77770304  1608.25087049  1092.67953962 -4966.32149798]
New Q values:  [-1120.77770304   914.43710842  1092.67953962 -4966.32149798]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5040.75184752 -8695.4397473    905.78920075 -2601.74710518]
------
Step:8, Action:East
State  260
Old Q Values:  [-5040.75184752 -8695.4397473    905.78920075 -2601.74710518]
New Q values:  [-5040.75184752 -8695.4397473   3952.88087999 -2601.74710518]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2058.33936701 11970.55066563]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2058.33936701 11970.55066563]
New Q values:  [-2527.46239811 -8521.23367799   888.02598835 11970.55066563]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3268.28120277 -7525.53407498 -7525.7277781    217.63413848]
------
Step:10, Action:North
State  288
Old Q Values:  [-3268.28120277 -7525.53407498 -7525.7277781    217.63413848]
New Q values:  [ -965.52096394 -7525.53407498 -7525.7277781    217.63413848]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  1141.30505725 -1925.08326713 -1455.65174173]
------
Step:11, Action:South
State  208
Old Q Values:  [  126.41333341  1141.30505725 -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341   521.21226444 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -965.52096394 -7525.53407498 -7525.7277781    217.63413848]
------
Step:12, Action:West
State  288
Old Q Values:  [ -965.52096394 -7525.53407498 -7525.7277781    217.63413848]
New Q values:  [ -965.52096394 -7525.53407498 -7525.7277781   3677.61885508]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   888.02598835 11970.55066563]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   888.02598835 11970.55066563]
New Q values:  [-2527.46239811 -8521.23367799   888.02598835  5973.48453025]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5040.75184752 -8695.4397473   3952.88087999 -2601.74710518]
------
Step:14, Action:East
State  260
Old Q Values:  [-5040.75184752 -8695.4397473   3952.88087999 -2601.74710518]
New Q values:  [-5040.75184752 -8695.4397473   3372.59771107 -2601.74710518]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   888.02598835  5973.48453025]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   888.02598835  5973.48453025]
New Q values:  [-2527.46239811 -8521.23367799  1457.89605186  5973.48453025]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -965.52096394 -7525.53407498 -7525.7277781   3677.61885508]
------
Step:16, Action:North
State  288
Old Q Values:  [ -965.52096394 -7525.53407498 -7525.7277781   3677.61885508]
New Q values:  [ -230.44470624 -7525.53407498 -7525.7277781   3677.61885508]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341   521.21226444 -1925.08326713 -1455.65174173]
------
Step:17, Action:South
State  208
Old Q Values:  [  126.41333341   521.21226444 -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341  1311.1705623  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -230.44470624 -7525.53407498 -7525.7277781   3677.61885508]
------
Step:18, Action:West
State  288
Old Q Values:  [ -230.44470624 -7525.53407498 -7525.7277781   3677.61885508]
New Q values:  [ -230.44470624 -7525.53407498 -7525.7277781   3262.49290111]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1457.89605186  5973.48453025]
------
Step:19, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1457.89605186  5973.48453025]
New Q values:  [-2527.46239811 -8521.23367799  1457.89605186  3400.57312542]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5040.75184752 -8695.4397473   3372.59771107 -2601.74710518]
------
Step:20, Action:East
State  260
Old Q Values:  [-5040.75184752 -8695.4397473   3372.59771107 -2601.74710518]
New Q values:  [-5040.75184752 -8695.4397473   2368.61102205 -2601.74710518]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1457.89605186  3400.57312542]
------
Step:21, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1457.89605186  3400.57312542]
New Q values:  [-2527.46239811 -8521.23367799  1561.30629108  3400.57312542]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -230.44470624 -7525.53407498 -7525.7277781   3262.49290111]
------
Step:22, Action:West
State  288
Old Q Values:  [ -230.44470624 -7525.53407498 -7525.7277781   3262.49290111]
New Q values:  [ -230.44470624 -7525.53407498 -7525.7277781   2324.56909807]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1561.30629108  3400.57312542]
------
Step:23, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1561.30629108  3400.57312542]
New Q values:  [-2527.46239811 -8521.23367799  1561.30629108  1500.55813937]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[469.762964   -40.34168621 327.82467748 -35.88578819]
------
Step:24, Action:North
State  261
Old Q Values:  [469.762964   -40.34168621 327.82467748 -35.88578819]
New Q values:  [3191.07998337  -40.34168621  327.82467748  -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  471.26619624   422.39664393 10012.58265925     0.        ]
------
Step:25, Action:East
State  181
Old Q Values:  [  174.44335828  7457.01772558 -3151.23898092   -30.99112081]
New Q values:  [  174.44335828  7457.01772558 16866.14770322   -30.99112081]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 60424.14431863  -113.03832106  1099.96026581]
------
Step:26, Action:South
State  193
Old Q Values:  [-5922.26708831 60424.14431863  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831 24285.10778108  -113.03832106  1099.96026581]
Reward: -1  Episode Reward:  24
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134   386.83351207]
------
Step:27, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   386.83351207]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  1111.45739984]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3191.07998337  -40.34168621  327.82467748  -35.88578819]
------
Step:28, Action:North
State  261
Old Q Values:  [3191.07998337  -40.34168621  327.82467748  -35.88578819]
New Q values:  [6335.67630432  -40.34168621  327.82467748  -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  174.44335828  7457.01772558 16866.14770322   -30.99112081]
------
Step:29, Action:South
State  181
Old Q Values:  [  174.44335828  7457.01772558 16866.14770322   -30.99112081]
New Q values:  [  174.44335828  4882.90998152 16866.14770322   -30.99112081]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6335.67630432  -40.34168621  327.82467748  -35.88578819]
------
Step:30, Action:North
State  261
Old Q Values:  [6335.67630432  -40.34168621  327.82467748  -35.88578819]
New Q values:  [7593.51483269  -40.34168621  327.82467748  -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  174.44335828  4882.90998152 16866.14770322   -30.99112081]
------
Step:31, Action:East
State  181
Old Q Values:  [  174.44335828  4882.90998152 16866.14770322   -30.99112081]
New Q values:  [ 174.44335828 4882.90998152 6868.18523774  -30.99112081]
Reward: -1  Episode Reward:  19
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   407.75385483   174.55451539     0.        ]
------
Step:32, Action:South
State  193
Old Q Values:  [-5922.26708831 24285.10778108  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831 10046.88033238  -113.03832106  1099.96026581]
Reward: -1  Episode Reward:  18
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1111.45739984]
------
Step:33, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   273.9757049    444.40545984]
New Q values:  [   16.82637525 -5807.06396197   273.9757049   2455.21663375]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[7593.51483269  -40.34168621  327.82467748  -35.88578819]
------
Step:34, Action:North
State  261
Old Q Values:  [7593.51483269  -40.34168621  327.82467748  -35.88578819]
New Q values:  [5097.2615044   -40.34168621  327.82467748  -35.88578819]
Reward: -1  Episode Reward:  16
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 174.44335828 4882.90998152 6868.18523774  -30.99112081]
------
Step:35, Action:East
State  181
Old Q Values:  [ 174.44335828 4882.90998152 6868.18523774  -30.99112081]
New Q values:  [ 174.44335828 4882.90998152 2869.00025154  -30.99112081]
Reward: -1  Episode Reward:  15
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   407.75385483   174.55451539     0.        ]
------
Step:36, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.30453803e+03 8.35365067e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.78960710e+03 8.35365067e+03 2.91043938e+03]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1561.30629108  1500.55813937]
------
Step:37, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197   273.9757049   2455.21663375]
New Q values:  [   16.82637525 -5807.06396197   806.36101138  2455.21663375]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -230.44470624 -7525.53407498 -7525.7277781   2324.56909807]
------
Step:38, Action:West
State  288
Old Q Values:  [ -230.44470624 -7525.53407498 -7525.7277781   2324.56909807]
New Q values:  [ -230.44470624 -7525.53407498 -7525.7277781   1397.61952655]
Reward: -1  Episode Reward:  12
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1561.30629108  1500.55813937]
------
Step:39, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1561.30629108  1500.55813937]
New Q values:  [-2527.46239811 -8521.23367799  1043.2083744   1500.55813937]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -230.44470624 -7525.53407498 -7525.7277781   1397.61952655]
------
Step:40, Action:West
State  288
Old Q Values:  [ -230.44470624 -7525.53407498 -7525.7277781   1397.61952655]
New Q values:  [ -230.44470624 -7525.53407498 -7525.7277781   1008.61525243]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1043.2083744   1500.55813937]
------
Step:41, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1043.2083744   1500.55813937]
New Q values:  [-2527.46239811 -8521.23367799   719.26792549  1500.55813937]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -230.44470624 -7525.53407498 -7525.7277781   1008.61525243]
------
Step:42, Action:West
State  288
Old Q Values:  [ -230.44470624 -7525.53407498 -7525.7277781   1008.61525243]
New Q values:  [ -230.44470624 -7525.53407498 -7525.7277781    853.01354278]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   719.26792549  1500.55813937]
------
Step:43, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   806.36101138  2455.21663375]
New Q values:  [   16.82637525 -5807.06396197   806.36101138  2510.66510482]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5097.2615044   -40.34168621  327.82467748  -35.88578819]
------
Step:44, Action:North
State  261
Old Q Values:  [5097.2615044   -40.34168621  327.82467748  -35.88578819]
New Q values:  [3503.17759622  -40.34168621  327.82467748  -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 174.44335828 4882.90998152 2869.00025154  -30.99112081]
------
Step:45, Action:South
State  180
Old Q Values:  [-1120.77770304   914.43710842  1092.67953962 -4966.32149798]
New Q values:  [-1120.77770304  1075.75814998  1092.67953962 -4966.32149798]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5040.75184752 -8695.4397473   2368.61102205 -2601.74710518]
------
Step:46, Action:East
State  260
Old Q Values:  [-5040.75184752 -8695.4397473   2368.61102205 -2601.74710518]
New Q values:  [-5040.75184752 -8695.4397473   1397.01185063 -2601.74710518]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   719.26792549  1500.55813937]
------
Step:47, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   719.26792549  1500.55813937]
New Q values:  [-2527.46239811 -8521.23367799   543.01123303  1500.55813937]
Reward: -1  Episode Reward:  3
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -230.44470624 -7525.53407498 -7525.7277781    853.01354278]
------
Step:48, Action:West
State  288
Old Q Values:  [ -230.44470624 -7525.53407498 -7525.7277781    853.01354278]
New Q values:  [ -230.44470624 -7525.53407498 -7525.7277781    790.77285892]
Reward: -1  Episode Reward:  2
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   543.01123303  1500.55813937]
------
Step:49, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   543.01123303  1500.55813937]
New Q values:  [-2527.46239811 -8521.23367799   543.01123303  1018.72681094]
Reward: -1  Episode Reward:  1
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5040.75184752 -8695.4397473   1397.01185063 -2601.74710518]
------
Step:50, Action:East
State  260
Old Q Values:  [-5040.75184752 -8695.4397473   1397.01185063 -2601.74710518]
New Q values:  [-5040.75184752 -8695.4397473    863.82278353 -2601.74710518]
Reward: -1  Episode Reward:  0
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   543.01123303  1018.72681094]
------
Step:51, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   543.01123303  1018.72681094]
New Q values:  [-2527.46239811 -8521.23367799   453.83635089  1018.72681094]
Reward: -1  Episode Reward:  -1
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -230.44470624 -7525.53407498 -7525.7277781    790.77285892]
------
Step:52, Action:North
State  288
Old Q Values:  [ -230.44470624 -7525.53407498 -7525.7277781    790.77285892]
New Q values:  [  300.57328619 -7525.53407498 -7525.7277781    790.77285892]
Reward: -1  Episode Reward:  -2
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  1311.1705623  -1925.08326713 -1455.65174173]
------
Step:53, Action:South
State  210
Old Q Values:  [1.66565894e+04 1.28625309e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.66565894e+04 7.51133094e+02 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -3
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  300.57328619 -7525.53407498 -7525.7277781    790.77285892]
------
Step:54, Action:North
State  288
Old Q Values:  [  300.57328619 -7525.53407498 -7525.7277781    790.77285892]
New Q values:  [  512.98048317 -7525.53407498 -7525.7277781    790.77285892]
Reward: -1  Episode Reward:  -4
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  1311.1705623  -1925.08326713 -1455.65174173]
------
Step:55, Action:South
State  210
Old Q Values:  [1.66565894e+04 7.51133094e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [1.66565894e+04 5.37085095e+02 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -5
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  512.98048317 -7525.53407498 -7525.7277781    790.77285892]
------
Step:56, Action:North
State  288
Old Q Values:  [  512.98048317 -7525.53407498 -7525.7277781    790.77285892]
New Q values:  [  597.94336196 -7525.53407498 -7525.7277781    790.77285892]
Reward: -1  Episode Reward:  -6
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  1311.1705623  -1925.08326713 -1455.65174173]
------
Step:57, Action:South
State  210
Old Q Values:  [1.66565894e+04 5.37085095e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [1.66565894e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -7
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  597.94336196 -7525.53407498 -7525.7277781    790.77285892]
------
Step:58, Action:North
State  288
Old Q Values:  [  597.94336196 -7525.53407498 -7525.7277781    790.77285892]
New Q values:  [ 5235.5541501  -7525.53407498 -7525.7277781    790.77285892]
Reward: -1  Episode Reward:  -8
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.66565894e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
------
Step:59, Action:North
State  210
Old Q Values:  [1.66565894e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [2.63725176e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  1
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 65681.60617829]
------
Step:60, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 65681.60617829]
New Q values:  [ 36041.91667283   2091.02738669   -180.00807518 125579.05286738]
Reward: 100009  Episode Reward:  100010
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   453.83635089  1018.72681094]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   806.36101138  2510.66510482]
New Q values:  [   16.82637525 -5807.06396197   806.36101138  2060.61932079]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3503.17759622  -40.34168621  327.82467748  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [3503.17759622  -40.34168621  327.82467748  -35.88578819]
New Q values:  [2871.54403294  -40.34168621  327.82467748  -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 174.44335828 4882.90998152 2869.00025154  -30.99112081]
------
Step:3, Action:South
State  181
Old Q Values:  [ 174.44335828 4882.90998152 2869.00025154  -30.99112081]
New Q values:  [ 174.44335828 2814.02720249 2869.00025154  -30.99112081]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2871.54403294  -40.34168621  327.82467748  -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [2871.54403294  -40.34168621  327.82467748  -35.88578819]
New Q values:  [4151.79241095  -40.34168621  327.82467748  -35.88578819]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  471.26619624   422.39664393 10012.58265925     0.        ]
------
Step:5, Action:East
State  181
Old Q Values:  [ 174.44335828 2814.02720249 2869.00025154  -30.99112081]
New Q values:  [  174.44335828  2814.02720249 -4724.67374293   -30.99112081]
Reward: -9991  Episode Reward:  -9975
xxxxx
x. .x
x g.x
x  .x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   582.73871869 -2383.80019164 -5438.53135546]
------
Step:1, Action:South
State  136
Old Q Values:  [-5281.21195651   582.73871869 -2383.80019164 -5438.53135546]
New Q values:  [-5281.21195651   428.43784228 -2383.80019164 -5438.53135546]
Reward: 9  Episode Reward:  9
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  582.34342354   633.14118268 -8489.43729461   531.09593838]
------
Step:2, Action:South
State  208
Old Q Values:  [  126.41333341  1311.1705623  -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341  2100.53446995 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5235.5541501  -7525.53407498 -7525.7277781    790.77285892]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5235.5541501  -7525.53407498 -7525.7277781    790.77285892]
New Q values:  [10005.37693821 -7525.53407498 -7525.7277781    790.77285892]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.63725176e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
------
Step:4, Action:North
State  210
Old Q Values:  [2.63725176e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [1.05804630e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  106.85324606]
------
Step:5, Action:West
State  138
Old Q Values:  [-139.45925583 -136.92174709   -0.32296531  106.85324606]
New Q values:  [-139.45925583 -136.92174709   -0.32296531  267.02689473]
Reward: -1  Episode Reward:  15
xxxxx
x.a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043    89.29384206   749.61865436]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043    89.29384206   749.61865436]
New Q values:  [ -281.736      -3455.78276043    89.29384206   824.67112575]
Reward: 9  Episode Reward:  24
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1731.41221336  303.62422203 -252.78192178]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 2479.29896035  291.56517387 -120.29354603]
New Q values:  [-177.44732869 4000.89438191  291.56517387 -120.29354603]
Reward: 9  Episode Reward:  33
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  471.26619624   422.39664393 10012.58265925     0.        ]
------
Step:8, Action:East
State  185
Old Q Values:  [   9.40190913    0.         2692.07535342 -178.98      ]
New Q values:  [   9.40190913    0.         2173.00539618 -178.98      ]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:9, Action:South
State  203
Old Q Values:  [   3.60604218  917.47486444 1822.13470504    0.        ]
New Q values:  [   3.60604218  705.82716573 1822.13470504    0.        ]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1111.45739984]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   453.83635089  1018.72681094]
New Q values:  [-2527.46239811 -8521.23367799   453.83635089 72173.85852767]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  174.44335828  2814.02720249 -4724.67374293   -30.99112081]
------
Step:1, Action:South
State  180
Old Q Values:  [-1120.77770304  1075.75814998  1092.67953962 -4966.32149798]
New Q values:  [-1120.77770304   694.85009505  1092.67953962 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5040.75184752 -8695.4397473    863.82278353 -2601.74710518]
------
Step:2, Action:East
State  260
Old Q Values:  [-5040.75184752 -8695.4397473    863.82278353 -2601.74710518]
New Q values:  [-5040.75184752 -8695.4397473    969.11490965 -2601.74710518]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138  2060.61932079]
------
Step:3, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   806.36101138  2060.61932079]
New Q values:  [   16.82637525 -5807.06396197   806.36101138  2069.1854516 ]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4151.79241095  -40.34168621  327.82467748  -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [4151.79241095  -40.34168621  327.82467748  -35.88578819]
New Q values:  [2504.32512513  -40.34168621  327.82467748  -35.88578819]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  174.44335828  2814.02720249 -4724.67374293   -30.99112081]
------
Step:5, Action:South
State  181
Old Q Values:  [  174.44335828  2814.02720249 -4724.67374293   -30.99112081]
New Q values:  [  174.44335828  1876.30841854 -4724.67374293   -30.99112081]
Reward: -1  Episode Reward:  15
xxxxx
x. .x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2504.32512513  -40.34168621  327.82467748  -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [2504.32512513  -40.34168621  327.82467748  -35.88578819]
New Q values:  [1564.02257561  -40.34168621  327.82467748  -35.88578819]
Reward: -1  Episode Reward:  14
xxxxx
x.g.x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  174.44335828  1876.30841854 -4724.67374293   -30.99112081]
------
Step:7, Action:South
State  181
Old Q Values:  [  174.44335828  1876.30841854 -4724.67374293   -30.99112081]
New Q values:  [  174.44335828  1219.1301401  -4724.67374293   -30.99112081]
Reward: -1  Episode Reward:  13
xxxxx
x. .x
x g.x
xa .x
xxxxx
Step:8, Action:South
State  261
Old Q Values:  [1564.02257561  -40.34168621  327.82467748  -35.88578819]
New Q values:  [1564.02257561  272.4700982   327.82467748  -35.88578819]
Reward: -301  Episode Reward:  -288
xxxxx
x. .x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1564.02257561  272.4700982   327.82467748  -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [1564.02257561  272.4700982   327.82467748  -35.88578819]
New Q values:  [3628.78382802  272.4700982   327.82467748  -35.88578819]
Reward: -1  Episode Reward:  -289
xxxxx
x. .x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  471.26619624   422.39664393 10012.58265925     0.        ]
------
Step:10, Action:East
State  183
Old Q Values:  [  471.26619624   422.39664393 10012.58265925     0.        ]
New Q values:  [ 471.26619624  422.39664393 7795.08497259    0.        ]
Reward: 9  Episode Reward:  -280
xxxxx
x. .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
------
Step:11, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 38776.43839492  1101.59744825]
New Q values:  [   38.85388605  2677.89620798 18690.11426138  1101.59744825]
Reward: 9  Episode Reward:  -271
xxxxx
x. .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.05804630e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
------
Step:12, Action:North
State  210
Old Q Values:  [1.05804630e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [4.19113011e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  -262
xxxxx
x. ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   2091.02738669   -180.00807518 125579.05286738]
------
Step:13, Action:West
State  130
Old Q Values:  [ 36041.91667283   2091.02738669   -180.00807518 125579.05286738]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 89532.03154302]
Reward: -1  Episode Reward:  -263
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:14, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 1684.46356666  341.12160345]
New Q values:  [   0.         1166.51141701 1684.46356666  963.6944397 ]
Reward: 9  Episode Reward:  -254
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 2739.48599439   -8.57207238 -180.6       ]
------
Step:15, Action:South
State  109
Old Q Values:  [-241.10880094 2739.48599439   -8.57207238 -180.6       ]
New Q values:  [-241.10880094 1623.91186695   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  -255
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  681.98074137 1762.391564    940.95197235]
------
Step:16, Action:East
State  189
Old Q Values:  [   9.84673294  681.98074137 1762.391564    940.95197235]
New Q values:  [  9.84673294 681.98074137 763.87267672 940.95197235]
Reward: -1  Episode Reward:  -256
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         121.94855967   0.         198.38683706]
------
Step:17, Action:West
State  204
Old Q Values:  [   0.         1115.71533251  815.31186329  399.75525955]
New Q values:  [   0.         1115.71533251  815.31186329  441.58769553]
Reward: -1  Episode Reward:  -257
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 681.98074137 763.87267672 940.95197235]
------
Step:18, Action:West
State  189
Old Q Values:  [  9.84673294 681.98074137 763.87267672 940.95197235]
New Q values:  [  9.84673294 681.98074137 763.87267672 478.06638065]
Reward: -301  Episode Reward:  -558
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 681.98074137 763.87267672 478.06638065]
------
Step:19, Action:East
State  179
Old Q Values:  [    0.          4614.46100011 97720.73250211     0.        ]
New Q values:  [    0.          4614.46100011 44694.72727926     0.        ]
Reward: -1  Episode Reward:  -559
xxxxx
x   x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  2677.89620798 18690.11426138  1101.59744825]
------
Step:20, Action:East
State  206
Old Q Values:  [  0.         135.48456638 515.78980165   0.        ]
New Q values:  [   0.          135.48456638 1525.83639927    0.        ]
Reward: -1  Episode Reward:  -560
xxxxx
x   x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4400.40159537  848.96225083    0.          429.03841886]
------
Step:21, Action:North
State  218
Old Q Values:  [4400.40159537  848.96225083    0.          429.03841886]
New Q values:  [1839.66870657  848.96225083    0.          429.03841886]
Reward: -1  Episode Reward:  -561
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  267.02689473]
------
Step:22, Action:West
State  138
Old Q Values:  [-139.45925583 -136.92174709   -0.32296531  267.02689473]
New Q values:  [-139.45925583 -136.92174709   -0.32296531  218.70139771]
Reward: -1  Episode Reward:  -562
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 374.96879939 284.9215583 ]
------
Step:23, Action:East
State  126
Old Q Values:  [  0.         331.64678262 374.96879939 284.9215583 ]
New Q values:  [  0.         331.64678262 214.99793907 284.9215583 ]
Reward: -1  Episode Reward:  -563
xxxxx
x  ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  218.70139771]
------
Step:24, Action:West
State  136
Old Q Values:  [-5281.21195651   428.43784228 -2383.80019164 -5438.53135546]
New Q values:  [-5281.21195651   428.43784228 -2383.80019164 -7670.67347219]
Reward: -10001  Episode Reward:  -10564
xxxxx
x g x
x   x
x  .x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  3.86224719e+03  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.78960710e+03 8.35365067e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.78960710e+03 3.97702061e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  2100.53446995 -1925.08326713 -1455.65174173]
------
Step:2, Action:South
State  208
Old Q Values:  [  126.41333341  2100.53446995 -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341  3847.22686944 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10005.37693821 -7525.53407498 -7525.7277781    790.77285892]
------
Step:3, Action:West
State  288
Old Q Values:  [10005.37693821 -7525.53407498 -7525.7277781    790.77285892]
New Q values:  [10005.37693821 -7525.53407498 -7525.7277781    649.14636352]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1111.45739984]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   453.83635089 72173.85852767]
New Q values:  [-2527.46239811 -8521.23367799   453.83635089 40635.91121437]
Reward: 9  Episode Reward:  26
xxxxx
x.g.x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39203.22601099  2256.66526474  5231.26374966  1875.31501677]
------
Step:5, Action:North
State  257
Old Q Values:  [39203.22601099  2256.66526474  5231.26374966  1875.31501677]
New Q values:  [39205.03604789  2256.66526474  5231.26374966  1875.31501677]
Reward: 9  Episode Reward:  35
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 26817.66925136 48692.32086537     0.        ]
------
Step:6, Action:North
State  181
Old Q Values:  [  174.44335828  1219.1301401  -4724.67374293   -30.99112081]
New Q values:  [   89.26354357  1219.1301401  -4724.67374293   -30.99112081]
Reward: 9  Episode Reward:  44
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[ 0.         46.95400085  0.          0.        ]
------
Step:7, Action:South
State  101
Old Q Values:  [ 0.         46.95400085  0.          0.        ]
New Q values:  [  0.         383.92064237   0.           0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[   89.26354357  1219.1301401  -4724.67374293   -30.99112081]
------
Step:8, Action:South
State  181
Old Q Values:  [   89.26354357  1219.1301401  -4724.67374293   -30.99112081]
New Q values:  [   89.26354357  1575.68720445 -4724.67374293   -30.99112081]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3628.78382802  272.4700982   327.82467748  -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [3628.78382802  272.4700982   327.82467748  -35.88578819]
New Q values:  [3789.43902299  272.4700982   327.82467748  -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  422.39664393 7795.08497259    0.        ]
------
Step:10, Action:East
State  181
Old Q Values:  [   89.26354357  1575.68720445 -4724.67374293   -30.99112081]
New Q values:  [  89.26354357 1575.68720445 1123.59460254  -30.99112081]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 10046.88033238  -113.03832106  1099.96026581]
------
Step:11, Action:South
State  193
Old Q Values:  [-5922.26708831 10046.88033238  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831  4351.58935291  -113.03832106  1099.96026581]
Reward: -1  Episode Reward:  39
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1111.45739984]
------
Step:12, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  1111.45739984]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134  1580.81466683]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3789.43902299  272.4700982   327.82467748  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [3789.43902299  272.4700982   327.82467748  -35.88578819]
New Q values:  [1987.88177053  272.4700982   327.82467748  -35.88578819]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357 1575.68720445 1123.59460254  -30.99112081]
------
Step:14, Action:South
State  183
Old Q Values:  [ 471.26619624  422.39664393 7795.08497259    0.        ]
New Q values:  [ 471.26619624  764.72318873 7795.08497259    0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1987.88177053  272.4700982   327.82467748  -35.88578819]
------
Step:15, Action:North
State  260
Old Q Values:  [-5040.75184752 -8695.4397473    969.11490965 -2601.74710518]
New Q values:  [ -525.91735789 -8695.4397473    969.11490965 -2601.74710518]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:16, Action:East
State  183
Old Q Values:  [ 471.26619624  764.72318873 7795.08497259    0.        ]
New Q values:  [ 471.26619624  764.72318873 4276.10814485    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  3.86224719e+03  1.20371620e+03]
------
Step:17, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  3.86224719e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  1.41176892e+04  1.20371620e+03]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.19113011e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
------
Step:18, Action:North
State  210
Old Q Values:  [4.19113011e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [4.36295299e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  42
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 89532.03154302]
------
Step:19, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 89532.03154302]
New Q values:  [ 36041.91667283   2091.02738669   -180.00807518 135119.22301327]
Reward: 100009  Episode Reward:  100051
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  2677.89620798 18690.11426138  1101.59744825]
------
Step:1, Action:East
State  193
Old Q Values:  [-5922.26708831  4351.58935291  -113.03832106  1099.96026581]
New Q values:  [-5922.26708831  4351.58935291 -4885.64726759  1099.96026581]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6859.02098822    92.93262585  -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134  1609.55072817   508.62931424     0.        ]
New Q values:  [-8463.16477134  1609.55072817   534.97584496     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1069.2755168   1087.08039754]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043    89.29384206   824.67112575]
New Q values:  [ -281.736      -3455.78276043   106.72795614   824.67112575]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  218.70139771]
------
Step:3, Action:West
State  138
Old Q Values:  [-139.45925583 -136.92174709   -0.32296531  218.70139771]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.34281897e+02]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   824.67112575]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   824.67112575]
New Q values:  [ -281.736      -3455.78276043   106.72795614  1529.53676487]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4000.89438191  291.56517387 -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 4000.89438191  291.56517387 -120.29354603]
New Q values:  [-177.44732869 2882.59019622  291.56517387 -120.29354603]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  764.72318873 4276.10814485    0.        ]
------
Step:6, Action:East
State  189
Old Q Values:  [  9.84673294 681.98074137 763.87267672 478.06638065]
New Q values:  [   9.84673294  681.98074137 1401.7243255   478.06638065]
Reward: 9  Episode Reward:  24
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:7, Action:South
State  197
Old Q Values:  [1.32443385e-01 1.52439722e+03 0.00000000e+00 0.00000000e+00]
New Q values:  [1.32443385e-01 1.36149628e+03 0.00000000e+00 0.00000000e+00]
Reward: 9  Episode Reward:  33
xxxxx
x  gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 1.34623827e+03 2.48779131e+03]
------
Step:8, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 1.34623827e+03 2.48779131e+03]
New Q values:  [   1.64433       0.         1346.23826999 1596.88105386]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1987.88177053  272.4700982   327.82467748  -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [1987.88177053  272.4700982   327.82467748  -35.88578819]
New Q values:  [2077.38515166  272.4700982   327.82467748  -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  764.72318873 4276.10814485    0.        ]
------
Step:10, Action:East
State  189
Old Q Values:  [   9.84673294  681.98074137 1401.7243255   478.06638065]
New Q values:  [  9.84673294 681.98074137 619.60578132 478.06638065]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         121.94855967   0.         198.38683706]
------
Step:11, Action:West
State  197
Old Q Values:  [1.32443385e-01 1.36149628e+03 0.00000000e+00 0.00000000e+00]
New Q values:  [1.32443385e-01 1.36149628e+03 0.00000000e+00 4.72106161e+02]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357 1575.68720445 1123.59460254  -30.99112081]
------
Step:12, Action:South
State  181
Old Q Values:  [  89.26354357 1575.68720445 1123.59460254  -30.99112081]
New Q values:  [  89.26354357 1252.89042728 1123.59460254  -30.99112081]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2077.38515166  272.4700982   327.82467748  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [2077.38515166  272.4700982   327.82467748  -35.88578819]
New Q values:  [1206.22118885  272.4700982   327.82467748  -35.88578819]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357 1252.89042728 1123.59460254  -30.99112081]
------
Step:14, Action:South
State  189
Old Q Values:  [  9.84673294 681.98074137 619.60578132 478.06638065]
New Q values:  [  9.84673294 634.0586532  619.60578132 478.06638065]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1206.22118885  272.4700982   327.82467748  -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [1206.22118885  272.4700982   327.82467748  -35.88578819]
New Q values:  [857.75560372 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357 1252.89042728 1123.59460254  -30.99112081]
------
Step:16, Action:South
State  189
Old Q Values:  [  9.84673294 634.0586532  619.60578132 478.06638065]
New Q values:  [  9.84673294 510.3501424  619.60578132 478.06638065]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[857.75560372 272.4700982  327.82467748 -35.88578819]
------
Step:17, Action:North
State  261
Old Q Values:  [857.75560372 272.4700982  327.82467748 -35.88578819]
New Q values:  [718.36936967 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357 1252.89042728 1123.59460254  -30.99112081]
------
Step:18, Action:South
State  181
Old Q Values:  [  89.26354357 1252.89042728 1123.59460254  -30.99112081]
New Q values:  [  89.26354357  716.06698181 1123.59460254  -30.99112081]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[718.36936967 272.4700982  327.82467748 -35.88578819]
------
Step:19, Action:North
State  260
Old Q Values:  [ -525.91735789 -8695.4397473    969.11490965 -2601.74710518]
New Q values:  [  116.83691873 -8695.4397473    969.11490965 -2601.74710518]
Reward: -1  Episode Reward:  31
xxxxx
xg  x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1120.77770304   694.85009505  1092.67953962 -4966.32149798]
------
Step:20, Action:East
State  181
Old Q Values:  [  89.26354357  716.06698181 1123.59460254  -30.99112081]
New Q values:  [ 89.26354357 716.06698181 571.16399747 -30.99112081]
Reward: -1  Episode Reward:  30
xxxxx
x g x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   407.75385483   174.55451539     0.        ]
------
Step:21, Action:South
State  196
Old Q Values:  [-2469.90645144   407.75385483   174.55451539     0.        ]
New Q values:  [-2469.90645144   783.25717741   174.55451539     0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138  2069.1854516 ]
------
Step:22, Action:West
State  277
Old Q Values:  [   1.64433       0.         1346.23826999 1596.88105386]
New Q values:  [   1.64433       0.         1346.23826999  853.66323244]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[718.36936967 272.4700982  327.82467748 -35.88578819]
------
Step:23, Action:North
State  261
Old Q Values:  [718.36936967 272.4700982  327.82467748 -35.88578819]
New Q values:  [1569.58019132  272.4700982   327.82467748  -35.88578819]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  764.72318873 4276.10814485    0.        ]
------
Step:24, Action:East
State  189
Old Q Values:  [  9.84673294 510.3501424  619.60578132 478.06638065]
New Q values:  [  9.84673294 510.3501424  306.75836364 478.06638065]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         121.94855967   0.         198.38683706]
------
Step:25, Action:West
State  199
Old Q Values:  [  22.48535485 2214.77383591  549.89931413  753.62201984]
New Q values:  [  22.48535485 2214.77383591  549.89931413 1583.68125139]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  764.72318873 4276.10814485    0.        ]
------
Step:26, Action:East
State  189
Old Q Values:  [  9.84673294 510.3501424  306.75836364 478.06638065]
New Q values:  [  9.84673294 510.3501424  181.61939658 478.06638065]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         121.94855967   0.         198.38683706]
------
Step:27, Action:West
State  199
Old Q Values:  [  22.48535485 2214.77383591  549.89931413 1583.68125139]
New Q values:  [  22.48535485 2214.77383591  549.89931413 1915.70494401]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  764.72318873 4276.10814485    0.        ]
------
Step:28, Action:East
State  189
Old Q Values:  [  9.84673294 510.3501424  181.61939658 478.06638065]
New Q values:  [  9.84673294 510.3501424  131.56380975 478.06638065]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         121.94855967   0.         198.38683706]
------
Step:29, Action:West
State  197
Old Q Values:  [1.32443385e-01 1.36149628e+03 0.00000000e+00 4.72106161e+02]
New Q values:  [1.32443385e-01 1.36149628e+03 0.00000000e+00 4.03062559e+02]
Reward: -1  Episode Reward:  21
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 716.06698181 571.16399747 -30.99112081]
------
Step:30, Action:South
State  181
Old Q Values:  [ 89.26354357 716.06698181 571.16399747 -30.99112081]
New Q values:  [ 89.26354357 756.70085012 571.16399747 -30.99112081]
Reward: -1  Episode Reward:  20
xxxxx
x g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1569.58019132  272.4700982   327.82467748  -35.88578819]
------
Step:31, Action:North
State  261
Old Q Values:  [1569.58019132  272.4700982   327.82467748  -35.88578819]
New Q values:  [854.24233157 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  19
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 756.70085012 571.16399747 -30.99112081]
------
Step:32, Action:South
State  181
Old Q Values:  [ 89.26354357 756.70085012 571.16399747 -30.99112081]
New Q values:  [ 89.26354357 558.35303952 571.16399747 -30.99112081]
Reward: -1  Episode Reward:  18
xxxxx
x g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[854.24233157 272.4700982  327.82467748 -35.88578819]
------
Step:33, Action:North
State  261
Old Q Values:  [854.24233157 272.4700982  327.82467748 -35.88578819]
New Q values:  [512.44613187 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  17
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 558.35303952 571.16399747 -30.99112081]
------
Step:34, Action:East
State  189
Old Q Values:  [  9.84673294 510.3501424  131.56380975 478.06638065]
New Q values:  [  9.84673294 510.3501424  111.54157502 478.06638065]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         121.94855967   0.         198.38683706]
------
Step:35, Action:West
State  196
Old Q Values:  [-2469.90645144   783.25717741   174.55451539     0.        ]
New Q values:  [-2469.90645144   783.25717741   174.55451539   170.74919924]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 558.35303952 571.16399747 -30.99112081]
------
Step:36, Action:South
State  189
Old Q Values:  [  9.84673294 510.3501424  111.54157502 478.06638065]
New Q values:  [  9.84673294 357.27389652 111.54157502 478.06638065]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[512.44613187 272.4700982  327.82467748 -35.88578819]
------
Step:37, Action:North
State  261
Old Q Values:  [512.44613187 272.4700982  327.82467748 -35.88578819]
New Q values:  [375.72765199 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  13
xxxxx
x   x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 558.35303952 571.16399747 -30.99112081]
------
Step:38, Action:South
State  189
Old Q Values:  [  9.84673294 357.27389652 111.54157502 478.06638065]
New Q values:  [  9.84673294 255.0278542  111.54157502 478.06638065]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[375.72765199 272.4700982  327.82467748 -35.88578819]
------
Step:39, Action:North
State  261
Old Q Values:  [375.72765199 272.4700982  327.82467748 -35.88578819]
New Q values:  [321.04026003 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  11
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 558.35303952 571.16399747 -30.99112081]
------
Step:40, Action:East
State  189
Old Q Values:  [  9.84673294 255.0278542  111.54157502 478.06638065]
New Q values:  [  9.84673294 255.0278542  103.53268112 478.06638065]
Reward: -1  Episode Reward:  10
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         121.94855967   0.         198.38683706]
------
Step:41, Action:West
State  196
Old Q Values:  [-2469.90645144   783.25717741   174.55451539   170.74919924]
New Q values:  [-2469.90645144   783.25717741   174.55451539   239.04887894]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 558.35303952 571.16399747 -30.99112081]
------
Step:42, Action:South
State  183
Old Q Values:  [ 471.26619624  764.72318873 4276.10814485    0.        ]
New Q values:  [ 471.26619624  403.63667874 4276.10814485    0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x   x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[321.04026003 272.4700982  327.82467748 -35.88578819]
------
Step:43, Action:North
State  261
Old Q Values:  [321.04026003 272.4700982  327.82467748 -35.88578819]
New Q values:  [299.16530325 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x   x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 558.35303952 571.16399747 -30.99112081]
------
Step:44, Action:South
State  183
Old Q Values:  [ 471.26619624  403.63667874 4276.10814485    0.        ]
New Q values:  [ 471.26619624  259.20207474 4276.10814485    0.        ]
Reward: -1  Episode Reward:  6
xxxxx
x   x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[299.16530325 272.4700982  327.82467748 -35.88578819]
------
Step:45, Action:North
State  261
Old Q Values:  [299.16530325 272.4700982  327.82467748 -35.88578819]
New Q values:  [1401.89856476  272.4700982   327.82467748  -35.88578819]
Reward: -1  Episode Reward:  5
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  259.20207474 4276.10814485    0.        ]
------
Step:46, Action:East
State  183
Old Q Values:  [ 471.26619624  259.20207474 4276.10814485    0.        ]
New Q values:  [ 471.26619624  259.20207474 5494.49516683    0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x   x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
------
Step:47, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 18690.11426138  1101.59744825]
New Q values:  [   38.85388605  2677.89620798 20570.3046712   1101.59744825]
Reward: 9  Episode Reward:  13
xxxxx
x   x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.36295299e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
------
Step:48, Action:North
State  216
Old Q Values:  [  582.34342354   633.14118268 -8489.43729461   531.09593838]
New Q values:  [  332.62193846   633.14118268 -8489.43729461   531.09593838]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.34281897e+02]
------
Step:49, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.34281897e+02]
New Q values:  [-139.45925583 -136.92174709   -0.32296531  232.60679351]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 214.99793907 284.9215583 ]
------
Step:50, Action:West
State  127
Old Q Values:  [   0.            1.67014986  384.73306724 1139.24686966]
New Q values:  [   0.            1.67014986  384.73306724 1319.87580673]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2882.59019622  291.56517387 -120.29354603]
------
Step:51, Action:South
State  109
Old Q Values:  [-241.10880094 1623.91186695   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  792.38466098   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 255.0278542  103.53268112 478.06638065]
------
Step:52, Action:West
State  189
Old Q Values:  [  9.84673294 255.0278542  103.53268112 478.06638065]
New Q values:  [  9.84673294 255.0278542  103.53268112 154.04646645]
Reward: -301  Episode Reward:  -292
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 255.0278542  103.53268112 154.04646645]
------
Step:53, Action:South
State  179
Old Q Values:  [    0.          4614.46100011 44694.72727926     0.        ]
New Q values:  [    0.         13606.69521441 44694.72727926     0.        ]
Reward: -1  Episode Reward:  -293
xxxxx
x   x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39205.03604789  2256.66526474  5231.26374966  1875.31501677]
------
Step:54, Action:North
State  261
Old Q Values:  [1401.89856476  272.4700982   327.82467748  -35.88578819]
New Q values:  [691.41156097 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  -294
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 437.5071169   64.46351788   0.        ]
------
Step:55, Action:South
State  179
Old Q Values:  [    0.         13606.69521441 44694.72727926     0.        ]
New Q values:  [    0.         17203.58890013 44694.72727926     0.        ]
Reward: -1  Episode Reward:  -295
xxxxx
x   x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39205.03604789  2256.66526474  5231.26374966  1875.31501677]
------
Step:56, Action:North
State  261
Old Q Values:  [691.41156097 272.4700982  327.82467748 -35.88578819]
New Q values:  [407.21675946 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  -296
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 437.5071169   64.46351788   0.        ]
------
Step:57, Action:South
State  189
Old Q Values:  [  9.84673294 255.0278542  103.53268112 154.04646645]
New Q values:  [  9.84673294 223.57616952 103.53268112 154.04646645]
Reward: -1  Episode Reward:  -297
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[407.21675946 272.4700982  327.82467748 -35.88578819]
------
Step:58, Action:North
State  261
Old Q Values:  [407.21675946 272.4700982  327.82467748 -35.88578819]
New Q values:  [293.53883885 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  -298
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 437.5071169   64.46351788   0.        ]
------
Step:59, Action:South
State  189
Old Q Values:  [  9.84673294 223.57616952 103.53268112 154.04646645]
New Q values:  [  9.84673294 187.17787105 103.53268112 154.04646645]
Reward: -1  Episode Reward:  -299
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[293.53883885 272.4700982  327.82467748 -35.88578819]
------
Step:60, Action:East
State  260
Old Q Values:  [  116.83691873 -8695.4397473    969.11490965 -2601.74710518]
New Q values:  [  116.83691873 -8695.4397473   1007.80159934 -2601.74710518]
Reward: -1  Episode Reward:  -300
xxxxx
x   x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138  2069.1854516 ]
------
Step:61, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   806.36101138  2069.1854516 ]
New Q values:  [   16.82637525 -5807.06396197   806.36101138 -4870.58533956]
Reward: -10001  Episode Reward:  -10301
xxxxx
x   x
x   x
xg .x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 471.26619624  259.20207474 5494.49516683    0.        ]
New Q values:  [ 471.26619624  259.20207474 6438.50482489    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  1.41176892e+04  1.20371620e+03]
------
Step:2, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 20570.3046712   1101.59744825]
New Q values:  [   38.85388605  2677.89620798 21322.38083512  1101.59744825]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.36295299e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [4.36295299e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [5.79929789e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   2091.02738669   -180.00807518 135119.22301327]
------
Step:4, Action:West
State  130
Old Q Values:  [ 36041.91667283   2091.02738669   -180.00807518 135119.22301327]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 92903.56737961]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
------
Step:5, Action:West
State  126
Old Q Values:  [  0.         331.64678262 214.99793907 284.9215583 ]
New Q values:  [  0.         331.64678262 214.99793907 984.14568219]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2882.59019622  291.56517387 -120.29354603]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 2882.59019622  291.56517387 -120.29354603]
New Q values:  [-177.44732869 1283.68821356  291.56517387 -120.29354603]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 437.5071169   64.46351788   0.        ]
------
Step:7, Action:South
State  191
Old Q Values:  [  3.06655861 437.5071169   64.46351788   0.        ]
New Q values:  [  3.06655861 272.75025     64.46351788   0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[293.53883885 272.4700982  327.82467748 -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [293.53883885 272.4700982  327.82467748 -35.88578819]
New Q values:  [198.64061054 272.4700982  327.82467748 -35.88578819]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 272.75025     64.46351788   0.        ]
------
Step:9, Action:South
State  191
Old Q Values:  [  3.06655861 272.75025     64.46351788   0.        ]
New Q values:  [  3.06655861 206.84750324  64.46351788   0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[198.64061054 272.4700982  327.82467748 -35.88578819]
------
Step:10, Action:South
State  261
Old Q Values:  [198.64061054 272.4700982  327.82467748 -35.88578819]
New Q values:  [198.64061054  26.73544252 327.82467748 -35.88578819]
Reward: -301  Episode Reward:  -260
xxxxx
x   x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[198.64061054  26.73544252 327.82467748 -35.88578819]
------
Step:11, Action:East
State  260
Old Q Values:  [  116.83691873 -8695.4397473   1007.80159934 -2601.74710518]
New Q values:  [  116.83691873 -8695.4397473    650.42894315 -2601.74710518]
Reward: 9  Episode Reward:  -251
xxxxx
x   x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138 -4870.58533956]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   453.83635089 40635.91121437]
New Q values:  [-2527.46239811 -8521.23367799 63188.54762182 40635.91121437]
Reward: 100009  Episode Reward:  99758
xxxxx
x   x
x g x
x  ax
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  232.60679351]
------
Step:1, Action:West
State  138
Old Q Values:  [-139.45925583 -136.92174709   -0.32296531  232.60679351]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.57303747e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614  1529.53676487]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   153.83468034   601.68186852]
New Q values:  [ -253.44886264 -1902.20915811   153.83468034   631.17921147]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1283.68821356  291.56517387 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 1283.68821356  291.56517387 -120.29354603]
New Q values:  [-177.44732869 2450.42673289  291.56517387 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  259.20207474 6438.50482489    0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [ 89.26354357 558.35303952 571.16399747 -30.99112081]
New Q values:  [  89.26354357  558.35303952 1043.04440541  -30.99112081]
Reward: -9991  Episode Reward:  -9964
xxxxx
x   x
x g.x
x.. x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 63188.54762182 40635.91121437]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 63188.54762182 40635.91121437]
New Q values:  [-2527.46239811 -8521.23367799 28282.43213019 40635.91121437]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10005.37693821 -7525.53407498 -7525.7277781    649.14636352]
------
Step:2, Action:North
State  288
Old Q Values:  [10005.37693821 -7525.53407498 -7525.7277781    649.14636352]
New Q values:  [ 5161.71883612 -7525.53407498 -7525.7277781    649.14636352]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  3847.22686944 -1925.08326713 -1455.65174173]
------
Step:3, Action:South
State  208
Old Q Values:  [  126.41333341  3847.22686944 -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341  3086.80639861 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5161.71883612 -7525.53407498 -7525.7277781    649.14636352]
------
Step:4, Action:North
State  288
Old Q Values:  [ 5161.71883612 -7525.53407498 -7525.7277781    649.14636352]
New Q values:  [ 2990.12945403 -7525.53407498 -7525.7277781    649.14636352]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  3086.80639861 -1925.08326713 -1455.65174173]
------
Step:5, Action:South
State  208
Old Q Values:  [  126.41333341  3086.80639861 -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341  2131.16139565 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  15
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2990.12945403 -7525.53407498 -7525.7277781    649.14636352]
------
Step:6, Action:North
State  288
Old Q Values:  [ 2990.12945403 -7525.53407498 -7525.7277781    649.14636352]
New Q values:  [ 1834.80020031 -7525.53407498 -7525.7277781    649.14636352]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x.gax
x.  x
xxxxx
Step:7, Action:South
State  208
Old Q Values:  [  126.41333341  2131.16139565 -1925.08326713 -1455.65174173]
New Q values:  [  126.41333341  1402.30461835 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1834.80020031 -7525.53407498 -7525.7277781    649.14636352]
------
Step:8, Action:West
State  288
Old Q Values:  [ 1834.80020031 -7525.53407498 -7525.7277781    649.14636352]
New Q values:  [ 1834.80020031 -7525.53407498 -7525.7277781    733.30294546]
Reward: -1  Episode Reward:  12
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1580.81466683]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 28282.43213019 40635.91121437]
New Q values:  [-2527.46239811 -8521.23367799 28282.43213019 28021.27530011]
Reward: 9  Episode Reward:  21
xxxxx
x.g.x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39205.03604789  2256.66526474  5231.26374966  1875.31501677]
------
Step:10, Action:North
State  261
Old Q Values:  [198.64061054  26.73544252 327.82467748 -35.88578819]
New Q values:  [397.76956584  26.73544252 327.82467748 -35.88578819]
Reward: 9  Episode Reward:  30
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 1043.04440541  -30.99112081]
------
Step:11, Action:South
State  183
Old Q Values:  [ 471.26619624  259.20207474 6438.50482489    0.        ]
New Q values:  [ 471.26619624  222.41169965 6438.50482489    0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[397.76956584  26.73544252 327.82467748 -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [397.76956584  26.73544252 327.82467748 -35.88578819]
New Q values:  [471.42114796  26.73544252 327.82467748 -35.88578819]
Reward: -1  Episode Reward:  28
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 1043.04440541  -30.99112081]
------
Step:13, Action:South
State  183
Old Q Values:  [ 471.26619624  222.41169965 6438.50482489    0.        ]
New Q values:  [ 471.26619624  229.79102425 6438.50482489    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[471.42114796  26.73544252 327.82467748 -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [471.42114796  26.73544252 327.82467748 -35.88578819]
New Q values:  [500.88178081  26.73544252 327.82467748 -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 1043.04440541  -30.99112081]
------
Step:15, Action:South
State  177
Old Q Values:  [78394.48547832 26817.66925136 48692.32086537     0.        ]
New Q values:  [78394.48547832 22487.97851491 48692.32086537     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39205.03604789  2256.66526474  5231.26374966  1875.31501677]
------
Step:16, Action:North
State  260
Old Q Values:  [  116.83691873 -8695.4397473    650.42894315 -2601.74710518]
New Q values:  [  373.93862938 -8695.4397473    650.42894315 -2601.74710518]
Reward: -1  Episode Reward:  24
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1120.77770304   694.85009505  1092.67953962 -4966.32149798]
------
Step:17, Action:East
State  177
Old Q Values:  [78394.48547832 22487.97851491 48692.32086537     0.        ]
New Q values:  [78394.48547832 22487.97851491 20669.43452861     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.78960710e+03 3.97702061e+03 2.91043938e+03]
------
Step:18, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.78960710e+03 3.97702061e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.78960710e+03 2.01089963e+03 2.91043938e+03]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  126.41333341  1402.30461835 -1925.08326713 -1455.65174173]
------
Step:19, Action:South
State  210
Old Q Values:  [5.79929789e+04 4.51465896e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [5.79929789e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1834.80020031 -7525.53407498 -7525.7277781    733.30294546]
------
Step:20, Action:North
State  288
Old Q Values:  [ 1834.80020031 -7525.53407498 -7525.7277781    733.30294546]
New Q values:  [18131.21373798 -7525.53407498 -7525.7277781    733.30294546]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.79929789e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:21, Action:North
State  208
Old Q Values:  [  126.41333341  1402.30461835 -1925.08326713 -1455.65174173]
New Q values:  [27927.03554725  1402.30461835 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  29
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 92903.56737961]
------
Step:22, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 92903.56737961]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 76467.83734791]
Reward: 9  Episode Reward:  38
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:23, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.29501594e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.31516176e+05]
Reward: 100009  Episode Reward:  100047
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 28282.43213019 28021.27530011]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 28282.43213019 28021.27530011]
New Q values:  [-2527.46239811 -8521.23367799 16757.73697347 28021.27530011]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18131.21373798 -7525.53407498 -7525.7277781    733.30294546]
------
Step:2, Action:North
State  288
Old Q Values:  [18131.21373798 -7525.53407498 -7525.7277781    733.30294546]
New Q values:  [15635.99615936 -7525.53407498 -7525.7277781    733.30294546]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27927.03554725  1402.30461835 -1925.08326713 -1455.65174173]
------
Step:3, Action:North
State  208
Old Q Values:  [27927.03554725  1402.30461835 -1925.08326713 -1455.65174173]
New Q values:  [ 5304.74557158  1402.30461835 -1925.08326713 -1455.65174173]
Reward: -9991  Episode Reward:  -9973
xxxxx
x. gx
x.. x
x.  x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 1043.04440541  -30.99112081]
------
Step:1, Action:East
State  183
Old Q Values:  [ 471.26619624  229.79102425 6438.50482489    0.        ]
New Q values:  [ 471.26619624  229.79102425 8977.51618049    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  2677.89620798 21322.38083512  1101.59744825]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  1.41176892e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  2.30443693e+04  1.20371620e+03]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.79929789e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:3, Action:North
State  216
Old Q Values:  [  332.62193846   633.14118268 -8489.43729461   531.09593838]
New Q values:  [  305.63989944   633.14118268 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  17
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.57303747e+02]
------
Step:4, Action:West
State  136
Old Q Values:  [-5281.21195651   428.43784228 -2383.80019164 -7670.67347219]
New Q values:  [-5281.21195651   428.43784228 -2383.80019164 -8736.74526961]
Reward: -9991  Episode Reward:  -9974
xxxxx
x.g x
x   x
x...x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4351.58935291 -4885.64726759  1099.96026581]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  4351.58935291 -4885.64726759  1099.96026581]
New Q values:  [-5922.26708831  2220.28014121 -4885.64726759  1099.96026581]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -4703.21688134  1580.81466683]
------
Step:2, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134  1580.81466683]
New Q values:  [   37.74111519  -168.92307549 -4703.21688134   787.99040098]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[500.88178081  26.73544252 327.82467748 -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [500.88178081  26.73544252 327.82467748 -35.88578819]
New Q values:  [2899.00756647   26.73544252  327.82467748  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  229.79102425 8977.51618049    0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [ 471.26619624  229.79102425 8977.51618049    0.        ]
New Q values:  [  471.26619624   229.79102425 10503.71727281     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  2.30443693e+04  1.20371620e+03]
------
Step:5, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 21322.38083512  1101.59744825]
New Q values:  [   38.85388605  2677.89620798 25926.2459919   1101.59744825]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5.79929789e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [5.79929789e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [4.61429427e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  34
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 76467.83734791]
------
Step:7, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 76467.83734791]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 69893.54533523]
Reward: 9  Episode Reward:  43
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  43132.21661867 131003.36798688]
------
Step:8, Action:West
State  126
Old Q Values:  [  0.         331.64678262 214.99793907 984.14568219]
New Q values:  [   0.          331.64678262  214.99793907 1134.18629274]
Reward: 9  Episode Reward:  52
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2450.42673289  291.56517387 -120.29354603]
------
Step:9, Action:South
State  99
Old Q Values:  [    0.         40775.35701405 65700.46302897     0.        ]
New Q values:  [    0.         29717.9609894  65700.46302897     0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17203.58890013 44694.72727926     0.        ]
------
Step:10, Action:East
State  191
Old Q Values:  [  3.06655861 206.84750324  64.46351788   0.        ]
New Q values:  [  3.06655861 206.84750324 482.93632693   0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.          135.48456638 1525.83639927    0.        ]
------
Step:11, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 25926.2459919   1101.59744825]
New Q values:  [   38.85388605  2677.89620798 24212.78122121  1101.59744825]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.61429427e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:12, Action:North
State  218
Old Q Values:  [1839.66870657  848.96225083    0.          429.03841886]
New Q values:  [902.45860669 848.96225083   0.         429.03841886]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.57303747e+02]
------
Step:13, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 69893.54533523]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 67411.67107642]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.31516176e+05]
------
Step:14, Action:West
State  127
Old Q Values:  [   0.            1.67014986  384.73306724 1319.87580673]
New Q values:  [   0.            1.67014986  384.73306724 1262.47834256]
Reward: -1  Episode Reward:  46
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2450.42673289  291.56517387 -120.29354603]
------
Step:15, Action:South
State  111
Old Q Values:  [-177.44732869 2450.42673289  291.56517387 -120.29354603]
New Q values:  [-177.44732869 1035.72405447  291.56517387 -120.29354603]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 187.17787105 103.53268112 154.04646645]
------
Step:16, Action:South
State  188
Old Q Values:  [-6523.78898263   712.69143835  1223.43046172     0.        ]
New Q values:  [-6523.78898263   479.60525829  1223.43046172     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  373.93862938 -8695.4397473    650.42894315 -2601.74710518]
------
Step:17, Action:East
State  261
Old Q Values:  [2899.00756647   26.73544252  327.82467748  -35.88578819]
New Q values:  [2899.00756647   26.73544252  372.43817441  -35.88578819]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138 -4870.58533956]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 16757.73697347 28021.27530011]
New Q values:  [-2527.46239811 -8521.23367799 71399.2936372  28021.27530011]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.78960710e+03 2.01089963e+03 2.91043938e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.78960710e+03 2.01089963e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.78960710e+03 2.40118352e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5304.74557158  1402.30461835 -1925.08326713 -1455.65174173]
------
Step:2, Action:North
State  216
Old Q Values:  [  305.63989944   633.14118268 -8489.43729461   531.09593838]
New Q values:  [  294.84708384   633.14118268 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.57303747e+02]
------
Step:3, Action:West
State  136
Old Q Values:  [-5281.21195651   428.43784228 -2383.80019164 -8736.74526961]
New Q values:  [-5281.21195651   428.43784228 -2383.80019164 -3395.98390505]
Reward: 9  Episode Reward:  27
xxxxx
x.agx
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:4, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1069.2755168   1087.08039754]
New Q values:  [-9594.56523706 -8069.05606225  1069.2755168    677.94755731]
Reward: 9  Episode Reward:  36
xxxxx
xag x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  792.38466098   -8.57207238 -180.6       ]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 1035.72405447  291.56517387 -120.29354603]
New Q values:  [-177.44732869  469.8429831   291.56517387 -120.29354603]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 187.17787105 103.53268112 154.04646645]
------
Step:6, Action:South
State  189
Old Q Values:  [  9.84673294 187.17787105 103.53268112 154.04646645]
New Q values:  [  9.84673294 949.97341836 103.53268112 154.04646645]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2899.00756647   26.73544252  372.43817441  -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [2899.00756647   26.73544252  372.43817441  -35.88578819]
New Q values:  [1443.9950521    26.73544252  372.43817441  -35.88578819]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 949.97341836 103.53268112 154.04646645]
------
Step:8, Action:South
State  189
Old Q Values:  [  9.84673294 949.97341836 103.53268112 154.04646645]
New Q values:  [  9.84673294 812.58788297 103.53268112 154.04646645]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1443.9950521    26.73544252  372.43817441  -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [1443.9950521    26.73544252  372.43817441  -35.88578819]
New Q values:  [820.77438573  26.73544252 372.43817441 -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 812.58788297 103.53268112 154.04646645]
------
Step:10, Action:South
State  188
Old Q Values:  [-6523.78898263   479.60525829  1223.43046172     0.        ]
New Q values:  [-6523.78898263   386.37078626  1223.43046172     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  373.93862938 -8695.4397473    650.42894315 -2601.74710518]
------
Step:11, Action:East
State  261
Old Q Values:  [820.77438573  26.73544252 372.43817441 -35.88578819]
New Q values:  [820.77438573  26.73544252 396.28357318 -35.88578819]
Reward: 9  Episode Reward:  49
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138 -4870.58533956]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 71399.2936372  28021.27530011]
New Q values:  [-2527.46239811 -8521.23367799 93255.91630269 28021.27530011]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.78960710e+03 2.40118352e+03 2.91043938e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [   62.8218634  22697.26268809   574.29975264   408.67479662]
New Q values:  [   62.8218634  22697.26268809   425.06225586   408.67479662]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  294.84708384   633.14118268 -8489.43729461   531.09593838]
------
Step:2, Action:South
State  208
Old Q Values:  [ 5304.74557158  1402.30461835 -1925.08326713 -1455.65174173]
New Q values:  [ 5304.74557158  5257.12069515 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15635.99615936 -7525.53407498 -7525.7277781    733.30294546]
------
Step:3, Action:North
State  288
Old Q Values:  [15635.99615936 -7525.53407498 -7525.7277781    733.30294546]
New Q values:  [ 7845.22213522 -7525.53407498 -7525.7277781    733.30294546]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5304.74557158  5257.12069515 -1925.08326713 -1455.65174173]
------
Step:4, Action:North
State  216
Old Q Values:  [  294.84708384   633.14118268 -8489.43729461   531.09593838]
New Q values:  [  251.87018622   633.14118268 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  26
xxxxx
xg.ax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   428.43784228 -2383.80019164 -3395.98390505]
------
Step:5, Action:South
State  136
Old Q Values:  [-5281.21195651   428.43784228 -2383.80019164 -3395.98390505]
New Q values:  [-5281.21195651   360.71749172 -2383.80019164 -3395.98390505]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  251.87018622   633.14118268 -8489.43729461   531.09593838]
------
Step:6, Action:South
State  208
Old Q Values:  [ 5304.74557158  5257.12069515 -1925.08326713 -1455.65174173]
New Q values:  [ 5304.74557158  4455.81491863 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7845.22213522 -7525.53407498 -7525.7277781    733.30294546]
------
Step:7, Action:North
State  288
Old Q Values:  [ 7845.22213522 -7525.53407498 -7525.7277781    733.30294546]
New Q values:  [-1271.08747444 -7525.53407498 -7525.7277781    733.30294546]
Reward: -10001  Episode Reward:  -9977
xxxxx
x.. x
x  gx
x.. x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  469.8429831   291.56517387 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6         330.55462865    5.4           0.        ]
New Q values:  [-180.6         450.53517308    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 1043.04440541  -30.99112081]
------
Step:2, Action:East
State  181
Old Q Values:  [  89.26354357  558.35303952 1043.04440541  -30.99112081]
New Q values:  [  89.26354357  558.35303952 1507.39301698  -30.99112081]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144   783.25717741   174.55451539   239.04887894]
New Q values:  [-2469.90645144   560.61117438   174.55451539   239.04887894]
Reward: 9  Episode Reward:  17
xxxxx
x g.x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138 -4870.58533956]
------
Step:4, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 -4703.21688134   787.99040098]
New Q values:  [   37.74111519  -168.92307549 -1655.8958689    787.99040098]
Reward: 9  Episode Reward:  26
xxxxx
x .gx
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1271.08747444 -7525.53407498 -7525.7277781    733.30294546]
------
Step:5, Action:West
State  288
Old Q Values:  [-1271.08747444 -7525.53407498 -7525.7277781    733.30294546]
New Q values:  [-1271.08747444 -7525.53407498 -7525.7277781    529.11829848]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -1655.8958689    787.99040098]
------
Step:6, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -1655.8958689    787.99040098]
New Q values:  [   37.74111519  -168.92307549 -1655.8958689    566.82847611]
Reward: 9  Episode Reward:  34
xxxxx
x .gx
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[820.77438573  26.73544252 396.28357318 -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [820.77438573  26.73544252 396.28357318 -35.88578819]
New Q values:  [779.92765939  26.73544252 396.28357318 -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 1507.39301698  -30.99112081]
------
Step:8, Action:East
State  183
Old Q Values:  [  471.26619624   229.79102425 10503.71727281     0.        ]
New Q values:  [ 471.26619624  229.79102425 4865.3190599     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  22.48535485 2214.77383591  549.89931413 1915.70494401]
------
Step:9, Action:South
State  193
Old Q Values:  [-5922.26708831  2220.28014121 -4885.64726759  1099.96026581]
New Q values:  [-5922.26708831  1057.56059932 -4885.64726759  1099.96026581]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -1655.8958689    566.82847611]
------
Step:10, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   806.36101138 -4870.58533956]
New Q values:  [   16.82637525 -5807.06396197   806.36101138 -1714.85583801]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[779.92765939  26.73544252 396.28357318 -35.88578819]
------
Step:11, Action:North
State  260
Old Q Values:  [  373.93862938 -8695.4397473    650.42894315 -2601.74710518]
New Q values:  [-5523.22068636 -8695.4397473    650.42894315 -2601.74710518]
Reward: -10001  Episode Reward:  -9971
xxxxx
x ..x
xg .x
x   x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  2.30443693e+04  1.20371620e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  2.30443693e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  2.30660306e+04  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.61429427e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [ 5304.74557158  4455.81491863 -1925.08326713 -1455.65174173]
New Q values:  [22350.79955156  4455.81491863 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 67411.67107642]
------
Step:3, Action:West
State  136
Old Q Values:  [-5281.21195651   360.71749172 -2383.80019164 -3395.98390505]
New Q values:  [-5281.21195651   360.71749172 -2383.80019164 -1032.21090698]
Reward: 9  Episode Reward:  27
xxxxx
xga x
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1069.2755168    677.94755731]
------
Step:4, Action:East
State  114
Old Q Values:  [  -180.6          6712.83097384  43132.21661867 131003.36798688]
New Q values:  [  -180.6          6712.83097384  17419.47777153 131003.36798688]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.57303747e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.57303747e+02]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.95233319e+04]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  17419.47777153 131003.36798688]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614  1529.53676487]
New Q values:  [ -281.736      -3455.78276043   106.72795614   635.08393959]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062    59.56411214  -180.6       ]
------
Step:7, Action:East
State  106
Old Q Values:  [ -180.6        -8952.15415062    59.56411214  -180.6       ]
New Q values:  [ -180.6        -8952.15415062   213.75082673  -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   635.08393959]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   635.08393959]
New Q values:  [ -281.736      -3455.78276043   106.72795614   772.85723985]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1731.41221336  303.62422203 -252.78192178]
------
Step:9, Action:South
State  107
Old Q Values:  [-252.35169558 1731.41221336  303.62422203 -252.78192178]
New Q values:  [-252.35169558  782.19810804  303.62422203 -252.78192178]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[135.20477233   0.         280.77740897   0.        ]
------
Step:10, Action:East
State  187
Old Q Values:  [135.20477233   0.         280.77740897   0.        ]
New Q values:  [135.20477233   0.         501.15052887   0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458  1298.13188427     0.        ]
------
Step:11, Action:East
State  203
Old Q Values:  [   3.60604218  705.82716573 1822.13470504    0.        ]
New Q values:  [  3.60604218 705.82716573 998.99146402   0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[902.45860669 848.96225083   0.         429.03841886]
------
Step:12, Action:North
State  218
Old Q Values:  [902.45860669 848.96225083   0.         429.03841886]
New Q values:  [12217.38301112   848.96225083     0.           429.03841886]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.95233319e+04]
------
Step:13, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.95233319e+04]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.60405899e+04]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   772.85723985]
------
Step:14, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1069.2755168    677.94755731]
New Q values:  [-9594.56523706 -8069.05606225  1069.2755168    314.0846357 ]
Reward: -1  Episode Reward:  36
xxxxx
xag x
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         145.01870926   65.14560537    0.        ]
------
Step:15, Action:South
State  104
Old Q Values:  [-8652.84         702.85534727  1078.33520689 -8652.84      ]
New Q values:  [-8652.84         685.67580675  1078.33520689 -8652.84      ]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[-2081.09028721     0.          1350.44555947     0.        ]
------
Step:16, Action:East
State  184
Old Q Values:  [-2081.09028721     0.          1350.44555947     0.        ]
New Q values:  [-2081.09028721     0.          7348.75703021     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xga x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  22697.26268809   425.06225586   408.67479662]
------
Step:17, Action:South
State  200
Old Q Values:  [   62.8218634  22697.26268809   425.06225586   408.67479662]
New Q values:  [   62.8218634  37055.07996604   425.06225586   408.67479662]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 93255.91630269 28021.27530011]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 93255.91630269 28021.27530011]
New Q values:  [-2527.46239811 -8521.23367799 37466.50201062 28021.27530011]
Reward: 9  Episode Reward:  42
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1271.08747444 -7525.53407498 -7525.7277781    529.11829848]
------
Step:19, Action:West
State  288
Old Q Values:  [-1271.08747444 -7525.53407498 -7525.7277781    529.11829848]
New Q values:  [-1271.08747444 -7525.53407498 -7525.7277781    381.09586222]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 -1655.8958689    566.82847611]
------
Step:20, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 -1655.8958689    566.82847611]
New Q values:  [ 3.77411152e+01 -1.68923075e+02 -1.65589587e+03  7.19936422e+04]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6859.02098822    92.93262585  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869  469.8429831   291.56517387 -120.29354603]
New Q values:  [-177.44732869  469.8429831   353.8832415  -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   772.85723985]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   772.85723985]
New Q values:  [ -281.736      -3455.78276043   106.72795614   336.42268369]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6859.02098822    92.93262585  -180.6       ]
------
Step:3, Action:East
State  108
Old Q Values:  [-8463.16477134  1609.55072817   534.97584496     0.        ]
New Q values:  [-8463.16477134  1609.55072817   534.17299302     0.        ]
Reward: -1  Episode Reward:  7
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1069.2755168    314.0846357 ]
------
Step:4, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1069.2755168    314.0846357 ]
New Q values:  [-9594.56523706 -8069.05606225   541.32545423   314.0846357 ]
Reward: 9  Episode Reward:  16
xxxxx
x gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   360.71749172 -2383.80019164 -1032.21090698]
------
Step:5, Action:South
State  136
Old Q Values:  [-5281.21195651   360.71749172 -2383.80019164 -1032.21090698]
New Q values:  [-5281.21195651   339.62935149 -2383.80019164 -1032.21090698]
Reward: 9  Episode Reward:  25
xxxxx
x  gx
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  251.87018622   633.14118268 -8489.43729461   531.09593838]
------
Step:6, Action:South
State  216
Old Q Values:  [  251.87018622   633.14118268 -8489.43729461   531.09593838]
New Q values:  [  251.87018622   372.98523174 -8489.43729461   531.09593838]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1271.08747444 -7525.53407498 -7525.7277781    381.09586222]
------
Step:7, Action:West
State  288
Old Q Values:  [-1271.08747444 -7525.53407498 -7525.7277781    381.09586222]
New Q values:  [-1271.08747444 -7525.53407498 -7525.7277781  11397.78894807]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 37466.50201062 28021.27530011]
------
Step:8, Action:East
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02 -1.65589587e+03  7.19936422e+04]
New Q values:  [ 3.77411152e+01 -1.68923075e+02  2.75637834e+03  7.19936422e+04]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x .gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1271.08747444 -7525.53407498 -7525.7277781  11397.78894807]
------
Step:9, Action:West
State  288
Old Q Values:  [-1271.08747444 -7525.53407498 -7525.7277781  11397.78894807]
New Q values:  [-1271.08747444 -7525.53407498 -7525.7277781  15798.46618242]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 37466.50201062 28021.27530011]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 37466.50201062 28021.27530011]
New Q values:  [-2527.46239811 -8521.23367799 19725.54065897 28021.27530011]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1271.08747444 -7525.53407498 -7525.7277781  15798.46618242]
------
Step:11, Action:North
State  288
Old Q Values:  [-1271.08747444 -7525.53407498 -7525.7277781  15798.46618242]
New Q values:  [ -349.70620826 -7525.53407498 -7525.7277781  15798.46618242]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  251.87018622   372.98523174 -8489.43729461   531.09593838]
------
Step:12, Action:South
State  208
Old Q Values:  [22350.79955156  4455.81491863 -1925.08326713 -1455.65174173]
New Q values:  [22350.79955156  6521.26582218 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x .gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -349.70620826 -7525.53407498 -7525.7277781  15798.46618242]
------
Step:13, Action:West
State  288
Old Q Values:  [ -349.70620826 -7525.53407498 -7525.7277781  15798.46618242]
New Q values:  [ -349.70620826 -7525.53407498 -7525.7277781  14725.169063  ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 19725.54065897 28021.27530011]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 19725.54065897 28021.27530011]
New Q values:  [-2527.46239811 -8521.23367799 19725.54065897 11447.88841786]
Reward: 9  Episode Reward:  46
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[779.92765939  26.73544252 396.28357318 -35.88578819]
------
Step:15, Action:North
State  260
Old Q Values:  [-5523.22068636 -8695.4397473    650.42894315 -2601.74710518]
New Q values:  [ -718.90489343 -8695.4397473    650.42894315 -2601.74710518]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:16, Action:East
State  176
Old Q Values:  [76485.61294353  1327.79507613 99274.60769697     0.        ]
New Q values:  [ 76485.61294353   1327.79507613 100588.37489285      0.        ]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -349.70620826 -7525.53407498 -7525.7277781  14725.169063  ]
------
Step:1, Action:West
State  288
Old Q Values:  [ -349.70620826 -7525.53407498 -7525.7277781  14725.169063  ]
New Q values:  [ -349.70620826 -7525.53407498 -7525.7277781  27493.56028664]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3.77411152e+01 -1.68923075e+02  2.75637834e+03  7.19936422e+04]
------
Step:2, Action:West
State  273
Old Q Values:  [ 3.77411152e+01 -1.68923075e+02  2.75637834e+03  7.19936422e+04]
New Q values:  [   37.74111519  -168.92307549  2756.37833686 29036.83517974]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[779.92765939  26.73544252 396.28357318 -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [779.92765939  26.73544252 396.28357318 -35.88578819]
New Q values:  [769.58896885  26.73544252 396.28357318 -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 1507.39301698  -30.99112081]
------
Step:4, Action:South
State  183
Old Q Values:  [ 471.26619624  229.79102425 4865.3190599     0.        ]
New Q values:  [ 471.26619624  322.19310035 4865.3190599     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[769.58896885  26.73544252 396.28357318 -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [769.58896885  26.73544252 396.28357318 -35.88578819]
New Q values:  [1766.83130551   26.73544252  396.28357318  -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  322.19310035 4865.3190599     0.        ]
------
Step:6, Action:East
State  183
Old Q Values:  [ 471.26619624  322.19310035 4865.3190599     0.        ]
New Q values:  [ 471.26619624  322.19310035 8871.33679154    0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  2.30660306e+04  1.20371620e+03]
------
Step:7, Action:East
State  195
Old Q Values:  [   38.85388605  2677.89620798 24212.78122121  1101.59744825]
New Q values:  [   38.85388605  2677.89620798 23527.39531294  1101.59744825]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.61429427e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:8, Action:North
State  210
Old Q Values:  [4.61429427e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [3.86860784e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  42
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 67411.67107642]
------
Step:9, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 67411.67107642]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 66271.07882663]
Reward: 9  Episode Reward:  51
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  17419.47777153 131003.36798688]
------
Step:10, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.31516176e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.32322009e+05]
Reward: 100009  Episode Reward:  100060
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  469.8429831   353.8832415  -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6         450.53517308    5.4           0.        ]
New Q values:  [-180.6       2847.0151067    5.4          0.       ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  322.19310035 8871.33679154    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [  89.26354357  558.35303952 1507.39301698  -30.99112081]
New Q values:  [  89.26354357  558.35303952 1699.1324616   -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  1057.56059932 -4885.64726759  1099.96026581]
New Q values:  [-5922.26708831  9139.47479365 -4885.64726759  1099.96026581]
Reward: 9  Episode Reward:  27
xxxxx
x .gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  2756.37833686 29036.83517974]
------
Step:4, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   806.36101138 -1714.85583801]
New Q values:  [   16.82637525 -5807.06396197   806.36101138  -150.49294355]
Reward: 9  Episode Reward:  36
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1766.83130551   26.73544252  396.28357318  -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [1766.83130551   26.73544252  396.28357318  -35.88578819]
New Q values:  [1215.87226068   26.73544252  396.28357318  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 1699.1324616   -30.99112081]
------
Step:6, Action:East
State  181
Old Q Values:  [  89.26354357  558.35303952 1699.1324616   -30.99112081]
New Q values:  [ 89.26354357 558.35303952 847.23633695 -30.99112081]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   560.61117438   174.55451539   239.04887894]
------
Step:7, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.78960710e+03 2.40118352e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 9.57151143e+02 2.40118352e+03 2.91043938e+03]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138  -150.49294355]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 19725.54065897 11447.88841786]
New Q values:  [-2527.46239811 -8521.23367799 16143.68434958 11447.88841786]
Reward: 9  Episode Reward:  42
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -349.70620826 -7525.53407498 -7525.7277781  27493.56028664]
------
Step:9, Action:West
State  288
Old Q Values:  [ -349.70620826 -7525.53407498 -7525.7277781  27493.56028664]
New Q values:  [ -349.70620826 -7525.53407498 -7525.7277781  15839.92941953]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 16143.68434958 11447.88841786]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 16143.68434958 11447.88841786]
New Q values:  [-2527.46239811 -8521.23367799 11208.85256569 11447.88841786]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -349.70620826 -7525.53407498 -7525.7277781  15839.92941953]
------
Step:11, Action:North
State  288
Old Q Values:  [ -349.70620826 -7525.53407498 -7525.7277781  15839.92941953]
New Q values:  [11465.34104335 -7525.53407498 -7525.7277781  15839.92941953]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.86860784e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:12, Action:North
State  208
Old Q Values:  [22350.79955156  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [28827.04346861  6521.26582218 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  48
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 66271.07882663]
------
Step:13, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 66271.07882663]
New Q values:  [ 36041.91667283   2091.02738669   -180.00807518 125814.84192672]
Reward: 100009  Episode Reward:  100057
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  2756.37833686 29036.83517974]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11208.85256569 11447.88841786]
New Q values:  [-2527.46239811 -8521.23367799 11208.85256569  4949.31704535]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1215.87226068   26.73544252  396.28357318  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [1215.87226068   26.73544252  396.28357318  -35.88578819]
New Q values:  [745.91980536  26.73544252 396.28357318 -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 89.26354357 558.35303952 847.23633695 -30.99112081]
------
Step:3, Action:East
State  177
Old Q Values:  [78394.48547832 22487.97851491 20669.43452861     0.        ]
New Q values:  [78394.48547832 22487.97851491 11015.01624954     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x..gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9139.47479365 -4885.64726759  1099.96026581]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.57151143e+02 2.40118352e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 6.24168761e+02 2.40118352e+03 2.91043938e+03]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138  -150.49294355]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11208.85256569  4949.31704535]
New Q values:  [-2527.46239811 -8521.23367799  9240.91985214  4949.31704535]
Reward: 9  Episode Reward:  35
xxxxx
x...x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11465.34104335 -7525.53407498 -7525.7277781  15839.92941953]
------
Step:6, Action:West
State  288
Old Q Values:  [11465.34104335 -7525.53407498 -7525.7277781  15839.92941953]
New Q values:  [11465.34104335 -7525.53407498 -7525.7277781   9107.64772345]
Reward: -1  Episode Reward:  34
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9240.91985214  4949.31704535]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9240.91985214  4949.31704535]
New Q values:  [-2527.46239811 -8521.23367799  7135.37025386  4949.31704535]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11465.34104335 -7525.53407498 -7525.7277781   9107.64772345]
------
Step:8, Action:North
State  288
Old Q Values:  [11465.34104335 -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [16191.359944   -7525.53407498 -7525.7277781   9107.64772345]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.86860784e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:9, Action:North
State  208
Old Q Values:  [28827.04346861  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [49280.66996546  6521.26582218 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  41
xxxxx
x..ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   2091.02738669   -180.00807518 125814.84192672]
------
Step:10, Action:West
State  128
Old Q Values:  [ 8775.70846068 26229.9965956  -8652.84       72269.26637965]
New Q values:  [ 8775.70846068 26229.9965956  -8652.84       52792.91549266]
Reward: -9991  Episode Reward:  -9950
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 471.26619624  322.19310035 8871.33679154    0.        ]
------
Step:1, Action:East
State  181
Old Q Values:  [ 89.26354357 558.35303952 847.23633695 -30.99112081]
New Q values:  [  89.26354357  558.35303952 3086.13697288  -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9139.47479365 -4885.64726759  1099.96026581]
------
Step:2, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.24168761e+02 2.40118352e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.39567858e+03 2.40118352e+03 2.91043938e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7135.37025386  4949.31704535]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7135.37025386  4949.31704535]
New Q values:  [-2527.46239811 -8521.23367799  7710.95608474  4949.31704535]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16191.359944   -7525.53407498 -7525.7277781   9107.64772345]
------
Step:4, Action:North
State  288
Old Q Values:  [16191.359944   -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [18087.76750426 -7525.53407498 -7525.7277781   9107.64772345]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.86860784e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:5, Action:North
State  208
Old Q Values:  [49280.66996546  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [57462.1205642   6521.26582218 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  35
xxxxx
x..ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   2091.02738669   -180.00807518 125814.84192672]
------
Step:6, Action:West
State  128
Old Q Values:  [ 8775.70846068 26229.9965956  -8652.84       52792.91549266]
New Q values:  [ 8775.70846068 26229.9965956  -8652.84       21201.9676666 ]
Reward: 9  Episode Reward:  44
xxxxx
x.agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.           264.6715651  -5999.38454759     0.        ]
------
Step:7, Action:South
State  113
Old Q Values:  [    0.           264.6715651  -5999.38454759     0.        ]
New Q values:  [    0.          2847.11106414 -5999.38454759     0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x. gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9139.47479365 -4885.64726759  1099.96026581]
------
Step:8, Action:South
State  193
Old Q Values:  [-5922.26708831  9139.47479365 -4885.64726759  1099.96026581]
New Q values:  [-5922.26708831 12366.24047138 -4885.64726759  1099.96026581]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  2756.37833686 29036.83517974]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7710.95608474  4949.31704535]
New Q values:  [-2527.46239811 -8521.23367799  7710.95608474 13746.63763251]
Reward: 9  Episode Reward:  51
xxxxx
x.  x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39205.03604789  2256.66526474  5231.26374966  1875.31501677]
------
Step:10, Action:North
State  256
Old Q Values:  [76193.91312679 18175.54694409  5576.40109469   644.94785455]
New Q values:  [54653.47771857 18175.54694409  5576.40109469   644.94785455]
Reward: -10001  Episode Reward:  -9950
xxxxx
x.  x
xg  x
x   x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 3086.13697288  -30.99112081]
------
Step:1, Action:East
State  189
Old Q Values:  [  9.84673294 812.58788297 103.53268112 154.04646645]
New Q values:  [   9.84673294  812.58788297 1137.58832726  154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 12366.24047138 -4885.64726759  1099.96026581]
New Q values:  [-5922.26708831 13662.94674247 -4885.64726759  1099.96026581]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  2756.37833686 29036.83517974]
------
Step:3, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   806.36101138  -150.49294355]
New Q values:  [   16.82637525 -5807.06396197   806.36101138   168.97876419]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[745.91980536  26.73544252 396.28357318 -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [745.91980536  26.73544252 396.28357318 -35.88578819]
New Q values:  [1223.60901401   26.73544252  396.28357318  -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 3086.13697288  -30.99112081]
------
Step:5, Action:East
State  177
Old Q Values:  [78394.48547832 22487.97851491 11015.01624954     0.        ]
New Q values:  [78394.48547832 22487.97851491  8504.29052256     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x. gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 13662.94674247 -4885.64726759  1099.96026581]
------
Step:6, Action:South
State  193
Old Q Values:  [-5922.26708831 13662.94674247 -4885.64726759  1099.96026581]
New Q values:  [-5922.26708831  5868.45017799 -4885.64726759  1099.96026581]
Reward: -1  Episode Reward:  24
xxxxx
x. .x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1346.23826999  853.66323244]
------
Step:7, Action:East
State  277
Old Q Values:  [   1.64433       0.         1346.23826999  853.66323244]
New Q values:  [  1.64433      0.         -29.77444073 853.66323244]
Reward: -9991  Episode Reward:  -9967
xxxxx
x. .x
x  .x
x  gx
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   541.32545423   314.0846357 ]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   541.32545423   314.0846357 ]
New Q values:  [-9594.56523706 -8069.05606225   323.81898714   314.0846357 ]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   339.62935149 -2383.80019164 -1032.21090698]
------
Step:2, Action:South
State  136
Old Q Values:  [-5281.21195651   339.62935149 -2383.80019164 -1032.21090698]
New Q values:  [-5281.21195651   300.58052211 -2383.80019164 -1032.21090698]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  251.87018622   372.98523174 -8489.43729461   531.09593838]
------
Step:3, Action:West
State  216
Old Q Values:  [  251.87018622   372.98523174 -8489.43729461   531.09593838]
New Q values:  [  251.87018622   372.98523174 -8489.43729461 11334.36236516]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  37055.07996604   425.06225586   408.67479662]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.39567858e+03 2.40118352e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.08766272e+03 2.40118352e+03 2.91043938e+03]
Reward: 9  Episode Reward:  36
xxxxx
xg  x
x.  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7710.95608474 13746.63763251]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7710.95608474 13746.63763251]
New Q values:  [-2527.46239811 -8521.23367799  7710.95608474 17265.56586737]
Reward: 9  Episode Reward:  45
xxxxx
x g x
x.  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39205.03604789  2256.66526474  5231.26374966  1875.31501677]
------
Step:6, Action:North
State  261
Old Q Values:  [1223.60901401   26.73544252  396.28357318  -35.88578819]
New Q values:  [836.12010378  26.73544252 396.28357318 -35.88578819]
Reward: 9  Episode Reward:  54
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  812.58788297 1137.58832726  154.04646645]
------
Step:7, Action:South
State  189
Old Q Values:  [   9.84673294  812.58788297 1137.58832726  154.04646645]
New Q values:  [   9.84673294  575.27118432 1137.58832726  154.04646645]
Reward: -1  Episode Reward:  53
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[836.12010378  26.73544252 396.28357318 -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [836.12010378  26.73544252 396.28357318 -35.88578819]
New Q values:  [675.12453969  26.73544252 396.28357318 -35.88578819]
Reward: -1  Episode Reward:  52
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  575.27118432 1137.58832726  154.04646645]
------
Step:9, Action:East
State  189
Old Q Values:  [   9.84673294  575.27118432 1137.58832726  154.04646645]
New Q values:  [  9.84673294 575.27118432 789.14993066 154.04646645]
Reward: -1  Episode Reward:  51
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         1115.71533251  815.31186329  441.58769553]
------
Step:10, Action:South
State  205
Old Q Values:  [  0.         121.94855967   0.         198.38683706]
New Q values:  [  0.         304.2783936    0.         198.38683706]
Reward: -1  Episode Reward:  50
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 853.66323244]
------
Step:11, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 853.66323244]
New Q values:  [  1.64433      0.         -29.77444073 543.40265488]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[675.12453969  26.73544252 396.28357318 -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [675.12453969  26.73544252 396.28357318 -35.88578819]
New Q values:  [506.19479507  26.73544252 396.28357318 -35.88578819]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 575.27118432 789.14993066 154.04646645]
------
Step:13, Action:East
State  189
Old Q Values:  [  9.84673294 575.27118432 789.14993066 154.04646645]
New Q values:  [  9.84673294 575.27118432 649.77457202 154.04646645]
Reward: -1  Episode Reward:  47
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         1115.71533251  815.31186329  441.58769553]
------
Step:14, Action:South
State  204
Old Q Values:  [   0.         1115.71533251  815.31186329  441.58769553]
New Q values:  [  0.         687.59443642 815.31186329 441.58769553]
Reward: -1  Episode Reward:  46
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138   168.97876419]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7710.95608474 17265.56586737]
New Q values:  [-2527.46239811 -8521.23367799 68516.11268518 17265.56586737]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[506.19479507  26.73544252 396.28357318 -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [506.19479507  26.73544252 396.28357318 -35.88578819]
New Q values:  [1133.71900989   26.73544252  396.28357318  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 3086.13697288  -30.99112081]
------
Step:2, Action:East
State  181
Old Q Values:  [  89.26354357  558.35303952 3086.13697288  -30.99112081]
New Q values:  [  89.26354357  558.35303952 3000.38984255  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5868.45017799 -4885.64726759  1099.96026581]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.08766272e+03 2.40118352e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.28237339e+03 2.40118352e+03 2.91043938e+03]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138   168.97876419]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 68516.11268518 17265.56586737]
New Q values:  [-2527.46239811 -8521.23367799 32838.17532535 17265.56586737]
Reward: 9  Episode Reward:  36
xxxxx
xg. x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18087.76750426 -7525.53407498 -7525.7277781   9107.64772345]
------
Step:5, Action:North
State  288
Old Q Values:  [18087.76750426 -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [24479.14317096 -7525.53407498 -7525.7277781   9107.64772345]
Reward: 9  Episode Reward:  45
xxxxx
x.. x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[57462.1205642   6521.26582218 -1925.08326713 -1455.65174173]
------
Step:6, Action:North
State  208
Old Q Values:  [57462.1205642   6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [30853.24720436  6521.26582218 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  44
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068 26229.9965956  -8652.84       21201.9676666 ]
------
Step:7, Action:South
State  128
Old Q Values:  [ 8775.70846068 26229.9965956  -8652.84       21201.9676666 ]
New Q values:  [ 8775.70846068 19747.37279955 -8652.84       21201.9676666 ]
Reward: -1  Episode Reward:  43
xxxxx
x.g x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30853.24720436  6521.26582218 -1925.08326713 -1455.65174173]
------
Step:8, Action:North
State  208
Old Q Values:  [30853.24720436  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [18701.28918172  6521.26582218 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  42
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068 19747.37279955 -8652.84       21201.9676666 ]
------
Step:9, Action:West
State  128
Old Q Values:  [ 8775.70846068 19747.37279955 -8652.84       21201.9676666 ]
New Q values:  [ 8775.70846068 19747.37279955 -8652.84       32365.99600744]
Reward: -9991  Episode Reward:  -9949
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6859.02098822    92.93262585  -180.6       ]
------
Step:1, Action:East
State  110
Old Q Values:  [ -239.29051573 -6859.02098822    92.93262585  -180.6       ]
New Q values:  [ -239.29051573 -6859.02098822   143.49985545  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   336.42268369]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   336.42268369]
New Q values:  [ -281.736      -3455.78276043   106.72795614   274.92196841]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  469.8429831   353.8832415  -120.29354603]
------
Step:3, Action:South
State  109
Old Q Values:  [-241.10880094  792.38466098   -8.57207238 -180.6       ]
New Q values:  [-241.10880094 1222.47081715   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  17
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  89.26354357  558.35303952 3000.38984255  -30.99112081]
------
Step:4, Action:East
State  180
Old Q Values:  [-1120.77770304   694.85009505  1092.67953962 -4966.32149798]
New Q values:  [-1120.77770304   694.85009505   610.65516816 -4966.32149798]
Reward: 9  Episode Reward:  26
xxxxx
xg .x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   560.61117438   174.55451539   239.04887894]
------
Step:5, Action:South
State  196
Old Q Values:  [-2469.90645144   560.61117438   174.55451539   239.04887894]
New Q values:  [-2469.90645144   471.55277317   174.55451539   239.04887894]
Reward: 9  Episode Reward:  35
xxxxx
x  .x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   806.36101138   168.97876419]
------
Step:6, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197   806.36101138   168.97876419]
New Q values:  [   16.82637525 -5807.06396197  7671.68735584   168.97876419]
Reward: 9  Episode Reward:  44
xxxxx
xg .x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24479.14317096 -7525.53407498 -7525.7277781   9107.64772345]
------
Step:7, Action:North
State  288
Old Q Values:  [24479.14317096 -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [15407.4440229  -7525.53407498 -7525.7277781   9107.64772345]
Reward: 9  Episode Reward:  53
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18701.28918172  6521.26582218 -1925.08326713 -1455.65174173]
------
Step:8, Action:North
State  208
Old Q Values:  [18701.28918172  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [77195.71447492  6521.26582218 -1925.08326713 -1455.65174173]
Reward: 100009  Episode Reward:  100062
xxxxx
xg ax
x   x
x   x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -718.90489343 -8695.4397473    650.42894315 -2601.74710518]
------
Step:1, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252  396.28357318  -35.88578819]
New Q values:  [1133.71900989   26.73544252 2465.41963602  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  7671.68735584   168.97876419]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 32838.17532535 17265.56586737]
New Q values:  [-2527.46239811 -8521.23367799 17762.90333701 17265.56586737]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15407.4440229  -7525.53407498 -7525.7277781   9107.64772345]
------
Step:3, Action:North
State  288
Old Q Values:  [15407.4440229  -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [29327.09195164 -7525.53407498 -7525.7277781   9107.64772345]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[77195.71447492  6521.26582218 -1925.08326713 -1455.65174173]
------
Step:4, Action:North
State  208
Old Q Values:  [77195.71447492  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [68628.13836798  6521.26582218 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   2091.02738669   -180.00807518 125814.84192672]
------
Step:5, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.60405899e+04]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  6.50411256e+03]
Reward: 9  Episode Reward:  45
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   274.92196841]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   274.92196841]
New Q values:  [ -281.736      -3455.78276043   106.72795614   173.49403538]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   213.75082673  -180.6       ]
------
Step:7, Action:East
State  110
Old Q Values:  [ -239.29051573 -6859.02098822   143.49985545  -180.6       ]
New Q values:  [ -239.29051573 -6859.02098822   108.84815279  -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   173.49403538]
------
Step:8, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   323.81898714   314.0846357 ]
New Q values:  [-9594.56523706 -8069.05606225   323.81898714 -5551.46558365]
Reward: -10001  Episode Reward:  -9958
xxxxx
xg  x
x.. x
x   x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 17762.90333701 17265.56586737]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 17762.90333701 17265.56586737]
New Q values:  [-2527.46239811 -8521.23367799 15908.6889203  17265.56586737]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[29327.09195164 -7525.53407498 -7525.7277781   9107.64772345]
------
Step:2, Action:North
State  288
Old Q Values:  [29327.09195164 -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [32324.67829105 -7525.53407498 -7525.7277781   9107.64772345]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[68628.13836798  6521.26582218 -1925.08326713 -1455.65174173]
------
Step:3, Action:North
State  208
Old Q Values:  [68628.13836798  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [65201.10792521  6521.26582218 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   2091.02738669   -180.00807518 125814.84192672]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  6.50411256e+03]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.65909324e+03]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   173.49403538]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   173.49403538]
New Q values:  [ -281.736      -3455.78276043   106.72795614   107.45205999]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6859.02098822   108.84815279  -180.6       ]
------
Step:6, Action:East
State  106
Old Q Values:  [ -180.6        -8952.15415062   213.75082673  -180.6       ]
New Q values:  [ -180.6        -8952.15415062   117.13594869  -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   107.45205999]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   323.81898714 -5551.46558365]
New Q values:  [-9594.56523706 -8069.05606225   323.81898714 -7897.68567139]
Reward: -10001  Episode Reward:  -9957
xxxxx
xg  x
x . x
x.  x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  469.8429831   353.8832415  -120.29354603]
------
Step:1, Action:South
State  110
Old Q Values:  [ -239.29051573 -6859.02098822   108.84815279  -180.6       ]
New Q values:  [ -239.29051573 -8529.75336677   108.84815279  -180.6       ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x ..x
xg .x
x...x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.28237339e+03 2.40118352e+03 2.91043938e+03]
------
Step:1, Action:West
State  201
Old Q Values:  [ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.40000000e-02]
New Q values:  [ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.00341972e+02]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  9.84673294 575.27118432 649.77457202 154.04646645]
------
Step:2, Action:East
State  189
Old Q Values:  [  9.84673294 575.27118432 649.77457202 154.04646645]
New Q values:  [   9.84673294  575.27118432 1350.08508362  154.04646645]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.00341972e+02]
------
Step:3, Action:South
State  199
Old Q Values:  [  22.48535485 2214.77383591  549.89931413 1915.70494401]
New Q values:  [  22.48535485 9602.36008829  549.89931413 1915.70494401]
Reward: 9  Episode Reward:  17
xxxxx
x. .x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  2756.37833686 29036.83517974]
------
Step:4, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 543.40265488]
New Q values:  [  1.64433      0.         -29.77444073 962.38695276]
Reward: 9  Episode Reward:  26
xxxxx
x. .x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 2465.41963602  -35.88578819]
------
Step:5, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 2465.41963602  -35.88578819]
New Q values:  [1133.71900989   26.73544252 3287.07406116  -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x. .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  7671.68735584   168.97876419]
------
Step:6, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  7671.68735584   168.97876419]
New Q values:  [   16.82637525 -5807.06396197 12771.47842965   168.97876419]
Reward: 9  Episode Reward:  34
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[32324.67829105 -7525.53407498 -7525.7277781   9107.64772345]
------
Step:7, Action:North
State  288
Old Q Values:  [32324.67829105 -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [32495.60369398 -7525.53407498 -7525.7277781   9107.64772345]
Reward: 9  Episode Reward:  43
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[65201.10792521  6521.26582218 -1925.08326713 -1455.65174173]
------
Step:8, Action:North
State  208
Old Q Values:  [65201.10792521  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [63830.2957481   6521.26582218 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  52
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283   2091.02738669   -180.00807518 125814.84192672]
------
Step:9, Action:West
State  130
Old Q Values:  [ 36041.91667283   2091.02738669   -180.00807518 125814.84192672]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 89626.34716675]
Reward: -1  Episode Reward:  51
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  17419.47777153 131003.36798688]
------
Step:10, Action:West
State  112
Old Q Values:  [    0.         11059.61439394  6789.02994987 99599.363136  ]
New Q values:  [    0.         11059.61439394  6789.02994987 99845.1452544 ]
Reward: 100009  Episode Reward:  100060
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1120.77770304   694.85009505   610.65516816 -4966.32149798]
------
Step:1, Action:South
State  181
Old Q Values:  [  89.26354357  558.35303952 3000.38984255  -30.99112081]
New Q values:  [  89.26354357 1214.86343416 3000.38984255  -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 3287.07406116  -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 3287.07406116  -35.88578819]
New Q values:  [ 1133.71900989    26.73544252 10031.28017839   -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  2756.37833686 29036.83517974]
------
Step:3, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  2756.37833686 29036.83517974]
New Q values:  [   37.74111519  -168.92307549  2756.37833686 14623.51812541]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1133.71900989    26.73544252 10031.28017839   -35.88578819]
------
Step:4, Action:East
State  261
Old Q Values:  [ 1133.71900989    26.73544252 10031.28017839   -35.88578819]
New Q values:  [1133.71900989   26.73544252 8398.96750898  -35.88578819]
Reward: -1  Episode Reward:  16
xxxxx
x .gx
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  2756.37833686 14623.51812541]
------
Step:5, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  2756.37833686 14623.51812541]
New Q values:  [  37.74111519 -168.92307549 2756.37833686 8368.49750286]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 8398.96750898  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 8398.96750898  -35.88578819]
New Q values:  [1133.71900989   26.73544252 5869.53625445  -35.88578819]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2756.37833686 8368.49750286]
------
Step:7, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2756.37833686 8368.49750286]
New Q values:  [  37.74111519 -168.92307549 2756.37833686 5107.65987748]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 5869.53625445  -35.88578819]
------
Step:8, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 5869.53625445  -35.88578819]
New Q values:  [1133.71900989   26.73544252 3879.51246502  -35.88578819]
Reward: -1  Episode Reward:  12
xxxxx
x .gx
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2756.37833686 5107.65987748]
------
Step:9, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197 12771.47842965   168.97876419]
New Q values:  [   16.82637525 -5807.06396197 12771.47842965  1230.84524518]
Reward: -1  Episode Reward:  11
xxxxx
x g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 3879.51246502  -35.88578819]
------
Step:10, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 3879.51246502  -35.88578819]
New Q values:  [1133.71900989   26.73544252 3083.50294925  -35.88578819]
Reward: -1  Episode Reward:  10
xxxxx
x .gx
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2756.37833686 5107.65987748]
------
Step:11, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197 12771.47842965  1230.84524518]
New Q values:  [   16.82637525 -5807.06396197 12771.47842965  1416.78898285]
Reward: -1  Episode Reward:  9
xxxxx
x g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 3083.50294925  -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 3083.50294925  -35.88578819]
New Q values:  [1133.71900989   26.73544252 2765.09914294  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2756.37833686 5107.65987748]
------
Step:13, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2756.37833686 5107.65987748]
New Q values:  [  37.74111519 -168.92307549 2756.37833686 2871.99369387]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 2765.09914294  -35.88578819]
------
Step:14, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 2765.09914294  -35.88578819]
New Q values:  [1133.71900989   26.73544252 1967.03776534  -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2756.37833686 2871.99369387]
------
Step:15, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2756.37833686 2871.99369387]
New Q values:  [  37.74111519 -168.92307549 2756.37833686 1738.30880715]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 1967.03776534  -35.88578819]
------
Step:16, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 1967.03776534  -35.88578819]
New Q values:  [1133.71900989   26.73544252 4617.65863503  -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197 12771.47842965  1416.78898285]
------
Step:17, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197 12771.47842965  1416.78898285]
New Q values:  [   16.82637525 -5807.06396197 14862.67248006  1416.78898285]
Reward: 9  Episode Reward:  13
xxxxx
x g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[32495.60369398 -7525.53407498 -7525.7277781   9107.64772345]
------
Step:18, Action:North
State  288
Old Q Values:  [32495.60369398 -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [32152.73020202 -7525.53407498 -7525.7277781   9107.64772345]
Reward: 9  Episode Reward:  22
xxxxx
xg..x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[63830.2957481   6521.26582218 -1925.08326713 -1455.65174173]
------
Step:19, Action:North
State  208
Old Q Values:  [63830.2957481   6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [52425.42244926  6521.26582218 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  31
xxxxx
x .ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2091.02738669  -180.00807518 89626.34716675]
------
Step:20, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.65909324e+03]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.10127291e+03]
Reward: 9  Episode Reward:  40
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   107.45205999]
------
Step:21, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   107.45205999]
New Q values:  [ -281.736      -3455.78276043   106.72795614   183.33371893]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  469.8429831   353.8832415  -120.29354603]
------
Step:22, Action:South
State  99
Old Q Values:  [    0.         29717.9609894  65700.46302897     0.        ]
New Q values:  [    0.         35404.93003926 65700.46302897     0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  8504.29052256     0.        ]
------
Step:23, Action:North
State  181
Old Q Values:  [  89.26354357 1214.86343416 3000.38984255  -30.99112081]
New Q values:  [ 401.84666257 1214.86343416 3000.38984255  -30.99112081]
Reward: -1  Episode Reward:  37
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1222.47081715   -8.57207238 -180.6       ]
------
Step:24, Action:South
State  108
Old Q Values:  [-8463.16477134  1609.55072817   534.17299302     0.        ]
New Q values:  [-8463.16477134   851.67531979   534.17299302     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1120.77770304   694.85009505   610.65516816 -4966.32149798]
------
Step:25, Action:South
State  181
Old Q Values:  [ 401.84666257 1214.86343416 3000.38984255  -30.99112081]
New Q values:  [ 401.84666257 1870.64296417 3000.38984255  -30.99112081]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 4617.65863503  -35.88578819]
------
Step:26, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 4617.65863503  -35.88578819]
New Q values:  [1133.71900989   26.73544252 2673.37695507  -35.88578819]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2756.37833686 1738.30880715]
------
Step:27, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2756.37833686 1738.30880715]
New Q values:  [   37.74111519  -168.92307549 10747.77039535  1738.30880715]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[32152.73020202 -7525.53407498 -7525.7277781   9107.64772345]
------
Step:28, Action:West
State  288
Old Q Values:  [32152.73020202 -7525.53407498 -7525.7277781   9107.64772345]
New Q values:  [32152.73020202 -7525.53407498 -7525.7277781   6866.79020799]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 10747.77039535  1738.30880715]
------
Step:29, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 10747.77039535  1738.30880715]
New Q values:  [   37.74111519  -168.92307549 10747.77039535  1496.73660938]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 2673.37695507  -35.88578819]
------
Step:30, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 2673.37695507  -35.88578819]
New Q values:  [1133.71900989   26.73544252 4293.08190063  -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 10747.77039535  1496.73660938]
------
Step:31, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 10747.77039535  1496.73660938]
New Q values:  [   37.74111519  -168.92307549 13944.32721875  1496.73660938]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[32152.73020202 -7525.53407498 -7525.7277781   6866.79020799]
------
Step:32, Action:West
State  288
Old Q Values:  [32152.73020202 -7525.53407498 -7525.7277781   6866.79020799]
New Q values:  [32152.73020202 -7525.53407498 -7525.7277781   7925.78584341]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 15908.6889203  17265.56586737]
------
Step:33, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 15908.6889203  17265.56586737]
New Q values:  [-2527.46239811 -8521.23367799 15908.6889203   8193.55091714]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 4293.08190063  -35.88578819]
------
Step:34, Action:East
State  257
Old Q Values:  [39205.03604789  2256.66526474  5231.26374966  1875.31501677]
New Q values:  [39205.03604789  2256.66526474  6864.51217595  1875.31501677]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 15908.6889203   8193.55091714]
------
Step:35, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 15908.6889203   8193.55091714]
New Q values:  [-2527.46239811 -8521.23367799 16008.69462872  8193.55091714]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[32152.73020202 -7525.53407498 -7525.7277781   7925.78584341]
------
Step:36, Action:North
State  288
Old Q Values:  [32152.73020202 -7525.53407498 -7525.7277781   7925.78584341]
New Q values:  [28588.11881559 -7525.53407498 -7525.7277781   7925.78584341]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[52425.42244926  6521.26582218 -1925.08326713 -1455.65174173]
------
Step:37, Action:North
State  208
Old Q Values:  [52425.42244926  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [21059.74313634  6521.26582218 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  23
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651   300.58052211 -2383.80019164 -1032.21090698]
------
Step:38, Action:South
State  136
Old Q Values:  [-5281.21195651   300.58052211 -2383.80019164 -1032.21090698]
New Q values:  [-5281.21195651  6437.55514975 -2383.80019164 -1032.21090698]
Reward: -1  Episode Reward:  22
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21059.74313634  6521.26582218 -1925.08326713 -1455.65174173]
------
Step:39, Action:South
State  208
Old Q Values:  [21059.74313634  6521.26582218 -1925.08326713 -1455.65174173]
New Q values:  [21059.74313634 11184.34197355 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  21
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[28588.11881559 -7525.53407498 -7525.7277781   7925.78584341]
------
Step:40, Action:North
State  288
Old Q Values:  [28588.11881559 -7525.53407498 -7525.7277781   7925.78584341]
New Q values:  [17752.57046714 -7525.53407498 -7525.7277781   7925.78584341]
Reward: -1  Episode Reward:  20
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21059.74313634 11184.34197355 -1925.08326713 -1455.65174173]
------
Step:41, Action:South
State  208
Old Q Values:  [21059.74313634 11184.34197355 -1925.08326713 -1455.65174173]
New Q values:  [21059.74313634  9798.90792956 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  19
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17752.57046714 -7525.53407498 -7525.7277781   7925.78584341]
------
Step:42, Action:North
State  288
Old Q Values:  [17752.57046714 -7525.53407498 -7525.7277781   7925.78584341]
New Q values:  [13418.35112776 -7525.53407498 -7525.7277781   7925.78584341]
Reward: -1  Episode Reward:  18
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21059.74313634  9798.90792956 -1925.08326713 -1455.65174173]
------
Step:43, Action:South
State  208
Old Q Values:  [21059.74313634  9798.90792956 -1925.08326713 -1455.65174173]
New Q values:  [21059.74313634  7944.46851015 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  17
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13418.35112776 -7525.53407498 -7525.7277781   7925.78584341]
------
Step:44, Action:North
State  288
Old Q Values:  [13418.35112776 -7525.53407498 -7525.7277781   7925.78584341]
New Q values:  [11684.663392   -7525.53407498 -7525.7277781   7925.78584341]
Reward: -1  Episode Reward:  16
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21059.74313634  7944.46851015 -1925.08326713 -1455.65174173]
------
Step:45, Action:South
State  208
Old Q Values:  [21059.74313634  7944.46851015 -1925.08326713 -1455.65174173]
New Q values:  [21059.74313634  6682.58642166 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11684.663392   -7525.53407498 -7525.7277781   7925.78584341]
------
Step:46, Action:West
State  288
Old Q Values:  [11684.663392   -7525.53407498 -7525.7277781   7925.78584341]
New Q values:  [11684.663392   -7525.53407498 -7525.7277781   7353.01250299]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 13944.32721875  1496.73660938]
------
Step:47, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549 13944.32721875  1496.73660938]
New Q values:  [   37.74111519  -168.92307549 13944.32721875  1886.01921394]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252 4293.08190063  -35.88578819]
------
Step:48, Action:East
State  261
Old Q Values:  [1133.71900989   26.73544252 4293.08190063  -35.88578819]
New Q values:  [1133.71900989   26.73544252  519.24114887  -35.88578819]
Reward: -10001  Episode Reward:  -9988
xxxxx
x   x
x . x
x g x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 16008.69462872  8193.55091714]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 16008.69462872  8193.55091714]
New Q values:  [-2527.46239811 -8521.23367799  9914.27686909  8193.55091714]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11684.663392   -7525.53407498 -7525.7277781   7353.01250299]
------
Step:2, Action:North
State  288
Old Q Values:  [11684.663392   -7525.53407498 -7525.7277781   7353.01250299]
New Q values:  [10997.1882977  -7525.53407498 -7525.7277781   7353.01250299]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21059.74313634  6682.58642166 -1925.08326713 -1455.65174173]
------
Step:3, Action:North
State  208
Old Q Values:  [21059.74313634  6682.58642166 -1925.08326713 -1455.65174173]
New Q values:  [ 8759.67912821  6682.58642166 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  27
xxxxx
x .ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.10127291e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.10127291e+03]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.00909281e+02]
Reward: 9  Episode Reward:  36
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   183.33371893]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   183.33371893]
New Q values:  [ -281.736      -3455.78276043   106.72795614   107.87427218]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   117.13594869  -180.6       ]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558  782.19810804  303.62422203 -252.78192178]
New Q values:  [-252.35169558  782.19810804  153.21197047 -252.78192178]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614   107.87427218]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   323.81898714 -7897.68567139]
New Q values:  [-9594.56523706 -8069.05606225   323.81898714 -3116.16865578]
Reward: -1  Episode Reward:  33
xxxxx
xag x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         145.01870926   65.14560537    0.        ]
------
Step:8, Action:South
State  107
Old Q Values:  [-252.35169558  782.19810804  153.21197047 -252.78192178]
New Q values:  [-252.35169558  970.18086207  153.21197047 -252.78192178]
Reward: 9  Episode Reward:  42
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   9.40190913    0.         2173.00539618 -178.98      ]
------
Step:9, Action:North
State  183
Old Q Values:  [ 471.26619624  322.19310035 8871.33679154    0.        ]
New Q values:  [ 478.96073712  322.19310035 8871.33679154    0.        ]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  970.18086207  153.21197047 -252.78192178]
------
Step:10, Action:South
State  110
Old Q Values:  [ -239.29051573 -8529.75336677   108.84815279  -180.6       ]
New Q values:  [ -239.29051573 -1921.51796559   108.84815279  -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:11, Action:East
State  184
Old Q Values:  [-2081.09028721     0.          7348.75703021     0.        ]
New Q values:  [-2081.09028721     0.         14061.4268019      0.        ]
Reward: 9  Episode Reward:  49
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  37055.07996604   425.06225586   408.67479662]
------
Step:12, Action:South
State  200
Old Q Values:  [   62.8218634  37055.07996604   425.06225586   408.67479662]
New Q values:  [   62.8218634  17795.71504714   425.06225586   408.67479662]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9914.27686909  8193.55091714]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9914.27686909  8193.55091714]
New Q values:  [-2527.46239811 -8521.23367799  7264.26723695  8193.55091714]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10997.1882977  -7525.53407498 -7525.7277781   7353.01250299]
------
Step:14, Action:North
State  288
Old Q Values:  [10997.1882977  -7525.53407498 -7525.7277781   7353.01250299]
New Q values:  [16004.09884574 -7525.53407498 -7525.7277781   7353.01250299]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.86860784e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:15, Action:North
State  218
Old Q Values:  [12217.38301112   848.96225083     0.           429.03841886]
New Q values:  [5036.62598862  848.96225083    0.          429.03841886]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.00909281e+02]
------
Step:16, Action:West
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 89626.34716675]
New Q values:  [36041.91667283  2091.02738669  -180.00807518 75150.94926276]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  17419.47777153 131003.36798688]
------
Step:17, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614   107.87427218]
New Q values:  [ -281.736      -3455.78276043   106.72795614    77.69049348]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   117.13594869  -180.6       ]
------
Step:18, Action:East
State  107
Old Q Values:  [-252.35169558  970.18086207  153.21197047 -252.78192178]
New Q values:  [-252.35169558  970.18086207   92.70317503 -252.78192178]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614    77.69049348]
------
Step:19, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   323.81898714 -3116.16865578]
New Q values:  [-9594.56523706 -8069.05606225  2060.19413978 -3116.16865578]
Reward: -1  Episode Reward:  41
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  6437.55514975 -2383.80019164 -1032.21090698]
------
Step:20, Action:South
State  136
Old Q Values:  [-5281.21195651  6437.55514975 -2383.80019164 -1032.21090698]
New Q values:  [-5281.21195651  5974.73076945 -2383.80019164 -1032.21090698]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  251.87018622   372.98523174 -8489.43729461 11334.36236516]
------
Step:21, Action:West
State  216
Old Q Values:  [  251.87018622   372.98523174 -8489.43729461 11334.36236516]
New Q values:  [  251.87018622   372.98523174 -8489.43729461  5623.92020088]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.00341972e+02]
------
Step:22, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  3.63591752e+03 -8.94356769e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  5.63706517e+03 -8.94356769e+03  2.00341972e+02]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 13944.32721875  1886.01921394]
------
Step:23, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 13944.32721875  1886.01921394]
New Q values:  [   37.74111519  -168.92307549 10378.36054122  1886.01921394]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16004.09884574 -7525.53407498 -7525.7277781   7353.01250299]
------
Step:24, Action:West
State  288
Old Q Values:  [16004.09884574 -7525.53407498 -7525.7277781   7353.01250299]
New Q values:  [16004.09884574 -7525.53407498 -7525.7277781   6054.11316356]
Reward: -1  Episode Reward:  36
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 10378.36054122  1886.01921394]
------
Step:25, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7264.26723695  8193.55091714]
New Q values:  [-2527.46239811 -8521.23367799  7706.3365485   8193.55091714]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16004.09884574 -7525.53407498 -7525.7277781   6054.11316356]
------
Step:26, Action:North
State  288
Old Q Values:  [16004.09884574 -7525.53407498 -7525.7277781   6054.11316356]
New Q values:  [ 8088.21559856 -7525.53407498 -7525.7277781   6054.11316356]
Reward: -1  Episode Reward:  34
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  251.87018622   372.98523174 -8489.43729461  5623.92020088]
------
Step:27, Action:West
State  216
Old Q Values:  [  251.87018622   372.98523174 -8489.43729461  5623.92020088]
New Q values:  [  251.87018622   372.98523174 -8489.43729461  7587.68259449]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  17795.71504714   425.06225586   408.67479662]
------
Step:28, Action:South
State  200
Old Q Values:  [   62.8218634  17795.71504714   425.06225586   408.67479662]
New Q values:  [  62.8218634  9575.751294    425.06225586  408.67479662]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7706.3365485   8193.55091714]
------
Step:29, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7706.3365485   8193.55091714]
New Q values:  [-2527.46239811 -8521.23367799  7706.3365485  75044.33118122]
Reward: 100009  Episode Reward:  100041
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8088.21559856 -7525.53407498 -7525.7277781   6054.11316356]
------
Step:1, Action:North
State  288
Old Q Values:  [ 8088.21559856 -7525.53407498 -7525.7277781   6054.11316356]
New Q values:  [ 5868.58997789 -7525.53407498 -7525.7277781   6054.11316356]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8759.67912821  6682.58642166 -1925.08326713 -1455.65174173]
------
Step:2, Action:North
State  208
Old Q Values:  [ 8759.67912821  6682.58642166 -1925.08326713 -1455.65174173]
New Q values:  [ 5295.69088212  6682.58642166 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  8
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  5974.73076945 -2383.80019164 -1032.21090698]
------
Step:3, Action:South
State  136
Old Q Values:  [-5281.21195651  5974.73076945 -2383.80019164 -1032.21090698]
New Q values:  [-5281.21195651  4394.06823428 -2383.80019164 -1032.21090698]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5295.69088212  6682.58642166 -1925.08326713 -1455.65174173]
------
Step:4, Action:South
State  208
Old Q Values:  [ 5295.69088212  6682.58642166 -1925.08326713 -1455.65174173]
New Q values:  [ 5295.69088212  4488.66851773 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  6
xxxxx
x..gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5868.58997789 -7525.53407498 -7525.7277781   6054.11316356]
------
Step:5, Action:West
State  288
Old Q Values:  [ 5868.58997789 -7525.53407498 -7525.7277781   6054.11316356]
New Q values:  [ 5868.58997789 -7525.53407498 -7525.7277781   5540.55342779]
Reward: 9  Episode Reward:  15
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549 10378.36054122  1886.01921394]
------
Step:6, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549 10378.36054122  1886.01921394]
New Q values:  [  37.74111519 -168.92307549 5911.32120985 1886.01921394]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5868.58997789 -7525.53407498 -7525.7277781   5540.55342779]
------
Step:7, Action:West
State  288
Old Q Values:  [ 5868.58997789 -7525.53407498 -7525.7277781   5540.55342779]
New Q values:  [ 5868.58997789 -7525.53407498 -7525.7277781  24728.92072548]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7706.3365485  75044.33118122]
------
Step:8, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 5911.32120985 1886.01921394]
New Q values:  [   37.74111519  -168.92307549  5911.32120985 12521.31849995]
Reward: 9  Episode Reward:  22
xxxxx
x.. x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39205.03604789  2256.66526474  6864.51217595  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [39205.03604789  2256.66526474  6864.51217595  1875.31501677]
New Q values:  [39205.76006265  2256.66526474  6864.51217595  1875.31501677]
Reward: 9  Episode Reward:  31
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  8504.29052256     0.        ]
------
Step:10, Action:North
State  180
Old Q Values:  [-1120.77770304   694.85009505   610.65516816 -4966.32149798]
New Q values:  [ -441.36308122   694.85009505   610.65516816 -4966.32149798]
Reward: 9  Episode Reward:  40
xxxxx
xa. x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[ -180.6       -2257.0253383     5.16       -180.6      ]
------
Step:11, Action:East
State  108
Old Q Values:  [-8463.16477134   851.67531979   534.17299302     0.        ]
New Q values:  [-8463.16477134   851.67531979   837.12743914     0.        ]
Reward: 9  Episode Reward:  49
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2060.19413978 -3116.16865578]
------
Step:12, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2060.19413978 -3116.16865578]
New Q values:  [-9594.56523706 -8069.05606225  2141.6981262  -3116.16865578]
Reward: -1  Episode Reward:  48
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4394.06823428 -2383.80019164 -1032.21090698]
------
Step:13, Action:South
State  130
Old Q Values:  [36041.91667283  2091.02738669  -180.00807518 75150.94926276]
New Q values:  [36041.91667283  2424.51821931  -180.00807518 75150.94926276]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5295.69088212  4488.66851773 -1925.08326713 -1455.65174173]
------
Step:14, Action:North
State  208
Old Q Values:  [ 5295.69088212  4488.66851773 -1925.08326713 -1455.65174173]
New Q values:  [ 3435.89682313  4488.66851773 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  46
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4394.06823428 -2383.80019164 -1032.21090698]
------
Step:15, Action:South
State  130
Old Q Values:  [36041.91667283  2424.51821931  -180.00807518 75150.94926276]
New Q values:  [36041.91667283  2315.80784304  -180.00807518 75150.94926276]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3435.89682313  4488.66851773 -1925.08326713 -1455.65174173]
------
Step:16, Action:South
State  208
Old Q Values:  [ 3435.89682313  4488.66851773 -1925.08326713 -1455.65174173]
New Q values:  [ 3435.89682313  9213.54362474 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5868.58997789 -7525.53407498 -7525.7277781  24728.92072548]
------
Step:17, Action:West
State  288
Old Q Values:  [ 5868.58997789 -7525.53407498 -7525.7277781  24728.92072548]
New Q values:  [ 5868.58997789 -7525.53407498 -7525.7277781  32404.26764456]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7706.3365485  75044.33118122]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7706.3365485  75044.33118122]
New Q values:  [-2527.46239811 -8521.23367799  7706.3365485  30212.26115543]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -718.90489343 -8695.4397473    650.42894315 -2601.74710518]
------
Step:19, Action:East
State  260
Old Q Values:  [ -718.90489343 -8695.4397473    650.42894315 -2601.74710518]
New Q values:  [ -718.90489343 -8695.4397473   9323.24992389 -2601.74710518]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7706.3365485  30212.26115543]
------
Step:20, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7706.3365485  30212.26115543]
New Q values:  [-2527.46239811 -8521.23367799 12803.21491277 30212.26115543]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5868.58997789 -7525.53407498 -7525.7277781  32404.26764456]
------
Step:21, Action:North
State  288
Old Q Values:  [ 5868.58997789 -7525.53407498 -7525.7277781  32404.26764456]
New Q values:  [13952.65951781 -7525.53407498 -7525.7277781  32404.26764456]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.86860784e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:22, Action:North
State  208
Old Q Values:  [ 3435.89682313  9213.54362474 -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342  9213.54362474 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.00909281e+02]
------
Step:23, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  5.00909281e+02]
New Q values:  [-139.45925583 -136.92174709   -0.32296531  231.78209907]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   106.72795614    77.69049348]
------
Step:24, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   106.72795614    77.69049348]
New Q values:  [ -281.736      -3455.78276043   111.62581218    77.69049348]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  231.78209907]
------
Step:25, Action:West
State  138
Old Q Values:  [-139.45925583 -136.92174709   -0.32296531  231.78209907]
New Q values:  [-139.45925583 -136.92174709   -0.32296531  281.46660307]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   153.83468034   631.17921147]
------
Step:26, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   153.83468034   631.17921147]
New Q values:  [ -253.44886264 -1902.20915811   153.83468034   392.82457952]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  469.8429831   353.8832415  -120.29354603]
------
Step:27, Action:South
State  111
Old Q Values:  [-177.44732869  469.8429831   353.8832415  -120.29354603]
New Q values:  [-177.44732869 2848.7382307   353.8832415  -120.29354603]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 478.96073712  322.19310035 8871.33679154    0.        ]
------
Step:28, Action:East
State  179
Old Q Values:  [    0.         17203.58890013 44694.72727926     0.        ]
New Q values:  [    0.         17203.58890013 84803.10007929     0.        ]
Reward: 100009  Episode Reward:  100042
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  2.30660306e+04  1.20371620e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  2.30660306e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  2.08376358e+04  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3.86860784e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
------
Step:2, Action:North
State  210
Old Q Values:  [3.86860784e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [1.55642713e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-139.45925583 -136.92174709   -0.32296531  281.46660307]
------
Step:3, Action:West
State  138
Old Q Values:  [-139.45925583 -136.92174709   -0.32296531  281.46660307]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.94189970e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          6712.83097384  17419.47777153 131003.36798688]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   111.62581218    77.69049348]
New Q values:  [ -281.736      -3455.78276043   111.62581218    69.13064323]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -1921.51796559   108.84815279  -180.6       ]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558  970.18086207   92.70317503 -252.78192178]
New Q values:  [-252.35169558  970.18086207   69.96901366 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   111.62581218    69.13064323]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   111.62581218    69.13064323]
New Q values:  [ -281.736      -3455.78276043 11869.74943606    69.13064323]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.94189970e+04]
------
Step:7, Action:West
State  136
Old Q Values:  [-5281.21195651  4394.06823428 -2383.80019164 -1032.21090698]
New Q values:  [-5281.21195651  4394.06823428 -2383.80019164 -5770.97492493]
Reward: -10001  Episode Reward:  -9967
xxxxx
x g x
x.  x
x ..x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2848.7382307   353.8832415  -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6       2847.0151067    5.4          0.       ]
New Q values:  [-180.6        3805.60708014    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 478.96073712  322.19310035 8871.33679154    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [ 401.84666257 1870.64296417 3000.38984255  -30.99112081]
New Q values:  [  401.84666257  1870.64296417 -4652.97823103   -30.99112081]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x g.x
x.. x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2848.7382307   353.8832415  -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6        3805.60708014    5.4           0.        ]
New Q values:  [-180.6        4189.04386952    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 478.96073712  322.19310035 8871.33679154    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 478.96073712  322.19310035 8871.33679154    0.        ]
New Q values:  [ 478.96073712  322.19310035 7338.58662551    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.28237339e+03 2.40118352e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.28237339e+03 3.72993650e+03 2.91043938e+03]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  9213.54362474 -1925.08326713 -1455.65174173]
------
Step:4, Action:South
State  208
Old Q Values:  [ 1524.03151342  9213.54362474 -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342 13412.09774326 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13952.65951781 -7525.53407498 -7525.7277781  32404.26764456]
------
Step:5, Action:West
State  288
Old Q Values:  [13952.65951781 -7525.53407498 -7525.7277781  32404.26764456]
New Q values:  [13952.65951781 -7525.53407498 -7525.7277781  22024.78540445]
Reward: -1  Episode Reward:  35
xxxxx
xg..x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 12803.21491277 30212.26115543]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12803.21491277 30212.26115543]
New Q values:  [-2527.46239811 -8521.23367799 12803.21491277 14887.27943934]
Reward: 9  Episode Reward:  44
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -718.90489343 -8695.4397473   9323.24992389 -2601.74710518]
------
Step:7, Action:East
State  260
Old Q Values:  [ -718.90489343 -8695.4397473   9323.24992389 -2601.74710518]
New Q values:  [ -718.90489343 -8695.4397473   8194.88380136 -2601.74710518]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 12803.21491277 14887.27943934]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12803.21491277 14887.27943934]
New Q values:  [-2527.46239811 -8521.23367799 11728.12158644 14887.27943934]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13952.65951781 -7525.53407498 -7525.7277781  22024.78540445]
------
Step:9, Action:West
State  288
Old Q Values:  [13952.65951781 -7525.53407498 -7525.7277781  22024.78540445]
New Q values:  [13952.65951781 -7525.53407498 -7525.7277781  13275.49799358]
Reward: -1  Episode Reward:  41
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 11728.12158644 14887.27943934]
------
Step:10, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197 14862.67248006  1416.78898285]
New Q values:  [   16.82637525 -5807.06396197 14862.67248006   906.23129611]
Reward: -1  Episode Reward:  40
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1133.71900989   26.73544252  519.24114887  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [1133.71900989   26.73544252  519.24114887  -35.88578819]
New Q values:  [1014.08049321   26.73544252  519.24114887  -35.88578819]
Reward: -1  Episode Reward:  39
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  401.84666257  1870.64296417 -4652.97823103   -30.99112081]
------
Step:12, Action:South
State  181
Old Q Values:  [  401.84666257  1870.64296417 -4652.97823103   -30.99112081]
New Q values:  [  401.84666257  1051.88133363 -4652.97823103   -30.99112081]
Reward: -1  Episode Reward:  38
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1014.08049321   26.73544252  519.24114887  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [1014.08049321   26.73544252  519.24114887  -35.88578819]
New Q values:  [720.59659737  26.73544252 519.24114887 -35.88578819]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  401.84666257  1051.88133363 -4652.97823103   -30.99112081]
------
Step:14, Action:South
State  181
Old Q Values:  [  401.84666257  1051.88133363 -4652.97823103   -30.99112081]
New Q values:  [  401.84666257   636.33151266 -4652.97823103   -30.99112081]
Reward: -1  Episode Reward:  36
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[720.59659737  26.73544252 519.24114887 -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [720.59659737  26.73544252 519.24114887 -35.88578819]
New Q values:  [478.53809275  26.73544252 519.24114887 -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  401.84666257   636.33151266 -4652.97823103   -30.99112081]
------
Step:16, Action:South
State  183
Old Q Values:  [ 478.96073712  322.19310035 7338.58662551    0.        ]
New Q values:  [ 478.96073712  284.0495848  7338.58662551    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[478.53809275  26.73544252 519.24114887 -35.88578819]
------
Step:17, Action:North
State  260
Old Q Values:  [ -718.90489343 -8695.4397473   8194.88380136 -2601.74710518]
New Q values:  [ 1202.82142375 -8695.4397473   8194.88380136 -2601.74710518]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:18, Action:East
State  183
Old Q Values:  [ 478.96073712  284.0495848  7338.58662551    0.        ]
New Q values:  [ 478.96073712  284.0495848  9186.12537523    0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x a x
x g x
xxxxx
Step:19, Action:North
State  199
Old Q Values:  [  22.48535485 9602.36008829  549.89931413 1915.70494401]
New Q values:  [  14.86214194 9602.36008829  549.89931413 1915.70494401]
Reward: 9  Episode Reward:  41
xxxxx
x a.x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SE
[0.   1.56 0.   0.  ]
------
Step:20, Action:South
State  118
Old Q Values:  [ -281.736 -6000.6       0.        0.   ]
New Q values:  [-281.736      1383.81190889    0.            0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
------
Step:21, Action:East
State  196
Old Q Values:  [-2469.90645144   471.55277317   174.55451539   239.04887894]
New Q values:  [-2469.90645144   471.55277317  4092.85112914   239.04887894]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342 13412.09774326 -1925.08326713 -1455.65174173]
------
Step:22, Action:South
State  208
Old Q Values:  [ 1524.03151342 13412.09774326 -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342  9550.03695265 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  38
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13952.65951781 -7525.53407498 -7525.7277781  13275.49799358]
------
Step:23, Action:North
State  288
Old Q Values:  [13952.65951781 -7525.53407498 -7525.7277781  13275.49799358]
New Q values:  [ 8445.47489292 -7525.53407498 -7525.7277781  13275.49799358]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  9550.03695265 -1925.08326713 -1455.65174173]
------
Step:24, Action:South
State  208
Old Q Values:  [ 1524.03151342  9550.03695265 -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342  7802.06417913 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  36
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8445.47489292 -7525.53407498 -7525.7277781  13275.49799358]
------
Step:25, Action:West
State  288
Old Q Values:  [ 8445.47489292 -7525.53407498 -7525.7277781  13275.49799358]
New Q values:  [ 8445.47489292 -7525.53407498 -7525.7277781   9065.99474742]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5911.32120985 12521.31849995]
------
Step:26, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 962.38695276]
New Q values:  [  1.64433      0.         -29.77444073 540.12712577]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[478.53809275  26.73544252 519.24114887 -35.88578819]
------
Step:27, Action:East
State  257
Old Q Values:  [39205.76006265  2256.66526474  6864.51217595  1875.31501677]
New Q values:  [39205.76006265  2256.66526474  6501.60042036  1875.31501677]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5911.32120985 12521.31849995]
------
Step:28, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197 14862.67248006   906.23129611]
New Q values:  [   16.82637525 -5807.06396197 14862.67248006   517.6648631 ]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[478.53809275  26.73544252 519.24114887 -35.88578819]
------
Step:29, Action:East
State  257
Old Q Values:  [39205.76006265  2256.66526474  6501.60042036  1875.31501677]
New Q values:  [39205.76006265  2256.66526474  6356.43571813  1875.31501677]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5911.32120985 12521.31849995]
------
Step:30, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197 14862.67248006   517.6648631 ]
New Q values:  [   16.82637525 -5807.06396197 14862.67248006   362.2382899 ]
Reward: -1  Episode Reward:  30
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[478.53809275  26.73544252 519.24114887 -35.88578819]
------
Step:31, Action:East
State  260
Old Q Values:  [ 1202.82142375 -8695.4397473   8194.88380136 -2601.74710518]
New Q values:  [ 1202.82142375 -8695.4397473   7736.15526456 -2601.74710518]
Reward: -1  Episode Reward:  29
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197 14862.67248006   362.2382899 ]
------
Step:32, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197 14862.67248006   362.2382899 ]
New Q values:  [   16.82637525 -5807.06396197  8664.26741625   362.2382899 ]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8445.47489292 -7525.53407498 -7525.7277781   9065.99474742]
------
Step:33, Action:West
State  288
Old Q Values:  [ 8445.47489292 -7525.53407498 -7525.7277781   9065.99474742]
New Q values:  [ 8445.47489292 -7525.53407498 -7525.7277781   7382.19344895]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5911.32120985 12521.31849995]
------
Step:34, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  8664.26741625   362.2382899 ]
New Q values:  [   16.82637525 -5807.06396197  8664.26741625   300.06766062]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[478.53809275  26.73544252 519.24114887 -35.88578819]
------
Step:35, Action:East
State  260
Old Q Values:  [ 1202.82142375 -8695.4397473   7736.15526456 -2601.74710518]
New Q values:  [ 1202.82142375 -8695.4397473   5693.1423307  -2601.74710518]
Reward: -1  Episode Reward:  25
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  8664.26741625   300.06766062]
------
Step:36, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  8664.26741625   300.06766062]
New Q values:  [   16.82637525 -5807.06396197  5998.74943437   300.06766062]
Reward: -1  Episode Reward:  24
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8445.47489292 -7525.53407498 -7525.7277781   7382.19344895]
------
Step:37, Action:North
State  288
Old Q Values:  [ 8445.47489292 -7525.53407498 -7525.7277781   7382.19344895]
New Q values:  [ 5718.20921091 -7525.53407498 -7525.7277781   7382.19344895]
Reward: -1  Episode Reward:  23
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  7802.06417913 -1925.08326713 -1455.65174173]
------
Step:38, Action:South
State  208
Old Q Values:  [ 1524.03151342  7802.06417913 -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342  5334.88370634 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  22
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5718.20921091 -7525.53407498 -7525.7277781   7382.19344895]
------
Step:39, Action:West
State  288
Old Q Values:  [ 5718.20921091 -7525.53407498 -7525.7277781   7382.19344895]
New Q values:  [ 5718.20921091 -7525.53407498 -7525.7277781   4751.90220989]
Reward: -1  Episode Reward:  21
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  5998.74943437   300.06766062]
------
Step:40, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  5998.74943437   300.06766062]
New Q values:  [   16.82637525 -5807.06396197  4114.36253702   300.06766062]
Reward: -1  Episode Reward:  20
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5718.20921091 -7525.53407498 -7525.7277781   4751.90220989]
------
Step:41, Action:North
State  288
Old Q Values:  [ 5718.20921091 -7525.53407498 -7525.7277781   4751.90220989]
New Q values:  [ 3887.14879627 -7525.53407498 -7525.7277781   4751.90220989]
Reward: -1  Episode Reward:  19
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  5334.88370634 -1925.08326713 -1455.65174173]
------
Step:42, Action:South
State  208
Old Q Values:  [ 1524.03151342  5334.88370634 -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342  3558.9241455  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  18
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3887.14879627 -7525.53407498 -7525.7277781   4751.90220989]
------
Step:43, Action:West
State  288
Old Q Values:  [ 3887.14879627 -7525.53407498 -7525.7277781   4751.90220989]
New Q values:  [ 3887.14879627 -7525.53407498 -7525.7277781   3134.46964506]
Reward: -1  Episode Reward:  17
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  4114.36253702   300.06766062]
------
Step:44, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  4114.36253702   300.06766062]
New Q values:  [   16.82637525 -5807.06396197  2811.28965369   300.06766062]
Reward: -1  Episode Reward:  16
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3887.14879627 -7525.53407498 -7525.7277781   3134.46964506]
------
Step:45, Action:North
State  288
Old Q Values:  [ 3887.14879627 -7525.53407498 -7525.7277781   3134.46964506]
New Q values:  [ 2621.93676216 -7525.53407498 -7525.7277781   3134.46964506]
Reward: -1  Episode Reward:  15
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  3558.9241455  -1925.08326713 -1455.65174173]
------
Step:46, Action:South
State  208
Old Q Values:  [ 1524.03151342  3558.9241455  -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342  2363.31055172 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  14
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2621.93676216 -7525.53407498 -7525.7277781   3134.46964506]
------
Step:47, Action:West
State  288
Old Q Values:  [ 2621.93676216 -7525.53407498 -7525.7277781   3134.46964506]
New Q values:  [ 2621.93676216 -7525.53407498 -7525.7277781   2096.57475413]
Reward: -1  Episode Reward:  13
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2811.28965369   300.06766062]
------
Step:48, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2811.28965369   300.06766062]
New Q values:  [   16.82637525 -5807.06396197  1910.49689012   300.06766062]
Reward: -1  Episode Reward:  12
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2621.93676216 -7525.53407498 -7525.7277781   2096.57475413]
------
Step:49, Action:North
State  288
Old Q Values:  [ 2621.93676216 -7525.53407498 -7525.7277781   2096.57475413]
New Q values:  [ 1757.16787038 -7525.53407498 -7525.7277781   2096.57475413]
Reward: -1  Episode Reward:  11
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  2363.31055172 -1925.08326713 -1455.65174173]
------
Step:50, Action:South
State  208
Old Q Values:  [ 1524.03151342  2363.31055172 -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342  1573.69664693 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  10
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1757.16787038 -7525.53407498 -7525.7277781   2096.57475413]
------
Step:51, Action:West
State  288
Old Q Values:  [ 1757.16787038 -7525.53407498 -7525.7277781   2096.57475413]
New Q values:  [ 1757.16787038 -7525.53407498 -7525.7277781   4594.42545164]
Reward: -1  Episode Reward:  9
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5911.32120985 12521.31849995]
------
Step:52, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1910.49689012   300.06766062]
New Q values:  [   16.82637525 -5807.06396197  1910.49689012   275.19940891]
Reward: -1  Episode Reward:  8
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[478.53809275  26.73544252 519.24114887 -35.88578819]
------
Step:53, Action:East
State  261
Old Q Values:  [478.53809275  26.73544252 519.24114887 -35.88578819]
New Q values:  [478.53809275  26.73544252 780.24552659 -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1910.49689012   275.19940891]
------
Step:54, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1910.49689012   275.19940891]
New Q values:  [   16.82637525 -5807.06396197  2141.92639154   275.19940891]
Reward: -1  Episode Reward:  6
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1757.16787038 -7525.53407498 -7525.7277781   4594.42545164]
------
Step:55, Action:West
State  288
Old Q Values:  [ 1757.16787038 -7525.53407498 -7525.7277781   4594.42545164]
New Q values:  [ 1757.16787038 -7525.53407498 -7525.7277781   5593.56573064]
Reward: -1  Episode Reward:  5
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5911.32120985 12521.31849995]
------
Step:56, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 540.12712577]
New Q values:  [  1.64433      0.         -29.77444073 449.52450828]
Reward: -1  Episode Reward:  4
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[478.53809275  26.73544252 780.24552659 -35.88578819]
------
Step:57, Action:East
State  261
Old Q Values:  [478.53809275  26.73544252 780.24552659 -35.88578819]
New Q values:  [478.53809275  26.73544252 954.0761281  -35.88578819]
Reward: -1  Episode Reward:  3
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2141.92639154   275.19940891]
------
Step:58, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2141.92639154   275.19940891]
New Q values:  [   16.82637525 -5807.06396197  2534.24027581   275.19940891]
Reward: -1  Episode Reward:  2
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1757.16787038 -7525.53407498 -7525.7277781   5593.56573064]
------
Step:59, Action:North
State  288
Old Q Values:  [ 1757.16787038 -7525.53407498 -7525.7277781   5593.56573064]
New Q values:  [ 1174.37614223 -7525.53407498 -7525.7277781   5593.56573064]
Reward: -1  Episode Reward:  1
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  1573.69664693 -1925.08326713 -1455.65174173]
------
Step:60, Action:South
State  210
Old Q Values:  [1.55642713e+04 7.30426418e+02 2.24233123e+03 3.52184257e+00]
New Q values:  [1.55642713e+04 1.96964029e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  0
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1174.37614223 -7525.53407498 -7525.7277781   5593.56573064]
------
Step:61, Action:North
State  288
Old Q Values:  [ 1174.37614223 -7525.53407498 -7525.7277781   5593.56573064]
New Q values:  [ 5138.43186183 -7525.53407498 -7525.7277781   5593.56573064]
Reward: -1  Episode Reward:  -1
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.55642713e+04 1.96964029e+03 2.24233123e+03 3.52184257e+00]
------
Step:62, Action:North
State  210
Old Q Values:  [1.55642713e+04 1.96964029e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [8.87763933e+04 1.96964029e+03 2.24233123e+03 3.52184257e+00]
Reward: 100009  Episode Reward:  100008
xxxxx
x  ax
x   x
x g x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.28237339e+03 3.72993650e+03 2.91043938e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.28237339e+03 3.72993650e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.28237339e+03 1.96948359e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  1573.69664693 -1925.08326713 -1455.65174173]
------
Step:2, Action:South
State  210
Old Q Values:  [8.87763933e+04 1.96964029e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [8.87763933e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5138.43186183 -7525.53407498 -7525.7277781   5593.56573064]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5138.43186183 -7525.53407498 -7525.7277781   5593.56573064]
New Q values:  [ 2526.88173881 -7525.53407498 -7525.7277781   5593.56573064]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1524.03151342  1573.69664693 -1925.08326713 -1455.65174173]
------
Step:4, Action:South
State  208
Old Q Values:  [ 1524.03151342  1573.69664693 -1925.08326713 -1455.65174173]
New Q values:  [ 1524.03151342  2306.94837796 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2526.88173881 -7525.53407498 -7525.7277781   5593.56573064]
------
Step:5, Action:West
State  288
Old Q Values:  [ 2526.88173881 -7525.53407498 -7525.7277781   5593.56573064]
New Q values:  [ 2526.88173881 -7525.53407498 -7525.7277781   5999.22184224]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5911.32120985 12521.31849995]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11728.12158644 14887.27943934]
New Q values:  [-2527.46239811 -8521.23367799 11728.12158644  6246.53461417]
Reward: 9  Episode Reward:  34
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[478.53809275  26.73544252 954.0761281  -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [478.53809275  26.73544252 954.0761281  -35.88578819]
New Q values:  [381.7146909   26.73544252 954.0761281  -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  401.84666257   636.33151266 -4652.97823103   -30.99112081]
------
Step:8, Action:South
State  180
Old Q Values:  [ -441.36308122   694.85009505   610.65516816 -4966.32149798]
New Q values:  [ -441.36308122  1985.28273723   610.65516816 -4966.32149798]
Reward: -1  Episode Reward:  32
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1202.82142375 -8695.4397473   5693.1423307  -2601.74710518]
------
Step:9, Action:East
State  260
Old Q Values:  [ 1202.82142375 -8695.4397473   5693.1423307  -2601.74710518]
New Q values:  [ 1202.82142375 -8695.4397473   5795.09340821 -2601.74710518]
Reward: -1  Episode Reward:  31
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 11728.12158644  6246.53461417]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11728.12158644  6246.53461417]
New Q values:  [-2527.46239811 -8521.23367799  6490.41518725  6246.53461417]
Reward: -1  Episode Reward:  30
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2526.88173881 -7525.53407498 -7525.7277781   5999.22184224]
------
Step:11, Action:West
State  288
Old Q Values:  [ 2526.88173881 -7525.53407498 -7525.7277781   5999.22184224]
New Q values:  [ 2526.88173881 -7525.53407498 -7525.7277781   6155.48428688]
Reward: -1  Episode Reward:  29
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  5911.32120985 12521.31849995]
------
Step:12, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  5911.32120985 12521.31849995]
New Q values:  [  37.74111519 -168.92307549 5911.32120985 5294.15023841]
Reward: -1  Episode Reward:  28
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[381.7146909   26.73544252 954.0761281  -35.88578819]
------
Step:13, Action:East
State  261
Old Q Values:  [381.7146909   26.73544252 954.0761281  -35.88578819]
New Q values:  [ 381.7146909    26.73544252 2154.42681419  -35.88578819]
Reward: -1  Episode Reward:  27
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5911.32120985 5294.15023841]
------
Step:14, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 5911.32120985 5294.15023841]
New Q values:  [  37.74111519 -168.92307549 5911.32120985 2763.38813962]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 2154.42681419  -35.88578819]
------
Step:15, Action:East
State  257
Old Q Values:  [39205.76006265  2256.66526474  6356.43571813  1875.31501677]
New Q values:  [39205.76006265  2256.66526474  4315.37065021  1875.31501677]
Reward: -1  Episode Reward:  25
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 5911.32120985 2763.38813962]
------
Step:16, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 5911.32120985 2763.38813962]
New Q values:  [  37.74111519 -168.92307549 4210.57377001 2763.38813962]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2526.88173881 -7525.53407498 -7525.7277781   6155.48428688]
------
Step:17, Action:West
State  288
Old Q Values:  [ 2526.88173881 -7525.53407498 -7525.7277781   6155.48428688]
New Q values:  [ 2526.88173881 -7525.53407498 -7525.7277781   3724.76584575]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4210.57377001 2763.38813962]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6490.41518725  6246.53461417]
New Q values:  [-2527.46239811 -8521.23367799  6490.41518725  3144.34188992]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 2154.42681419  -35.88578819]
------
Step:19, Action:North
State  260
Old Q Values:  [ 1202.82142375 -8695.4397473   5795.09340821 -2601.74710518]
New Q values:  [ 1971.51195062 -8695.4397473   5795.09340821 -2601.74710518]
Reward: -1  Episode Reward:  21
xxxxx
x...x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:20, Action:East
State  180
Old Q Values:  [ -441.36308122  1985.28273723   610.65516816 -4966.32149798]
New Q values:  [ -441.36308122  1985.28273723  1116.79388132 -4966.32149798]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.28237339e+03 1.96948359e+03 2.91043938e+03]
------
Step:21, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.28237339e+03 1.96948359e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.85947391e+03 1.96948359e+03 2.91043938e+03]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6490.41518725  3144.34188992]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6490.41518725  3144.34188992]
New Q values:  [-2527.46239811 -8521.23367799  3712.99582863  3144.34188992]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2526.88173881 -7525.53407498 -7525.7277781   3724.76584575]
------
Step:23, Action:North
State  288
Old Q Values:  [ 2526.88173881 -7525.53407498 -7525.7277781   3724.76584575]
New Q values:  [27643.07069115 -7525.53407498 -7525.7277781   3724.76584575]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.87763933e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
------
Step:24, Action:North
State  208
Old Q Values:  [ 1524.03151342  2306.94837796 -1925.08326713 -1455.65174173]
New Q values:  [23160.2973842   2306.94837796 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 75150.94926276]
------
Step:25, Action:West
State  128
Old Q Values:  [ 8775.70846068 19747.37279955 -8652.84       32365.99600744]
New Q values:  [ 8775.70846068 19747.37279955 -8652.84       42905.3419793 ]
Reward: 9  Episode Reward:  35
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.         11059.61439394  6789.02994987 99845.1452544 ]
------
Step:26, Action:South
State  114
Old Q Values:  [  -180.6          6712.83097384  17419.47777153 131003.36798688]
New Q values:  [  -180.6          3557.6642036   17419.47777153 131003.36798688]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.85947391e+03 1.96948359e+03 2.91043938e+03]
------
Step:27, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.85947391e+03 1.96948359e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.25708831e+03 1.96948359e+03 2.91043938e+03]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3712.99582863  3144.34188992]
------
Step:28, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3712.99582863  3144.34188992]
New Q values:  [-2527.46239811 -8521.23367799  9777.51953879  3144.34188992]
Reward: -1  Episode Reward:  32
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[27643.07069115 -7525.53407498 -7525.7277781   3724.76584575]
------
Step:29, Action:North
State  288
Old Q Values:  [27643.07069115 -7525.53407498 -7525.7277781   3724.76584575]
New Q values:  [18004.71749172 -7525.53407498 -7525.7277781   3724.76584575]
Reward: -1  Episode Reward:  31
xxxxx
x. gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23160.2973842   2306.94837796 -1925.08326713 -1455.65174173]
------
Step:30, Action:South
State  208
Old Q Values:  [23160.2973842   2306.94837796 -1925.08326713 -1455.65174173]
New Q values:  [23160.2973842   6323.5945987  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18004.71749172 -7525.53407498 -7525.7277781   3724.76584575]
------
Step:31, Action:West
State  288
Old Q Values:  [18004.71749172 -7525.53407498 -7525.7277781   3724.76584575]
New Q values:  [18004.71749172 -7525.53407498 -7525.7277781   2752.4784693 ]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4210.57377001 2763.38813962]
------
Step:32, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4210.57377001 2763.38813962]
New Q values:  [   37.74111519  -168.92307549  4210.57377001 12866.48327464]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39205.76006265  2256.66526474  4315.37065021  1875.31501677]
------
Step:33, Action:North
State  257
Old Q Values:  [39205.76006265  2256.66526474  4315.37065021  1875.31501677]
New Q values:  [41122.63404885  2256.66526474  4315.37065021  1875.31501677]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17203.58890013 84803.10007929     0.        ]
------
Step:34, Action:East
State  177
Old Q Values:  [78394.48547832 22487.97851491  8504.29052256     0.        ]
New Q values:  [78394.48547832 22487.97851491  5161.65126242     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5868.45017799 -4885.64726759  1099.96026581]
------
Step:35, Action:South
State  195
Old Q Values:  [   38.85388605  2677.89620798 23527.39531294  1101.59744825]
New Q values:  [   38.85388605  4930.50346558 23527.39531294  1101.59744825]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4210.57377001 12866.48327464]
------
Step:36, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4210.57377001 12866.48327464]
New Q values:  [   37.74111519  -168.92307549  4210.57377001 17482.78352451]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41122.63404885  2256.66526474  4315.37065021  1875.31501677]
------
Step:37, Action:North
State  257
Old Q Values:  [41122.63404885  2256.66526474  4315.37065021  1875.31501677]
New Q values:  [41889.38364332  2256.66526474  4315.37065021  1875.31501677]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17203.58890013 84803.10007929     0.        ]
------
Step:38, Action:East
State  179
Old Q Values:  [    0.         17203.58890013 84803.10007929     0.        ]
New Q values:  [    0.         17203.58890013 40171.93075675     0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  2.08376358e+04  1.20371620e+03]
------
Step:39, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.25708831e+03 1.96948359e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.25708831e+03 7.73528265e+03 2.91043938e+03]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23160.2973842   6323.5945987  -1925.08326713 -1455.65174173]
------
Step:40, Action:North
State  208
Old Q Values:  [23160.2973842   6323.5945987  -1925.08326713 -1455.65174173]
New Q values:  [31808.80373251  6323.5945987  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 75150.94926276]
------
Step:41, Action:West
State  128
Old Q Values:  [ 8775.70846068 19747.37279955 -8652.84       42905.3419793 ]
New Q values:  [ 8775.70846068 19747.37279955 -8652.84       18015.67011096]
Reward: -1  Episode Reward:  19
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.          2847.11106414 -5999.38454759     0.        ]
------
Step:42, Action:South
State  112
Old Q Values:  [    0.         11059.61439394  6789.02994987 99845.1452544 ]
New Q values:  [    0.          6743.83055328  6789.02994987 99845.1452544 ]
Reward: -1  Episode Reward:  18
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.25708831e+03 7.73528265e+03 2.91043938e+03]
------
Step:43, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.25708831e+03 7.73528265e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.25708831e+03 1.26361542e+04 2.91043938e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31808.80373251  6323.5945987  -1925.08326713 -1455.65174173]
------
Step:44, Action:North
State  208
Old Q Values:  [31808.80373251  6323.5945987  -1925.08326713 -1455.65174173]
New Q values:  [35268.20627183  6323.5945987  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 75150.94926276]
------
Step:45, Action:West
State  130
Old Q Values:  [36041.91667283  2315.80784304  -180.00807518 75150.94926276]
New Q values:  [36041.91667283  2315.80784304  -180.00807518 69756.38255464]
Reward: -1  Episode Reward:  15
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.32322009e+05]
------
Step:46, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   17419.47777153 131003.36798688]
New Q values:  [  -180.6          3557.6642036   17419.47777153 132116.88610344]
Reward: 100009  Episode Reward:  100024
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -1921.51796559   108.84815279  -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134   851.67531979   837.12743914     0.        ]
New Q values:  [-8463.16477134   851.67531979   982.76041352     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2141.6981262  -3116.16865578]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043 11869.74943606    69.13064323]
New Q values:  [ -281.736      -3455.78276043 16578.99888561    69.13064323]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.94189970e+04]
------
Step:3, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  3.94189970e+04]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.07406985e+04]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043 16578.99888561    69.13064323]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043 16578.99888561    69.13064323]
New Q values:  [ -281.736      -3455.78276043 12853.20909842    69.13064323]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.07406985e+04]
------
Step:5, Action:West
State  136
Old Q Values:  [-5281.21195651  4394.06823428 -2383.80019164 -5770.97492493]
New Q values:  [-5281.21195651  4394.06823428 -2383.80019164 -1666.48053212]
Reward: -1  Episode Reward:  15
xxxxx
xga x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2141.6981262  -3116.16865578]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043 12853.20909842    69.13064323]
New Q values:  [ -281.736      -3455.78276043 11362.89318355    69.13064323]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.07406985e+04]
------
Step:7, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.07406985e+04]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.17045473e+04]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043 11362.89318355    69.13064323]
------
Step:8, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   153.83468034   392.82457952]
New Q values:  [ -253.44886264 -1902.20915811  3572.29807633   392.82457952]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.17045473e+04]
------
Step:9, Action:West
State  136
Old Q Values:  [-5281.21195651  4394.06823428 -2383.80019164 -1666.48053212]
New Q values:  [-5281.21195651  4394.06823428 -2383.80019164  -573.87801006]
Reward: -1  Episode Reward:  11
xxxxx
x agx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:10, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2141.6981262  -3116.16865578]
New Q values:  [-9594.56523706 -8069.05606225  2141.6981262   -880.32621716]
Reward: -1  Episode Reward:  10
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1222.47081715   -8.57207238 -180.6       ]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 2848.7382307   353.8832415  -120.29354603]
New Q values:  [-177.44732869 1543.92081737  353.8832415  -120.29354603]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  575.27118432 1350.08508362  154.04646645]
------
Step:12, Action:South
State  189
Old Q Values:  [   9.84673294  575.27118432 1350.08508362  154.04646645]
New Q values:  [   9.84673294  881.83651799 1350.08508362  154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 2154.42681419  -35.88578819]
------
Step:13, Action:East
State  261
Old Q Values:  [ 381.7146909    26.73544252 2154.42681419  -35.88578819]
New Q values:  [ 381.7146909    26.73544252 1002.02807816  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 449.52450828]
------
Step:14, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2534.24027581   275.19940891]
New Q values:  [   16.82637525 -5807.06396197  2534.24027581   410.08818701]
Reward: -1  Episode Reward:  26
xxxxx
x g x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 1002.02807816  -35.88578819]
------
Step:15, Action:East
State  260
Old Q Values:  [ 1971.51195062 -8695.4397473   5795.09340821 -2601.74710518]
New Q values:  [ 1971.51195062 -8695.4397473   3077.70944603 -2601.74710518]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2534.24027581   410.08818701]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9777.51953879  3144.34188992]
New Q values:  [-2527.46239811 -8521.23367799  9317.82306303  3144.34188992]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18004.71749172 -7525.53407498 -7525.7277781   2752.4784693 ]
------
Step:17, Action:North
State  288
Old Q Values:  [18004.71749172 -7525.53407498 -7525.7277781   2752.4784693 ]
New Q values:  [17787.74887824 -7525.53407498 -7525.7277781   2752.4784693 ]
Reward: 9  Episode Reward:  43
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[35268.20627183  6323.5945987  -1925.08326713 -1455.65174173]
------
Step:18, Action:North
State  208
Old Q Values:  [35268.20627183  6323.5945987  -1925.08326713 -1455.65174173]
New Q values:  [15424.90297902  6323.5945987  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  42
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4394.06823428 -2383.80019164  -573.87801006]
------
Step:19, Action:South
State  136
Old Q Values:  [-5281.21195651  4394.06823428 -2383.80019164  -573.87801006]
New Q values:  [-5281.21195651  6384.49818742 -2383.80019164  -573.87801006]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15424.90297902  6323.5945987  -1925.08326713 -1455.65174173]
------
Step:20, Action:North
State  208
Old Q Values:  [15424.90297902  6323.5945987  -1925.08326713 -1455.65174173]
New Q values:  [ 8084.71064783  6323.5945987  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  40
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  6384.49818742 -2383.80019164  -573.87801006]
------
Step:21, Action:South
State  136
Old Q Values:  [-5281.21195651  6384.49818742 -2383.80019164  -573.87801006]
New Q values:  [-5281.21195651  4978.61246932 -2383.80019164  -573.87801006]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8084.71064783  6323.5945987  -1925.08326713 -1455.65174173]
------
Step:22, Action:North
State  208
Old Q Values:  [ 8084.71064783  6323.5945987  -1925.08326713 -1455.65174173]
New Q values:  [ 4726.86799993  6323.5945987  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  38
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4978.61246932 -2383.80019164  -573.87801006]
------
Step:23, Action:South
State  136
Old Q Values:  [-5281.21195651  4978.61246932 -2383.80019164  -573.87801006]
New Q values:  [-5281.21195651  3887.92336734 -2383.80019164  -573.87801006]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4726.86799993  6323.5945987  -1925.08326713 -1455.65174173]
------
Step:24, Action:South
State  208
Old Q Values:  [ 4726.86799993  6323.5945987  -1925.08326713 -1455.65174173]
New Q values:  [ 4726.86799993  7865.16250295 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  36
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17787.74887824 -7525.53407498 -7525.7277781   2752.4784693 ]
------
Step:25, Action:North
State  288
Old Q Values:  [17787.74887824 -7525.53407498 -7525.7277781   2752.4784693 ]
New Q values:  [ 9474.04830218 -7525.53407498 -7525.7277781   2752.4784693 ]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4726.86799993  7865.16250295 -1925.08326713 -1455.65174173]
------
Step:26, Action:South
State  208
Old Q Values:  [ 4726.86799993  7865.16250295 -1925.08326713 -1455.65174173]
New Q values:  [ 4726.86799993  5987.67949183 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9474.04830218 -7525.53407498 -7525.7277781   2752.4784693 ]
------
Step:27, Action:West
State  288
Old Q Values:  [ 9474.04830218 -7525.53407498 -7525.7277781   2752.4784693 ]
New Q values:  [ 9474.04830218 -7525.53407498 -7525.7277781   3895.73830663]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9317.82306303  3144.34188992]
------
Step:28, Action:East
State  273
Old Q Values:  [   37.74111519  -168.92307549  4210.57377001 17482.78352451]
New Q values:  [   37.74111519  -168.92307549  4525.84399866 17482.78352451]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9474.04830218 -7525.53407498 -7525.7277781   3895.73830663]
------
Step:29, Action:West
State  288
Old Q Values:  [ 9474.04830218 -7525.53407498 -7525.7277781   3895.73830663]
New Q values:  [ 9474.04830218 -7525.53407498 -7525.7277781   6802.53038001]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4525.84399866 17482.78352451]
------
Step:30, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4525.84399866 17482.78352451]
New Q values:  [  37.74111519 -168.92307549 4525.84399866 7293.12183325]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 1002.02807816  -35.88578819]
------
Step:31, Action:East
State  257
Old Q Values:  [41889.38364332  2256.66526474  4315.37065021  1875.31501677]
New Q values:  [41889.38364332  2256.66526474  4520.89517899  1875.31501677]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9317.82306303  3144.34188992]
------
Step:32, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4525.84399866 7293.12183325]
New Q values:  [  37.74111519 -168.92307549 4651.95209012 7293.12183325]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9474.04830218 -7525.53407498 -7525.7277781   6802.53038001]
------
Step:33, Action:West
State  288
Old Q Values:  [ 9474.04830218 -7525.53407498 -7525.7277781   6802.53038001]
New Q values:  [ 9474.04830218 -7525.53407498 -7525.7277781   4908.34870198]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 7293.12183325]
------
Step:34, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4651.95209012 7293.12183325]
New Q values:  [  37.74111519 -168.92307549 4651.95209012 3217.25715675]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 1002.02807816  -35.88578819]
------
Step:35, Action:East
State  261
Old Q Values:  [ 381.7146909    26.73544252 1002.02807816  -35.88578819]
New Q values:  [ 381.7146909    26.73544252 1795.7968583   -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 3217.25715675]
------
Step:36, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9317.82306303  3144.34188992]
New Q values:  [-2527.46239811 -8521.23367799  9317.82306303  1795.87581346]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 1795.7968583   -35.88578819]
------
Step:37, Action:North
State  260
Old Q Values:  [ 1971.51195062 -8695.4397473   3077.70944603 -2601.74710518]
New Q values:  [ 2278.98816136 -8695.4397473   3077.70944603 -2601.74710518]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:38, Action:East
State  176
Old Q Values:  [ 76485.61294353   1327.79507613 100588.37489285      0.        ]
New Q values:  [ 76485.61294353   1327.79507613 104031.59621135      0.        ]
Reward: 100009  Episode Reward:  100032
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3572.29807633   392.82457952]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043 11362.89318355    69.13064323]
New Q values:  [ -281.736      -3455.78276043  8061.92147761    69.13064323]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.17045473e+04]
------
Step:2, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.17045473e+04]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  7.09979538e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  8061.92147761    69.13064323]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  8061.92147761    69.13064323]
New Q values:  [ -281.736      -3455.78276043  5354.10720571    69.13064323]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  7.09979538e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  7.09979538e+03]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  4.44555031e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  5354.10720571    69.13064323]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  5354.10720571    69.13064323]
New Q values:  [ -281.736      -3455.78276043  3474.70797666    69.13064323]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
xg..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  4.44555031e+03]
------
Step:6, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  4.44555031e+03]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.82003252e+03]
Reward: -1  Episode Reward:  4
xxxxx
x.a x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  3474.70797666    69.13064323]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  3474.70797666    69.13064323]
New Q values:  [ -281.736      -3455.78276043  2235.29294631    69.13064323]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
xg..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.82003252e+03]
------
Step:8, Action:West
State  136
Old Q Values:  [-5281.21195651  3887.92336734 -2383.80019164  -573.87801006]
New Q values:  [-5281.21195651  3887.92336734 -2383.80019164   412.35823384]
Reward: -1  Episode Reward:  2
xxxxx
xga x
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2141.6981262   -880.32621716]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  2235.29294631    69.13064323]
New Q values:  [ -281.736      -3455.78276043  1739.52693418    69.13064323]
Reward: -1  Episode Reward:  1
xxxxx
x. ax
xg..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.82003252e+03]
------
Step:10, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  2.82003252e+03]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.64927109e+03]
Reward: -1  Episode Reward:  0
xxxxx
x.a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1739.52693418    69.13064323]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  1739.52693418    69.13064323]
New Q values:  [ -281.736      -3455.78276043  1189.99210001    69.13064323]
Reward: -1  Episode Reward:  -1
xxxxx
x. ax
xg..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.64927109e+03]
------
Step:12, Action:West
State  136
Old Q Values:  [-5281.21195651  3887.92336734 -2383.80019164   412.35823384]
New Q values:  [-5281.21195651  3887.92336734 -2383.80019164   806.85273139]
Reward: -1  Episode Reward:  -2
xxxxx
xga x
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2141.6981262   -880.32621716]
------
Step:13, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2141.6981262   -880.32621716]
New Q values:  [-9594.56523706 -8069.05606225  2022.45626068  -880.32621716]
Reward: -1  Episode Reward:  -3
xxxxx
x.gax
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  3887.92336734 -2383.80019164   806.85273139]
------
Step:14, Action:South
State  136
Old Q Values:  [-5281.21195651  3887.92336734 -2383.80019164   806.85273139]
New Q values:  [-5281.21195651  3356.87319449 -2383.80019164   806.85273139]
Reward: 9  Episode Reward:  6
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4726.86799993  5987.67949183 -1925.08326713 -1455.65174173]
------
Step:15, Action:South
State  208
Old Q Values:  [ 4726.86799993  5987.67949183 -1925.08326713 -1455.65174173]
New Q values:  [ 4726.86799993  5236.68628739 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  5
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9474.04830218 -7525.53407498 -7525.7277781   4908.34870198]
------
Step:16, Action:West
State  288
Old Q Values:  [ 9474.04830218 -7525.53407498 -7525.7277781   4908.34870198]
New Q values:  [ 9474.04830218 -7525.53407498 -7525.7277781   3364.32510783]
Reward: 9  Episode Reward:  14
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 3217.25715675]
------
Step:17, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4651.95209012 3217.25715675]
New Q values:  [   37.74111519  -168.92307549  4651.95209012 13859.1179557 ]
Reward: 9  Episode Reward:  23
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41889.38364332  2256.66526474  4520.89517899  1875.31501677]
------
Step:18, Action:North
State  257
Old Q Values:  [41889.38364332  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [40279.49910083  2256.66526474  4520.89517899  1875.31501677]
Reward: 9  Episode Reward:  32
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  5161.65126242     0.        ]
------
Step:19, Action:North
State  181
Old Q Values:  [  401.84666257   636.33151266 -4652.97823103   -30.99112081]
New Q values:  [  532.87991018   636.33151266 -4652.97823103   -30.99112081]
Reward: 9  Episode Reward:  41
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1222.47081715   -8.57207238 -180.6       ]
------
Step:20, Action:South
State  99
Old Q Values:  [    0.         35404.93003926 65700.46302897     0.        ]
New Q values:  [    0.         37679.7176592  65700.46302897     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xag x
x   x
xxxxx
Step:21, Action:North
State  180
Old Q Values:  [ -441.36308122  1985.28273723  1116.79388132 -4966.32149798]
New Q values:  [ -144.49078665  1985.28273723  1116.79388132 -4966.32149798]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -1921.51796559   108.84815279  -180.6       ]
------
Step:22, Action:East
State  110
Old Q Values:  [ -239.29051573 -1921.51796559   108.84815279  -180.6       ]
New Q values:  [ -239.29051573 -1921.51796559   399.93689112  -180.6       ]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1189.99210001    69.13064323]
------
Step:23, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  1189.99210001    69.13064323]
New Q values:  [ -281.736      -3455.78276043   970.17816634    69.13064323]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.64927109e+03]
------
Step:24, Action:West
State  136
Old Q Values:  [-5281.21195651  3356.87319449 -2383.80019164   806.85273139]
New Q values:  [-5281.21195651  3356.87319449 -2383.80019164   928.87797076]
Reward: -1  Episode Reward:  36
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2022.45626068  -880.32621716]
------
Step:25, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2022.45626068  -880.32621716]
New Q values:  [-9594.56523706 -8069.05606225  1815.44446262  -880.32621716]
Reward: -1  Episode Reward:  35
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  3356.87319449 -2383.80019164   928.87797076]
------
Step:26, Action:South
State  136
Old Q Values:  [-5281.21195651  3356.87319449 -2383.80019164   928.87797076]
New Q values:  [-5281.21195651  2913.15516401 -2383.80019164   928.87797076]
Reward: -1  Episode Reward:  34
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4726.86799993  5236.68628739 -1925.08326713 -1455.65174173]
------
Step:27, Action:South
State  208
Old Q Values:  [ 4726.86799993  5236.68628739 -1925.08326713 -1455.65174173]
New Q values:  [ 4726.86799993  4936.28900561 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9474.04830218 -7525.53407498 -7525.7277781   3364.32510783]
------
Step:28, Action:North
State  288
Old Q Values:  [ 9474.04830218 -7525.53407498 -7525.7277781   3364.32510783]
New Q values:  [ 5269.90602255 -7525.53407498 -7525.7277781   3364.32510783]
Reward: -1  Episode Reward:  32
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4726.86799993  4936.28900561 -1925.08326713 -1455.65174173]
------
Step:29, Action:South
State  208
Old Q Values:  [ 4726.86799993  4936.28900561 -1925.08326713 -1455.65174173]
New Q values:  [ 4726.86799993  3554.88740901 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x . x
x  ax
xxxxx
Step:30, Action:South
State  288
Old Q Values:  [ 5269.90602255 -7525.53407498 -7525.7277781   3364.32510783]
New Q values:  [ 5269.90602255 -1609.84182322 -7525.7277781   3364.32510783]
Reward: -301  Episode Reward:  -270
xxxxx
x   x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5269.90602255 -1609.84182322 -7525.7277781   3364.32510783]
------
Step:31, Action:North
State  288
Old Q Values:  [ 5269.90602255 -1609.84182322 -7525.7277781   3364.32510783]
New Q values:  [ 3525.422809   -1609.84182322 -7525.7277781   3364.32510783]
Reward: -1  Episode Reward:  -271
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4726.86799993  3554.88740901 -1925.08326713 -1455.65174173]
------
Step:32, Action:North
State  208
Old Q Values:  [ 4726.86799993  3554.88740901 -1925.08326713 -1455.65174173]
New Q values:  [22817.06196636  3554.88740901 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -272
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 69756.38255464]
------
Step:33, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  1.64927109e+03]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  9.50161885e+02]
Reward: -1  Episode Reward:  -273
xxxxx
x a x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   970.17816634    69.13064323]
------
Step:34, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   17419.47777153 132116.88610344]
New Q values:  [  -180.6          3557.6642036   27894.105875   132116.88610344]
Reward: -1  Episode Reward:  -274
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 69756.38255464]
------
Step:35, Action:West
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  9.50161885e+02]
New Q values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  6.70518204e+02]
Reward: -1  Episode Reward:  -275
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   970.17816634    69.13064323]
------
Step:36, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1815.44446262  -880.32621716]
New Q values:  [-9594.56523706 -8069.05606225  1599.52433425  -880.32621716]
Reward: -1  Episode Reward:  -276
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2913.15516401 -2383.80019164   928.87797076]
------
Step:37, Action:South
State  138
Old Q Values:  [-1.39459256e+02 -1.36921747e+02 -3.22965309e-01  6.70518204e+02]
New Q values:  [-1.39459256e+02  6.78974989e+03 -3.22965309e-01  6.70518204e+02]
Reward: -1  Episode Reward:  -277
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22817.06196636  3554.88740901 -1925.08326713 -1455.65174173]
------
Step:38, Action:North
State  208
Old Q Values:  [22817.06196636  3554.88740901 -1925.08326713 -1455.65174173]
New Q values:  [10000.17133575  3554.88740901 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -278
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  2913.15516401 -2383.80019164   928.87797076]
------
Step:39, Action:South
State  136
Old Q Values:  [-5281.21195651  2913.15516401 -2383.80019164   928.87797076]
New Q values:  [-5281.21195651  4164.71346633 -2383.80019164   928.87797076]
Reward: -1  Episode Reward:  -279
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10000.17133575  3554.88740901 -1925.08326713 -1455.65174173]
------
Step:40, Action:North
State  208
Old Q Values:  [10000.17133575  3554.88740901 -1925.08326713 -1455.65174173]
New Q values:  [ 5248.8825742   3554.88740901 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -280
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4164.71346633 -2383.80019164   928.87797076]
------
Step:41, Action:South
State  138
Old Q Values:  [-1.39459256e+02  6.78974989e+03 -3.22965309e-01  6.70518204e+02]
New Q values:  [-1.39459256e+02  4.28996473e+03 -3.22965309e-01  6.70518204e+02]
Reward: -1  Episode Reward:  -281
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5248.8825742   3554.88740901 -1925.08326713 -1455.65174173]
------
Step:42, Action:North
State  208
Old Q Values:  [ 5248.8825742   3554.88740901 -1925.08326713 -1455.65174173]
New Q values:  [23025.86779607  3554.88740901 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -282
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 69756.38255464]
------
Step:43, Action:West
State  138
Old Q Values:  [-1.39459256e+02  4.28996473e+03 -3.22965309e-01  6.70518204e+02]
New Q values:  [-1.39459256e+02  4.28996473e+03 -3.22965309e-01  5.58660731e+02]
Reward: -1  Episode Reward:  -283
xxxxx
x a x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   970.17816634    69.13064323]
------
Step:44, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3572.29807633   392.82457952]
New Q values:  [ -253.44886264 -1902.20915811  2715.30864914   392.82457952]
Reward: -1  Episode Reward:  -284
xxxxx
x  ax
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  4.28996473e+03 -3.22965309e-01  5.58660731e+02]
------
Step:45, Action:South
State  138
Old Q Values:  [-1.39459256e+02  4.28996473e+03 -3.22965309e-01  5.58660731e+02]
New Q values:  [-1.39459256e+02  2.83483039e+04 -3.22965309e-01  5.58660731e+02]
Reward: -1  Episode Reward:  -285
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8.87763933e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
------
Step:46, Action:North
State  208
Old Q Values:  [23025.86779607  3554.88740901 -1925.08326713 -1455.65174173]
New Q values:  [30136.66188482  3554.88740901 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -286
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 69756.38255464]
------
Step:47, Action:West
State  136
Old Q Values:  [-5281.21195651  4164.71346633 -2383.80019164   928.87797076]
New Q values:  [-5281.21195651  4164.71346633 -2383.80019164 -5149.19151142]
Reward: -10001  Episode Reward:  -10287
xxxxx
x g x
x . x
x   x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1222.47081715   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1543.92081737  353.8832415  -120.29354603]
New Q values:  [-177.44732869  813.86778075  353.8832415  -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  532.87991018   636.33151266 -4652.97823103   -30.99112081]
------
Step:2, Action:South
State  183
Old Q Values:  [ 478.96073712  284.0495848  9186.12537523    0.        ]
New Q values:  [ 478.96073712  657.75889141 9186.12537523    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 1795.7968583   -35.88578819]
------
Step:3, Action:East
State  261
Old Q Values:  [ 381.7146909    26.73544252 1795.7968583   -35.88578819]
New Q values:  [ 381.7146909    26.73544252 4881.45413003  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x . x
x .gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4651.95209012 13859.1179557 ]
------
Step:4, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4651.95209012 13859.1179557 ]
New Q values:  [  37.74111519 -168.92307549 4651.95209012 7007.48342129]
Reward: -1  Episode Reward:  26
xxxxx
x . x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 381.7146909    26.73544252 4881.45413003  -35.88578819]
------
Step:5, Action:East
State  261
Old Q Values:  [ 381.7146909    26.73544252 4881.45413003  -35.88578819]
New Q values:  [  381.7146909     26.73544252 -1252.67142908   -35.88578819]
Reward: -10001  Episode Reward:  -9975
xxxxx
x . x
x ..x
x g.x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9317.82306303  1795.87581346]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9317.82306303  1795.87581346]
New Q values:  [-2527.46239811 -8521.23367799  4790.15606791  1795.87581346]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3525.422809   -1609.84182322 -7525.7277781   3364.32510783]
------
Step:2, Action:North
State  288
Old Q Values:  [ 3525.422809   -1609.84182322 -7525.7277781   3364.32510783]
New Q values:  [10456.56768905 -1609.84182322 -7525.7277781   3364.32510783]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30136.66188482  3554.88740901 -1925.08326713 -1455.65174173]
------
Step:3, Action:North
State  210
Old Q Values:  [8.87763933e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [4.40204485e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  27
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.83483039e+04 -3.22965309e-01  5.58660731e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [-1.39459256e+02  2.83483039e+04 -3.22965309e-01  5.58660731e+02]
New Q values:  [-1.39459256e+02  2.45448561e+04 -3.22965309e-01  5.58660731e+02]
Reward: -1  Episode Reward:  26
xxxxx
x . x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.40204485e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
------
Step:5, Action:North
State  210
Old Q Values:  [4.40204485e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [2.49710362e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  25
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  2.45448561e+04 -3.22965309e-01  5.58660731e+02]
------
Step:6, Action:South
State  138
Old Q Values:  [-1.39459256e+02  2.45448561e+04 -3.22965309e-01  5.58660731e+02]
New Q values:  [-1.39459256e+02  1.88583410e+04 -3.22965309e-01  5.58660731e+02]
Reward: -1  Episode Reward:  24
xxxxx
x . x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30136.66188482  3554.88740901 -1925.08326713 -1455.65174173]
------
Step:7, Action:North
State  210
Old Q Values:  [2.49710362e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [1.56453168e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  23
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  1.88583410e+04 -3.22965309e-01  5.58660731e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [-1.39459256e+02  1.88583410e+04 -3.22965309e-01  5.58660731e+02]
New Q values:  [-1.39459256e+02  1.22363314e+04 -3.22965309e-01  5.58660731e+02]
Reward: -1  Episode Reward:  22
xxxxx
x . x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.56453168e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
------
Step:9, Action:North
State  208
Old Q Values:  [30136.66188482  3554.88740901 -1925.08326713 -1455.65174173]
New Q values:  [15724.96418612  3554.88740901 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  21
xxxxx
x .ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-1.39459256e+02  1.22363314e+04 -3.22965309e-01  5.58660731e+02]
------
Step:10, Action:West
State  138
Old Q Values:  [-1.39459256e+02  1.22363314e+04 -3.22965309e-01  5.58660731e+02]
New Q values:  [-1.39459256e+02  1.22363314e+04 -3.22965309e-01  1.04345689e+03]
Reward: 9  Episode Reward:  30
xxxxx
x a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2715.30864914   392.82457952]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   970.17816634    69.13064323]
New Q values:  [ -281.736      -3455.78276043  4058.37069873    69.13064323]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x.. x
x.g x
xxxxx
Step:12, Action:North
State  138
Old Q Values:  [-1.39459256e+02  1.22363314e+04 -3.22965309e-01  1.04345689e+03]
New Q values:  [ 3.43451573e+03  1.22363314e+04 -3.22965309e-01  1.04345689e+03]
Reward: -301  Episode Reward:  -272
xxxxx
x  ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.43451573e+03  1.22363314e+04 -3.22965309e-01  1.04345689e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 3.43451573e+03  1.22363314e+04 -3.22965309e-01  1.04345689e+03]
New Q values:  [ 3.43451573e+03  3.61142183e+03 -3.22965309e-01  1.04345689e+03]
Reward: -10001  Episode Reward:  -10273
xxxxx
x   x
x..gx
x.  x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15724.96418612  3554.88740901 -1925.08326713 -1455.65174173]
------
Step:1, Action:North
State  216
Old Q Values:  [  251.87018622   372.98523174 -8489.43729461  7587.68259449]
New Q values:  [ 1355.56211439   372.98523174 -8489.43729461  7587.68259449]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-5281.21195651  4164.71346633 -2383.80019164 -5149.19151142]
------
Step:2, Action:South
State  138
Old Q Values:  [ 3.43451573e+03  3.61142183e+03 -3.22965309e-01  1.04345689e+03]
New Q values:  [ 3.43451573e+03  3.72027351e+03 -3.22965309e-01  1.04345689e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1355.56211439   372.98523174 -8489.43729461  7587.68259449]
------
Step:3, Action:West
State  216
Old Q Values:  [ 1355.56211439   372.98523174 -8489.43729461  7587.68259449]
New Q values:  [ 1355.56211439   372.98523174 -8489.43729461  5913.198426  ]
Reward: 9  Episode Reward:  17
xxxxx
xg  x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  9575.751294    425.06225586  408.67479662]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.25708831e+03 1.26361542e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.34528215e+03 1.26361542e+04 2.91043938e+03]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4790.15606791  1795.87581346]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4790.15606791  1795.87581346]
New Q values:  [-2527.46239811 -8521.23367799  5058.43273388  1795.87581346]
Reward: 9  Episode Reward:  35
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10456.56768905 -1609.84182322 -7525.7277781   3364.32510783]
------
Step:6, Action:North
State  288
Old Q Values:  [10456.56768905 -1609.84182322 -7525.7277781   3364.32510783]
New Q values:  [ 8899.51633146 -1609.84182322 -7525.7277781   3364.32510783]
Reward: -1  Episode Reward:  34
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15724.96418612  3554.88740901 -1925.08326713 -1455.65174173]
------
Step:7, Action:North
State  208
Old Q Values:  [15724.96418612  3554.88740901 -1925.08326713 -1455.65174173]
New Q values:  [ 7405.4677278   3554.88740901 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.43451573e+03  3.72027351e+03 -3.22965309e-01  1.04345689e+03]
------
Step:8, Action:South
State  138
Old Q Values:  [ 3.43451573e+03  3.72027351e+03 -3.22965309e-01  1.04345689e+03]
New Q values:  [ 3.43451573e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
Reward: -10001  Episode Reward:  -9968
xxxxx
x.  x
x. gx
x.  x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.43451573e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
------
Step:1, Action:North
State  136
Old Q Values:  [-5281.21195651  4164.71346633 -2383.80019164 -5149.19151142]
New Q values:  [-1043.6707427   4164.71346633 -2383.80019164 -5149.19151142]
Reward: -301  Episode Reward:  -301
xxxxx
x.gax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   4164.71346633 -2383.80019164 -5149.19151142]
------
Step:2, Action:South
State  136
Old Q Values:  [-1043.6707427   4164.71346633 -2383.80019164 -5149.19151142]
New Q values:  [-1043.6707427   3892.92570487 -2383.80019164 -5149.19151142]
Reward: 9  Episode Reward:  -292
xxxxx
x..gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7405.4677278   3554.88740901 -1925.08326713 -1455.65174173]
------
Step:3, Action:South
State  208
Old Q Values:  [ 7405.4677278   3554.88740901 -1925.08326713 -1455.65174173]
New Q values:  [ 7405.4677278   4097.20986304 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  -283
xxxxx
x.. x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8899.51633146 -1609.84182322 -7525.7277781   3364.32510783]
------
Step:4, Action:West
State  288
Old Q Values:  [ 8899.51633146 -1609.84182322 -7525.7277781   3364.32510783]
New Q values:  [ 8899.51633146 -1609.84182322 -7525.7277781   3453.37506952]
Reward: 9  Episode Reward:  -274
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 7007.48342129]
------
Step:5, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4651.95209012 7007.48342129]
New Q values:  [   37.74111519  -168.92307549  4651.95209012 14892.24309876]
Reward: 9  Episode Reward:  -265
xxxxx
x.. x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[40279.49910083  2256.66526474  4520.89517899  1875.31501677]
------
Step:6, Action:North
State  257
Old Q Values:  [40279.49910083  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [39635.54528383  2256.66526474  4520.89517899  1875.31501677]
Reward: 9  Episode Reward:  -256
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  5161.65126242     0.        ]
------
Step:7, Action:North
State  180
Old Q Values:  [ -144.49078665  1985.28273723  1116.79388132 -4966.32149798]
New Q values:  [  -50.84831466  1985.28273723  1116.79388132 -4966.32149798]
Reward: 9  Episode Reward:  -247
xxxxx
xa. x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[ -180.6       -2257.0253383     5.16       -180.6      ]
------
Step:8, Action:East
State  98
Old Q Values:  [    0.         41136.94667617 54934.04314511     0.        ]
New Q values:  [     0.          41136.94667617 121614.08308908      0.        ]
Reward: 100009  Episode Reward:  99762
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  4930.50346558 23527.39531294  1101.59744825]
------
Step:1, Action:East
State  193
Old Q Values:  [-5922.26708831  5868.45017799 -4885.64726759  1099.96026581]
New Q values:  [-5922.26708831  5868.45017799 -5727.2185887   1099.96026581]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2715.30864914   392.82457952]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2715.30864914   392.82457952]
New Q values:  [ -253.44886264 -1902.20915811  2121.87817861   392.82457952]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.43451573e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
------
Step:2, Action:North
State  138
Old Q Values:  [ 3.43451573e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
New Q values:  [ 2.22356101e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
Reward: -301  Episode Reward:  -292
xxxxx
x. ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.22356101e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
------
Step:3, Action:North
State  138
Old Q Values:  [ 2.22356101e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
New Q values:  [ 1.37589271e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
Reward: -301  Episode Reward:  -593
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.37589271e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
------
Step:4, Action:North
State  138
Old Q Values:  [ 1.37589271e+03 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
New Q values:  [ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
Reward: -301  Episode Reward:  -894
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
------
Step:5, Action:West
State  138
Old Q Values:  [ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.04345689e+03]
New Q values:  [ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.63429396e+03]
Reward: -1  Episode Reward:  -895
xxxxx
x.a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  4058.37069873    69.13064323]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  4058.37069873    69.13064323]
New Q values:  [ -281.736      -3455.78276043  2113.03646886    69.13064323]
Reward: -1  Episode Reward:  -896
xxxxx
x. ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.63429396e+03]
------
Step:7, Action:West
State  138
Old Q Values:  [ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.63429396e+03]
New Q values:  [ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.28702853e+03]
Reward: -1  Episode Reward:  -897
xxxxx
x.a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  2113.03646886    69.13064323]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  2113.03646886    69.13064323]
New Q values:  [ -281.736      -3455.78276043  1230.72314549    69.13064323]
Reward: -1  Episode Reward:  -898
xxxxx
x. ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.28702853e+03]
------
Step:9, Action:West
State  136
Old Q Values:  [-1043.6707427   3892.92570487 -2383.80019164 -5149.19151142]
New Q values:  [-1043.6707427   3892.92570487 -2383.80019164 -1580.41930429]
Reward: -1  Episode Reward:  -899
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1599.52433425  -880.32621716]
------
Step:10, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1599.52433425  -880.32621716]
New Q values:  [-9594.56523706 -8069.05606225  1807.08744516  -880.32621716]
Reward: -1  Episode Reward:  -900
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3892.92570487 -2383.80019164 -1580.41930429]
------
Step:11, Action:South
State  138
Old Q Values:  [ 7.82524895e+02 -2.29085028e+03 -3.22965309e-01  1.28702853e+03]
New Q values:  [ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  1.28702853e+03]
Reward: -1  Episode Reward:  -901
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1355.56211439   372.98523174 -8489.43729461  5913.198426  ]
------
Step:12, Action:North
State  210
Old Q Values:  [1.56453168e+04 2.47132583e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [6.64363528e+03 2.47132583e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -902
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  1.28702853e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  1.28702853e+03]
New Q values:  [ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  8.83428354e+02]
Reward: -1  Episode Reward:  -903
xxxxx
x.a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1230.72314549    69.13064323]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  1230.72314549    69.13064323]
New Q values:  [ -281.736      -3455.78276043   756.71776447    69.13064323]
Reward: -1  Episode Reward:  -904
xxxxx
x. ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  8.83428354e+02]
------
Step:15, Action:West
State  138
Old Q Values:  [ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  8.83428354e+02]
New Q values:  [ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  5.79786671e+02]
Reward: -1  Episode Reward:  -905
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   756.71776447    69.13064323]
------
Step:16, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1807.08744516  -880.32621716]
New Q values:  [-9594.56523706 -8069.05606225  1890.11268953  -880.32621716]
Reward: -1  Episode Reward:  -906
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3892.92570487 -2383.80019164 -1580.41930429]
------
Step:17, Action:South
State  136
Old Q Values:  [-1043.6707427   3892.92570487 -2383.80019164 -1580.41930429]
New Q values:  [-1043.6707427   3778.21060029 -2383.80019164 -1580.41930429]
Reward: -1  Episode Reward:  -907
xxxxx
x. gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7405.4677278   4097.20986304 -1925.08326713 -1455.65174173]
------
Step:18, Action:South
State  208
Old Q Values:  [ 7405.4677278   4097.20986304 -1925.08326713 -1455.65174173]
New Q values:  [ 7405.4677278   4314.13884465 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  -898
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8899.51633146 -1609.84182322 -7525.7277781   3453.37506952]
------
Step:19, Action:West
State  288
Old Q Values:  [ 8899.51633146 -1609.84182322 -7525.7277781   3453.37506952]
New Q values:  [ 8899.51633146 -1609.84182322 -7525.7277781   2904.27984797]
Reward: 9  Episode Reward:  -889
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5058.43273388  1795.87581346]
------
Step:20, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5058.43273388  1795.87581346]
New Q values:  [-2527.46239811 -8521.23367799  4692.62799299  1795.87581346]
Reward: -1  Episode Reward:  -890
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8899.51633146 -1609.84182322 -7525.7277781   2904.27984797]
------
Step:21, Action:North
State  288
Old Q Values:  [ 8899.51633146 -1609.84182322 -7525.7277781   2904.27984797]
New Q values:  [ 5552.29711516 -1609.84182322 -7525.7277781   2904.27984797]
Reward: -1  Episode Reward:  -891
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6.64363528e+03 2.47132583e+03 2.24233123e+03 3.52184257e+00]
------
Step:22, Action:North
State  208
Old Q Values:  [ 7405.4677278   4314.13884465 -1925.08326713 -1455.65174173]
New Q values:  [ 3218.6929162   4314.13884465 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -892
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  5.79786671e+02]
------
Step:23, Action:South
State  138
Old Q Values:  [ 7.82524895e+02  8.57019417e+02 -3.22965309e-01  5.79786671e+02]
New Q values:  [ 7.82524895e+02  1.63644942e+03 -3.22965309e-01  5.79786671e+02]
Reward: -1  Episode Reward:  -893
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3218.6929162   4314.13884465 -1925.08326713 -1455.65174173]
------
Step:24, Action:South
State  208
Old Q Values:  [ 3218.6929162   4314.13884465 -1925.08326713 -1455.65174173]
New Q values:  [ 3218.6929162   3390.74467241 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -894
xxxxx
x.  x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5552.29711516 -1609.84182322 -7525.7277781   2904.27984797]
------
Step:25, Action:West
State  288
Old Q Values:  [ 5552.29711516 -1609.84182322 -7525.7277781   2904.27984797]
New Q values:  [ 5552.29711516 -1609.84182322 -7525.7277781   5628.78486882]
Reward: -1  Episode Reward:  -895
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4651.95209012 14892.24309876]
------
Step:26, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4692.62799299  1795.87581346]
New Q values:  [-2527.46239811 -8521.23367799  4692.62799299 12614.41391053]
Reward: 9  Episode Reward:  -886
xxxxx
x.  x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39635.54528383  2256.66526474  4520.89517899  1875.31501677]
------
Step:27, Action:North
State  256
Old Q Values:  [54653.47771857 18175.54694409  5576.40109469   644.94785455]
New Q values:  [48225.76268973 18175.54694409  5576.40109469   644.94785455]
Reward: 9  Episode Reward:  -877
xxxxx
x.  x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:28, Action:East
State  176
Old Q Values:  [ 76485.61294353   1327.79507613 104031.59621135      0.        ]
New Q values:  [76485.61294353  1327.79507613 45408.88473875     0.        ]
Reward: 9  Episode Reward:  -868
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 1.26361542e+04 2.91043938e+03]
------
Step:29, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  2.08376358e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  1.03275449e+04  1.20371620e+03]
Reward: -1  Episode Reward:  -869
xxxxx
x.  x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6.64363528e+03 2.47132583e+03 2.24233123e+03 3.52184257e+00]
------
Step:30, Action:North
State  208
Old Q Values:  [ 3218.6929162   3390.74467241 -1925.08326713 -1455.65174173]
New Q values:  [22213.79193287  3390.74467241 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  -870
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 69756.38255464]
------
Step:31, Action:West
State  128
Old Q Values:  [ 8775.70846068 19747.37279955 -8652.84       18015.67011096]
New Q values:  [ 8775.70846068 19747.37279955 -8652.84       37159.2116207 ]
Reward: -1  Episode Reward:  -871
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[    0.          6743.83055328  6789.02994987 99845.1452544 ]
------
Step:32, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   27894.105875   132116.88610344]
New Q values:  [  -180.6          3557.6642036   32083.9571164  132116.88610344]
Reward: -1  Episode Reward:  -872
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283  2315.80784304  -180.00807518 69756.38255464]
------
Step:33, Action:West
State  130
Old Q Values:  [36041.91667283  2315.80784304  -180.00807518 69756.38255464]
New Q values:  [36041.91667283  2315.80784304  -180.00807518 67537.01885289]
Reward: -1  Episode Reward:  -873
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132116.88610344]
------
Step:34, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   32083.9571164  132116.88610344]
New Q values:  [  -180.6          3557.6642036   32083.9571164  132562.29335007]
Reward: 100009  Episode Reward:  99136
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  1.03275449e+04  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.34528215e+03 1.26361542e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.34528215e+03 1.17239993e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22213.79193287  3390.74467241 -1925.08326713 -1455.65174173]
------
Step:2, Action:North
State  208
Old Q Values:  [22213.79193287  3390.74467241 -1925.08326713 -1455.65174173]
New Q values:  [ 9381.8515992   3390.74467241 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.82524895e+02  1.63644942e+03 -3.22965309e-01  5.79786671e+02]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.82524895e+02  1.63644942e+03 -3.22965309e-01  5.79786671e+02]
New Q values:  [ 7.82524895e+02 -2.53146475e+03 -3.22965309e-01  5.79786671e+02]
Reward: -10001  Episode Reward:  -9983
xxxxx
x.. x
x. gx
x ..x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.82524895e+02 -2.53146475e+03 -3.22965309e-01  5.79786671e+02]
------
Step:1, Action:North
State  138
Old Q Values:  [ 7.82524895e+02 -2.53146475e+03 -3.22965309e-01  5.79786671e+02]
New Q values:  [ 3.67167427e+02 -2.53146475e+03 -3.22965309e-01  5.79786671e+02]
Reward: -301  Episode Reward:  -301
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -2.53146475e+03 -3.22965309e-01  5.79786671e+02]
------
Step:2, Action:West
State  136
Old Q Values:  [-1043.6707427   3778.21060029 -2383.80019164 -1580.41930429]
New Q values:  [-1043.6707427   3778.21060029 -2383.80019164  -533.45351893]
Reward: 9  Episode Reward:  -292
xxxxx
x.agx
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2121.87817861   392.82457952]
New Q values:  [ -253.44886264 -1902.20915811  2121.87817861   453.58409043]
Reward: 9  Episode Reward:  -283
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  970.18086207   69.96901366 -252.78192178]
------
Step:4, Action:South
State  109
Old Q Values:  [-241.10880094 1222.47081715   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  685.28778066   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  -274
xxxxx
x  gx
xa..x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  532.87991018   636.33151266 -4652.97823103   -30.99112081]
------
Step:5, Action:South
State  181
Old Q Values:  [  532.87991018   636.33151266 -4652.97823103   -30.99112081]
New Q values:  [  532.87991018   374.44701234 -4652.97823103   -30.99112081]
Reward: 9  Episode Reward:  -265
xxxxx
x g x
x ..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  381.7146909     26.73544252 -1252.67142908   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [  381.7146909     26.73544252 -1252.67142908   -35.88578819]
New Q values:  [  311.94984941    26.73544252 -1252.67142908   -35.88578819]
Reward: -1  Episode Reward:  -266
xxxxx
x  gx
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  532.87991018   374.44701234 -4652.97823103   -30.99112081]
------
Step:7, Action:North
State  181
Old Q Values:  [  532.87991018   374.44701234 -4652.97823103   -30.99112081]
New Q values:  [  418.13829827   374.44701234 -4652.97823103   -30.99112081]
Reward: -1  Episode Reward:  -267
xxxxx
xag x
x ..x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  685.28778066   -8.57207238 -180.6       ]
------
Step:8, Action:South
State  108
Old Q Values:  [-8463.16477134   851.67531979   982.76041352     0.        ]
New Q values:  [-8463.16477134   935.65494908   982.76041352     0.        ]
Reward: -1  Episode Reward:  -268
xxxxx
xg  x
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  -50.84831466  1985.28273723  1116.79388132 -4966.32149798]
------
Step:9, Action:South
State  181
Old Q Values:  [  418.13829827   374.44701234 -4652.97823103   -30.99112081]
New Q values:  [  418.13829827   242.76375976 -4652.97823103   -30.99112081]
Reward: -1  Episode Reward:  -269
xxxxx
x g x
x ..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  311.94984941    26.73544252 -1252.67142908   -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [  311.94984941    26.73544252 -1252.67142908   -35.88578819]
New Q values:  [  249.62142925    26.73544252 -1252.67142908   -35.88578819]
Reward: -1  Episode Reward:  -270
xxxxx
x  gx
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  418.13829827   242.76375976 -4652.97823103   -30.99112081]
------
Step:11, Action:North
State  181
Old Q Values:  [  418.13829827   242.76375976 -4652.97823103   -30.99112081]
New Q values:  [  372.24165351   242.76375976 -4652.97823103   -30.99112081]
Reward: -1  Episode Reward:  -271
xxxxx
xag x
x ..x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  685.28778066   -8.57207238 -180.6       ]
------
Step:12, Action:South
State  109
Old Q Values:  [-241.10880094  685.28778066   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  385.18760832   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  -272
xxxxx
x  gx
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  372.24165351   242.76375976 -4652.97823103   -30.99112081]
------
Step:13, Action:North
State  181
Old Q Values:  [  372.24165351   242.76375976 -4652.97823103   -30.99112081]
New Q values:  [  392.45699563   242.76375976 -4652.97823103   -30.99112081]
Reward: -1  Episode Reward:  -273
xxxxx
xa  x
x .gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  813.86778075  353.8832415  -120.29354603]
------
Step:14, Action:South
State  111
Old Q Values:  [-177.44732869  813.86778075  353.8832415  -120.29354603]
New Q values:  [-177.44732869 3080.78472487  353.8832415  -120.29354603]
Reward: -1  Episode Reward:  -274
xxxxx
x   x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 478.96073712  657.75889141 9186.12537523    0.        ]
------
Step:15, Action:East
State  189
Old Q Values:  [   9.84673294  881.83651799 1350.08508362  154.04646645]
New Q values:  [   9.84673294  881.83651799 2236.55358506  154.04646645]
Reward: 9  Episode Reward:  -265
xxxxx
x   x
x agx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  5.63706517e+03 -8.94356769e+03  2.00341972e+02]
------
Step:16, Action:South
State  199
Old Q Values:  [  14.86214194 9602.36008829  549.89931413 1915.70494401]
New Q values:  [  14.86214194 3981.2013878   549.89931413 1915.70494401]
Reward: 9  Episode Reward:  -256
xxxxx
x   x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 449.52450828]
------
Step:17, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4651.95209012 14892.24309876]
New Q values:  [   37.74111519  -168.92307549  4651.95209012 17846.96082465]
Reward: -1  Episode Reward:  -257
xxxxx
x   x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39635.54528383  2256.66526474  4520.89517899  1875.31501677]
------
Step:18, Action:North
State  261
Old Q Values:  [  249.62142925    26.73544252 -1252.67142908   -35.88578819]
New Q values:  [  216.98567039    26.73544252 -1252.67142908   -35.88578819]
Reward: -1  Episode Reward:  -258
xxxxx
x   x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  392.45699563   242.76375976 -4652.97823103   -30.99112081]
------
Step:19, Action:North
State  180
Old Q Values:  [  -50.84831466  1985.28273723  1116.79388132 -4966.32149798]
New Q values:  [   99.04174147  1985.28273723  1116.79388132 -4966.32149798]
Reward: -1  Episode Reward:  -259
xxxxx
xa  x
xg .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -1921.51796559   399.93689112  -180.6       ]
------
Step:20, Action:East
State  108
Old Q Values:  [-8463.16477134   935.65494908   982.76041352     0.        ]
New Q values:  [-8463.16477134   935.65494908   897.8432354      0.        ]
Reward: -1  Episode Reward:  -260
xxxxx
xga x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 1684.46356666  963.6944397 ]
------
Step:21, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 1684.46356666  963.6944397 ]
New Q values:  [   0.         1166.51141701 1806.64860675  963.6944397 ]
Reward: -1  Episode Reward:  -261
xxxxx
x gax
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3778.21060029 -2383.80019164  -533.45351893]
------
Step:22, Action:South
State  130
Old Q Values:  [36041.91667283  2315.80784304  -180.00807518 67537.01885289]
New Q values:  [36041.91667283 63746.27861698  -180.00807518 67537.01885289]
Reward: 100009  Episode Reward:  99748
xxxxx
x   x
x gax
x   x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  216.98567039    26.73544252 -1252.67142908   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [  216.98567039    26.73544252 -1252.67142908   -35.88578819]
New Q values:  [  209.93136684    26.73544252 -1252.67142908   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  392.45699563   242.76375976 -4652.97823103   -30.99112081]
------
Step:2, Action:North
State  181
Old Q Values:  [  392.45699563   242.76375976 -4652.97823103   -30.99112081]
New Q values:  [ 1086.61821571   242.76375976 -4652.97823103   -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
xa. x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3080.78472487  353.8832415  -120.29354603]
------
Step:3, Action:South
State  109
Old Q Values:  [-241.10880094  385.18760832   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  479.46050804   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1086.61821571   242.76375976 -4652.97823103   -30.99112081]
------
Step:4, Action:North
State  181
Old Q Values:  [ 1086.61821571   242.76375976 -4652.97823103   -30.99112081]
New Q values:  [ 1358.28270375   242.76375976 -4652.97823103   -30.99112081]
Reward: -1  Episode Reward:  16
xxxxx
xa. x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3080.78472487  353.8832415  -120.29354603]
------
Step:5, Action:South
State  110
Old Q Values:  [ -239.29051573 -1921.51796559   399.93689112  -180.6       ]
New Q values:  [ -239.29051573 -6173.62236507   399.93689112  -180.6       ]
Reward: -10001  Episode Reward:  -9985
xxxxx
x . x
xg..x
x ..x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   756.71776447    69.13064323]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   756.71776447    69.13064323]
New Q values:  [ -281.736      -3455.78276043   482.0231071     69.13064323]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -2.53146475e+03 -3.22965309e-01  5.79786671e+02]
------
Step:2, Action:West
State  136
Old Q Values:  [-1043.6707427   3778.21060029 -2383.80019164  -533.45351893]
New Q values:  [-1043.6707427   3778.21060029 -2383.80019164   353.05239929]
Reward: -1  Episode Reward:  8
xxxxx
xga x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1890.11268953  -880.32621716]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   482.0231071     69.13064323]
New Q values:  [ -281.736      -3455.78276043   366.14524415    69.13064323]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -2.53146475e+03 -3.22965309e-01  5.79786671e+02]
------
Step:4, Action:West
State  136
Old Q Values:  [-1043.6707427   3778.21060029 -2383.80019164   353.05239929]
New Q values:  [-1043.6707427   3778.21060029 -2383.80019164   707.65476657]
Reward: -1  Episode Reward:  6
xxxxx
xga x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1890.11268953  -880.32621716]
------
Step:5, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1890.11268953  -880.32621716]
New Q values:  [-9594.56523706 -8069.05606225  1888.9082559   -880.32621716]
Reward: -1  Episode Reward:  5
xxxxx
x.gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3778.21060029 -2383.80019164   707.65476657]
------
Step:6, Action:South
State  136
Old Q Values:  [-1043.6707427   3778.21060029 -2383.80019164   707.65476657]
New Q values:  [-1043.6707427   4331.23971988 -2383.80019164   707.65476657]
Reward: 9  Episode Reward:  14
xxxxx
xg  x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9381.8515992   3390.74467241 -1925.08326713 -1455.65174173]
------
Step:7, Action:North
State  208
Old Q Values:  [ 9381.8515992   3390.74467241 -1925.08326713 -1455.65174173]
New Q values:  [ 5051.51255564  3390.74467241 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  13
xxxxx
x.gax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   4331.23971988 -2383.80019164   707.65476657]
------
Step:8, Action:South
State  138
Old Q Values:  [ 3.67167427e+02 -2.53146475e+03 -3.22965309e-01  5.79786671e+02]
New Q values:  [ 3.67167427e+02  5.02267866e+02 -3.22965309e-01  5.79786671e+02]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5051.51255564  3390.74467241 -1925.08326713 -1455.65174173]
------
Step:9, Action:North
State  208
Old Q Values:  [ 5051.51255564  3390.74467241 -1925.08326713 -1455.65174173]
New Q values:  [ 2193.94102357  3390.74467241 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  5.02267866e+02 -3.22965309e-01  5.79786671e+02]
------
Step:10, Action:West
State  136
Old Q Values:  [-1043.6707427   4331.23971988 -2383.80019164   707.65476657]
New Q values:  [-1043.6707427   4331.23971988 -2383.80019164   849.1343834 ]
Reward: -1  Episode Reward:  10
xxxxx
xga x
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1888.9082559   -880.32621716]
------
Step:11, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1888.9082559   -880.32621716]
New Q values:  [-9594.56523706 -8069.05606225  2054.33521832  -880.32621716]
Reward: -1  Episode Reward:  9
xxxxx
x.gax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   4331.23971988 -2383.80019164   849.1343834 ]
------
Step:12, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  5.02267866e+02 -3.22965309e-01  5.79786671e+02]
New Q values:  [ 3.67167427e+02  1.21753055e+03 -3.22965309e-01  5.79786671e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2193.94102357  3390.74467241 -1925.08326713 -1455.65174173]
------
Step:13, Action:South
State  208
Old Q Values:  [ 2193.94102357  3390.74467241 -1925.08326713 -1455.65174173]
New Q values:  [ 2193.94102357  3050.33332961 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  17
xxxxx
x.g x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5552.29711516 -1609.84182322 -7525.7277781   5628.78486882]
------
Step:14, Action:West
State  288
Old Q Values:  [ 5552.29711516 -1609.84182322 -7525.7277781   5628.78486882]
New Q values:  [ 5552.29711516 -1609.84182322 -7525.7277781   6041.23812069]
Reward: 9  Episode Reward:  26
xxxxx
xg  x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4692.62799299 12614.41391053]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4692.62799299 12614.41391053]
New Q values:  [-2527.46239811 -8521.23367799  4692.62799299 19512.89437113]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[48225.76268973 18175.54694409  5576.40109469   644.94785455]
------
Step:16, Action:South
State  256
Old Q Values:  [48225.76268973 18175.54694409  5576.40109469   644.94785455]
New Q values:  [48225.76268973 15557.34758455  5576.40109469   644.94785455]
Reward: -10301  Episode Reward:  -10276
xxxxx
x.  x
x.. x
xg  x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3080.78472487  353.8832415  -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [-180.6        4189.04386952    5.4           0.        ]
New Q values:  [-180.6        4436.85516038    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 478.96073712  657.75889141 9186.12537523    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [ 1358.28270375   242.76375976 -4652.97823103   -30.99112081]
New Q values:  [1358.28270375  242.76375976  -95.25623902  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5868.45017799 -5727.2185887   1099.96026581]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  5868.45017799 -5727.2185887   1099.96026581]
New Q values:  [-5922.26708831  7700.86831859 -5727.2185887   1099.96026581]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4651.95209012 17846.96082465]
------
Step:4, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 449.52450828]
New Q values:  [  1.64433      0.         -29.77444073 248.18921337]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  209.93136684    26.73544252 -1252.67142908   -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [  209.93136684    26.73544252 -1252.67142908   -35.88578819]
New Q values:  [ 2839.21015931    26.73544252 -1252.67142908   -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 478.96073712  657.75889141 9186.12537523    0.        ]
------
Step:6, Action:East
State  181
Old Q Values:  [1358.28270375  242.76375976  -95.25623902  -30.99112081]
New Q values:  [1358.28270375  242.76375976 2271.55799997  -30.99112081]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7700.86831859 -5727.2185887   1099.96026581]
------
Step:7, Action:South
State  193
Old Q Values:  [-5922.26708831  7700.86831859 -5727.2185887   1099.96026581]
New Q values:  [-5922.26708831  3154.20409145 -5727.2185887   1099.96026581]
Reward: -1  Episode Reward:  23
xxxxx
x .gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 248.18921337]
------
Step:8, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 248.18921337]
New Q values:  [  1.64433      0.         -29.77444073 950.43873314]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2839.21015931    26.73544252 -1252.67142908   -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [ 2839.21015931    26.73544252 -1252.67142908   -35.88578819]
New Q values:  [ 3890.92167629    26.73544252 -1252.67142908   -35.88578819]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 478.96073712  657.75889141 9186.12537523    0.        ]
------
Step:10, Action:East
State  183
Old Q Values:  [ 478.96073712  657.75889141 9186.12537523    0.        ]
New Q values:  [ 478.96073712  657.75889141 7458.50205899    0.        ]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
------
Step:11, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.34528215e+03 1.17239993e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.34528215e+03 5.61009970e+03 2.91043938e+03]
Reward: 9  Episode Reward:  29
xxxxx
x ..x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2193.94102357  3050.33332961 -1925.08326713 -1455.65174173]
------
Step:12, Action:South
State  208
Old Q Values:  [ 2193.94102357  3050.33332961 -1925.08326713 -1455.65174173]
New Q values:  [ 2193.94102357  3037.90476805 -1925.08326713 -1455.65174173]
Reward: 9  Episode Reward:  38
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5552.29711516 -1609.84182322 -7525.7277781   6041.23812069]
------
Step:13, Action:West
State  288
Old Q Values:  [ 5552.29711516 -1609.84182322 -7525.7277781   6041.23812069]
New Q values:  [ 5552.29711516 -1609.84182322 -7525.7277781   8269.76355961]
Reward: -1  Episode Reward:  37
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4692.62799299 19512.89437113]
------
Step:14, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2534.24027581   410.08818701]
New Q values:  [   16.82637525 -5807.06396197  2534.24027581  1330.71177769]
Reward: -1  Episode Reward:  36
xxxxx
x g.x
x   x
xa  x
xxxxx
Step:15, Action:East
State  261
Old Q Values:  [ 3890.92167629    26.73544252 -1252.67142908   -35.88578819]
New Q values:  [3890.92167629   26.73544252 4852.41967576  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  4651.95209012 17846.96082465]
------
Step:16, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  4651.95209012 17846.96082465]
New Q values:  [  37.74111519 -168.92307549 4651.95209012 8593.91023259]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3890.92167629   26.73544252 4852.41967576  -35.88578819]
------
Step:17, Action:East
State  261
Old Q Values:  [3890.92167629   26.73544252 4852.41967576  -35.88578819]
New Q values:  [3890.92167629   26.73544252 4518.54094008  -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 8593.91023259]
------
Step:18, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4651.95209012 8593.91023259]
New Q values:  [  37.74111519 -168.92307549 4651.95209012 4792.52637506]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3890.92167629   26.73544252 4518.54094008  -35.88578819]
------
Step:19, Action:East
State  261
Old Q Values:  [3890.92167629   26.73544252 4518.54094008  -35.88578819]
New Q values:  [3890.92167629   26.73544252 3244.57428855  -35.88578819]
Reward: -1  Episode Reward:  31
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 4792.52637506]
------
Step:20, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4651.95209012 4792.52637506]
New Q values:  [  37.74111519 -168.92307549 4651.95209012 3083.68705291]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3890.92167629   26.73544252 3244.57428855  -35.88578819]
------
Step:21, Action:North
State  261
Old Q Values:  [3890.92167629   26.73544252 3244.57428855  -35.88578819]
New Q values:  [2237.23607051   26.73544252 3244.57428855  -35.88578819]
Reward: -1  Episode Reward:  29
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1358.28270375  242.76375976 2271.55799997  -30.99112081]
------
Step:22, Action:East
State  181
Old Q Values:  [1358.28270375  242.76375976 2271.55799997  -30.99112081]
New Q values:  [1358.28270375  242.76375976 2135.87853873  -30.99112081]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   471.55277317  4092.85112914   239.04887894]
------
Step:23, Action:East
State  193
Old Q Values:  [-5922.26708831  3154.20409145 -5727.2185887   1099.96026581]
New Q values:  [-5922.26708831  3154.20409145 -1380.11600506  1099.96026581]
Reward: -1  Episode Reward:  27
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2193.94102357  3037.90476805 -1925.08326713 -1455.65174173]
------
Step:24, Action:South
State  208
Old Q Values:  [ 2193.94102357  3037.90476805 -1925.08326713 -1455.65174173]
New Q values:  [ 2193.94102357  3695.4909751  -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5552.29711516 -1609.84182322 -7525.7277781   8269.76355961]
------
Step:25, Action:West
State  288
Old Q Values:  [ 5552.29711516 -1609.84182322 -7525.7277781   8269.76355961]
New Q values:  [ 5552.29711516 -1609.84182322 -7525.7277781   4702.89105088]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 3083.68705291]
------
Step:26, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 4651.95209012 3083.68705291]
New Q values:  [  37.74111519 -168.92307549 4651.95209012 2206.24710773]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2237.23607051   26.73544252 3244.57428855  -35.88578819]
------
Step:27, Action:East
State  261
Old Q Values:  [2237.23607051   26.73544252 3244.57428855  -35.88578819]
New Q values:  [2237.23607051   26.73544252 2692.81534246  -35.88578819]
Reward: -1  Episode Reward:  23
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 2206.24710773]
------
Step:28, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2534.24027581  1330.71177769]
New Q values:  [   16.82637525 -5807.06396197  2678.78524487  1330.71177769]
Reward: -1  Episode Reward:  22
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5552.29711516 -1609.84182322 -7525.7277781   4702.89105088]
------
Step:29, Action:North
State  288
Old Q Values:  [ 5552.29711516 -1609.84182322 -7525.7277781   4702.89105088]
New Q values:  [ 3328.9661386  -1609.84182322 -7525.7277781   4702.89105088]
Reward: -1  Episode Reward:  21
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2193.94102357  3695.4909751  -1925.08326713 -1455.65174173]
------
Step:30, Action:South
State  208
Old Q Values:  [ 2193.94102357  3695.4909751  -1925.08326713 -1455.65174173]
New Q values:  [ 2193.94102357  2888.46370531 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  20
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3328.9661386  -1609.84182322 -7525.7277781   4702.89105088]
------
Step:31, Action:West
State  288
Old Q Values:  [ 3328.9661386  -1609.84182322 -7525.7277781   4702.89105088]
New Q values:  [ 3328.9661386  -1609.84182322 -7525.7277781   3276.14204739]
Reward: -1  Episode Reward:  19
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 4651.95209012 2206.24710773]
------
Step:32, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 4651.95209012 2206.24710773]
New Q values:  [  37.74111519 -168.92307549 2858.87067763 2206.24710773]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3328.9661386  -1609.84182322 -7525.7277781   3276.14204739]
------
Step:33, Action:West
State  288
Old Q Values:  [ 3328.9661386  -1609.84182322 -7525.7277781   3276.14204739]
New Q values:  [ 3328.9661386  -1609.84182322 -7525.7277781   2167.51802224]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2858.87067763 2206.24710773]
------
Step:34, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2858.87067763 2206.24710773]
New Q values:  [  37.74111519 -168.92307549 2141.63811263 2206.24710773]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3328.9661386  -1609.84182322 -7525.7277781   2167.51802224]
------
Step:35, Action:West
State  288
Old Q Values:  [ 3328.9661386  -1609.84182322 -7525.7277781   2167.51802224]
New Q values:  [ 3328.9661386  -1609.84182322 -7525.7277781   1528.28134122]
Reward: -1  Episode Reward:  15
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2141.63811263 2206.24710773]
------
Step:36, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2678.78524487  1330.71177769]
New Q values:  [   16.82637525 -5807.06396197  2678.78524487  1339.52931381]
Reward: -1  Episode Reward:  14
xxxxx
x g.x
x   x
xa  x
xxxxx
Step:37, Action:South
State  260
Old Q Values:  [ 2278.98816136 -8695.4397473   3077.70944603 -2601.74710518]
New Q values:  [ 2278.98816136 -2735.46306511  3077.70944603 -2601.74710518]
Reward: -301  Episode Reward:  -287
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2278.98816136 -2735.46306511  3077.70944603 -2601.74710518]
------
Step:38, Action:East
State  260
Old Q Values:  [ 2278.98816136 -2735.46306511  3077.70944603 -2601.74710518]
New Q values:  [ 2278.98816136 -2735.46306511  7084.35208975 -2601.74710518]
Reward: -1  Episode Reward:  -288
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4692.62799299 19512.89437113]
------
Step:39, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4692.62799299 19512.89437113]
New Q values:  [-2527.46239811 -8521.23367799  4692.62799299  8612.40235119]
Reward: -1  Episode Reward:  -289
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2237.23607051   26.73544252 2692.81534246  -35.88578819]
------
Step:40, Action:East
State  261
Old Q Values:  [2237.23607051   26.73544252 2692.81534246  -35.88578819]
New Q values:  [ 2237.23607051    26.73544252 -2339.75315766   -35.88578819]
Reward: -10001  Episode Reward:  -10290
xxxxx
x ..x
x   x
x g x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4692.62799299  8612.40235119]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4692.62799299  8612.40235119]
New Q values:  [-2527.46239811 -8521.23367799  2881.14103877  8612.40235119]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
x gax
xxxxx
Step:2, Action:East
State  288
Old Q Values:  [ 3328.9661386  -1609.84182322 -7525.7277781   1528.28134122]
New Q values:  [ 3328.9661386  -1609.84182322 -8192.20126966  1528.28134122]
Reward: -10301  Episode Reward:  -10292
xxxxx
x...x
x...x
x  gx
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   4331.23971988 -2383.80019164   849.1343834 ]
------
Step:1, Action:South
State  136
Old Q Values:  [-1043.6707427   4331.23971988 -2383.80019164   849.1343834 ]
New Q values:  [-1043.6707427   3511.85541575 -2383.80019164   849.1343834 ]
Reward: 9  Episode Reward:  9
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1355.56211439   372.98523174 -8489.43729461  5913.198426  ]
------
Step:2, Action:West
State  216
Old Q Values:  [ 1355.56211439   372.98523174 -8489.43729461  5913.198426  ]
New Q values:  [ 1355.56211439   372.98523174 -8489.43729461  4053.70928032]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 5.61009970e+03 2.91043938e+03]
------
Step:3, Action:East
State  200
Old Q Values:  [  62.8218634  9575.751294    425.06225586  408.67479662]
New Q values:  [  62.8218634  9575.751294   1385.53768644  408.67479662]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1355.56211439   372.98523174 -8489.43729461  4053.70928032]
------
Step:4, Action:West
State  216
Old Q Values:  [ 1355.56211439   372.98523174 -8489.43729461  4053.70928032]
New Q values:  [ 1355.56211439   372.98523174 -8489.43729461  3303.91362205]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 5.61009970e+03 2.91043938e+03]
------
Step:5, Action:East
State  193
Old Q Values:  [-5922.26708831  3154.20409145 -1380.11600506  1099.96026581]
New Q values:  [-5922.26708831  3154.20409145   438.52768459  1099.96026581]
Reward: -1  Episode Reward:  15
xxxxx
x. gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1355.56211439   372.98523174 -8489.43729461  3303.91362205]
------
Step:6, Action:West
State  216
Old Q Values:  [ 1355.56211439   372.98523174 -8489.43729461  3303.91362205]
New Q values:  [ 1355.56211439   372.98523174 -8489.43729461  2267.22667625]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3154.20409145   438.52768459  1099.96026581]
------
Step:7, Action:South
State  195
Old Q Values:  [   38.85388605  4930.50346558 23527.39531294  1101.59744825]
New Q values:  [   38.85388605  2639.47551855 23527.39531294  1101.59744825]
Reward: 9  Episode Reward:  23
xxxxx
x.  x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2141.63811263 2206.24710773]
------
Step:8, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2141.63811263 2206.24710773]
New Q values:  [   37.74111519  -168.92307549  2141.63811263 12778.56242824]
Reward: 9  Episode Reward:  32
xxxxx
x.  x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39635.54528383  2256.66526474  4520.89517899  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [39635.54528383  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [27911.19734055  2256.66526474  4520.89517899  1875.31501677]
Reward: 9  Episode Reward:  41
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17203.58890013 40171.93075675     0.        ]
------
Step:10, Action:East
State  179
Old Q Values:  [    0.         17203.58890013 40171.93075675     0.        ]
New Q values:  [    0.         17203.58890013 19166.43576748     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  1.03275449e+04  1.20371620e+03]
------
Step:11, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.34528215e+03 5.61009970e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.34528215e+03 3.10997899e+03 2.91043938e+03]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2193.94102357  2888.46370531 -1925.08326713 -1455.65174173]
------
Step:12, Action:South
State  210
Old Q Values:  [6.64363528e+03 2.47132583e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [6.64363528e+03 1.99262018e+03 2.24233123e+03 3.52184257e+00]
Reward: 9  Episode Reward:  48
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3328.9661386  -1609.84182322 -8192.20126966  1528.28134122]
------
Step:13, Action:North
State  288
Old Q Values:  [ 3328.9661386  -1609.84182322 -8192.20126966  1528.28134122]
New Q values:  [ 2197.52556703 -1609.84182322 -8192.20126966  1528.28134122]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2193.94102357  2888.46370531 -1925.08326713 -1455.65174173]
------
Step:14, Action:South
State  210
Old Q Values:  [6.64363528e+03 1.99262018e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [6.64363528e+03 1.45570574e+03 2.24233123e+03 3.52184257e+00]
Reward: -1  Episode Reward:  46
xxxxx
x.  x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2197.52556703 -1609.84182322 -8192.20126966  1528.28134122]
------
Step:15, Action:North
State  288
Old Q Values:  [ 2197.52556703 -1609.84182322 -8192.20126966  1528.28134122]
New Q values:  [ 1744.9493384  -1609.84182322 -8192.20126966  1528.28134122]
Reward: -1  Episode Reward:  45
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2193.94102357  2888.46370531 -1925.08326713 -1455.65174173]
------
Step:16, Action:South
State  208
Old Q Values:  [ 2193.94102357  2888.46370531 -1925.08326713 -1455.65174173]
New Q values:  [ 2193.94102357  1678.27028364 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  44
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1744.9493384  -1609.84182322 -8192.20126966  1528.28134122]
------
Step:17, Action:North
State  288
Old Q Values:  [ 1744.9493384  -1609.84182322 -8192.20126966  1528.28134122]
New Q values:  [ 1355.56204243 -1609.84182322 -8192.20126966  1528.28134122]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2193.94102357  1678.27028364 -1925.08326713 -1455.65174173]
------
Step:18, Action:North
State  208
Old Q Values:  [ 2193.94102357  1678.27028364 -1925.08326713 -1455.65174173]
New Q values:  [12024.73989564  1678.27028364 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  42
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068 19747.37279955 -8652.84       37159.2116207 ]
------
Step:19, Action:South
State  130
Old Q Values:  [36041.91667283 63746.27861698  -180.00807518 67537.01885289]
New Q values:  [36041.91667283 29105.33341548  -180.00807518 67537.01885289]
Reward: -1  Episode Reward:  41
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12024.73989564  1678.27028364 -1925.08326713 -1455.65174173]
------
Step:20, Action:North
State  208
Old Q Values:  [12024.73989564  1678.27028364 -1925.08326713 -1455.65174173]
New Q values:  [25070.40161412  1678.27028364 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  40
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 29105.33341548  -180.00807518 67537.01885289]
------
Step:21, Action:West
State  128
Old Q Values:  [ 8775.70846068 19747.37279955 -8652.84       37159.2116207 ]
New Q values:  [ 8775.70846068 19747.37279955 -8652.84       15717.21796752]
Reward: -1  Episode Reward:  39
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.          2847.11106414 -5999.38454759     0.        ]
------
Step:22, Action:South
State  112
Old Q Values:  [    0.          6743.83055328  6789.02994987 99845.1452544 ]
New Q values:  [    0.          3629.92591876  6789.02994987 99845.1452544 ]
Reward: -1  Episode Reward:  38
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 3.10997899e+03 2.91043938e+03]
------
Step:23, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.34528215e+03 3.10997899e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.34528215e+03 8.76451208e+03 2.91043938e+03]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25070.40161412  1678.27028364 -1925.08326713 -1455.65174173]
------
Step:24, Action:North
State  208
Old Q Values:  [25070.40161412  1678.27028364 -1925.08326713 -1455.65174173]
New Q values:  [30288.66630152  1678.27028364 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  36
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 29105.33341548  -180.00807518 67537.01885289]
------
Step:25, Action:West
State  130
Old Q Values:  [36041.91667283 29105.33341548  -180.00807518 67537.01885289]
New Q values:  [36041.91667283 29105.33341548  -180.00807518 66782.89554618]
Reward: -1  Episode Reward:  35
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:26, Action:West
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.32322009e+05]
New Q values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.32644343e+05]
Reward: 100009  Episode Reward:  100044
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1355.56204243 -1609.84182322 -8192.20126966  1528.28134122]
------
Step:1, Action:West
State  288
Old Q Values:  [ 1355.56204243 -1609.84182322 -8192.20126966  1528.28134122]
New Q values:  [ 1355.56204243 -1609.84182322 -8192.20126966  3200.43324184]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2881.14103877  8612.40235119]
------
Step:2, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  2141.63811263 12778.56242824]
New Q values:  [  37.74111519 -168.92307549 2141.63811263 5787.99579245]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2237.23607051    26.73544252 -2339.75315766   -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [ 2237.23607051    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1541.05798982    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1358.28270375  242.76375976 2135.87853873  -30.99112081]
------
Step:4, Action:East
State  183
Old Q Values:  [ 478.96073712  657.75889141 7458.50205899    0.        ]
New Q values:  [ 478.96073712  657.75889141 4183.16123993    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194 3981.2013878   549.89931413 1915.70494401]
------
Step:5, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -6.80462050e+03  1.26155064e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -7.91881263e+03  1.26155064e+04  0.00000000e+00]
Reward: -10001  Episode Reward:  -9965
xxxxx
x ..x
x  .x
x g x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.21753055e+03 -3.22965309e-01  5.79786671e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.21753055e+03 -3.22965309e-01  5.79786671e+02]
New Q values:  [ 3.67167427e+02  1.17258022e+03 -3.22965309e-01  5.79786671e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1355.56211439   372.98523174 -8489.43729461  2267.22667625]
------
Step:2, Action:North
State  208
Old Q Values:  [30288.66630152  1678.27028364 -1925.08326713 -1455.65174173]
New Q values:  [12466.64058724  1678.27028364 -1925.08326713 -1455.65174173]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.17258022e+03 -3.22965309e-01  5.79786671e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.17258022e+03 -3.22965309e-01  5.79786671e+02]
New Q values:  [ 3.67167427e+02  1.17258022e+03 -3.22965309e-01  8.73878122e+02]
Reward: 9  Episode Reward:  17
xxxxx
x.a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2121.87817861   453.58409043]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   366.14524415    69.13064323]
New Q values:  [ -281.736      -3455.78276043   497.63216429    69.13064323]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.17258022e+03 -3.22965309e-01  8.73878122e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.17258022e+03 -3.22965309e-01  8.73878122e+02]
New Q values:  [ 3.67167427e+02  2.46152267e+03 -3.22965309e-01  8.73878122e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6.64363528e+03 1.45570574e+03 2.24233123e+03 3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [6.64363528e+03 1.45570574e+03 2.24233123e+03 3.52184257e+00]
New Q values:  [3395.31091153 1455.70574015 2242.33123263    3.52184257]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  2.46152267e+03 -3.22965309e-01  8.73878122e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  2.46152267e+03 -3.22965309e-01  8.73878122e+02]
New Q values:  [ 3.67167427e+02  2.00260234e+03 -3.22965309e-01  8.73878122e+02]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x .ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3395.31091153 1455.70574015 2242.33123263    3.52184257]
------
Step:8, Action:North
State  210
Old Q Values:  [3395.31091153 1455.70574015 2242.33123263    3.52184257]
New Q values:  [1958.30506722 1455.70574015 2242.33123263    3.52184257]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  2.00260234e+03 -3.22965309e-01  8.73878122e+02]
------
Step:9, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  2.00260234e+03 -3.22965309e-01  8.73878122e+02]
New Q values:  [ 3.67167427e+02  1.47314031e+03 -3.22965309e-01  8.73878122e+02]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x .ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1958.30506722 1455.70574015 2242.33123263    3.52184257]
------
Step:10, Action:East
State  216
Old Q Values:  [ 1355.56211439   372.98523174 -8489.43729461  2267.22667625]
New Q values:  [ 1355.56211439   372.98523174 -8896.20691497  2267.22667625]
Reward: -10301  Episode Reward:  -10290
xxxxx
x.  x
x .gx
x...x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1958.30506722 1455.70574015 2242.33123263    3.52184257]
------
Step:1, Action:East
State  208
Old Q Values:  [12466.64058724  1678.27028364 -1925.08326713 -1455.65174173]
New Q values:  [12466.64058724  1678.27028364  2789.35886932 -1455.65174173]
Reward: -301  Episode Reward:  -301
xxxxx
x...x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12466.64058724  1678.27028364  2789.35886932 -1455.65174173]
------
Step:2, Action:North
State  208
Old Q Values:  [12466.64058724  1678.27028364  2789.35886932 -1455.65174173]
New Q values:  [ 5433.99832687  1678.27028364  2789.35886932 -1455.65174173]
Reward: 9  Episode Reward:  -292
xxxxx
x..ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.47314031e+03 -3.22965309e-01  8.73878122e+02]
------
Step:3, Action:South
State  136
Old Q Values:  [-1043.6707427   3511.85541575 -2383.80019164   849.1343834 ]
New Q values:  [-1043.6707427   3034.34166436 -2383.80019164   849.1343834 ]
Reward: -1  Episode Reward:  -293
xxxxx
x.g x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5433.99832687  1678.27028364  2789.35886932 -1455.65174173]
------
Step:4, Action:North
State  208
Old Q Values:  [ 5433.99832687  1678.27028364  2789.35886932 -1455.65174173]
New Q values:  [ 3083.30183006  1678.27028364  2789.35886932 -1455.65174173]
Reward: -1  Episode Reward:  -294
xxxxx
xg.ax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3034.34166436 -2383.80019164   849.1343834 ]
------
Step:5, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.47314031e+03 -3.22965309e-01  8.73878122e+02]
New Q values:  [ 3.67167427e+02  1.51364667e+03 -3.22965309e-01  8.73878122e+02]
Reward: -1  Episode Reward:  -295
xxxxx
x.. x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3083.30183006  1678.27028364  2789.35886932 -1455.65174173]
------
Step:6, Action:North
State  208
Old Q Values:  [ 3083.30183006  1678.27028364  2789.35886932 -1455.65174173]
New Q values:  [ 2143.02323133  1678.27028364  2789.35886932 -1455.65174173]
Reward: -1  Episode Reward:  -296
xxxxx
xg.ax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3034.34166436 -2383.80019164   849.1343834 ]
------
Step:7, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.51364667e+03 -3.22965309e-01  8.73878122e+02]
New Q values:  [ 3.67167427e+02  1.44166633e+03 -3.22965309e-01  8.73878122e+02]
Reward: -1  Episode Reward:  -297
xxxxx
x.. x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2143.02323133  1678.27028364  2789.35886932 -1455.65174173]
------
Step:8, Action:East
State  210
Old Q Values:  [1958.30506722 1455.70574015 2242.33123263    3.52184257]
New Q values:  [1958.30506722 1455.70574015 1389.03186284    3.52184257]
Reward: -301  Episode Reward:  -598
xxxxx
x.. x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1958.30506722 1455.70574015 1389.03186284    3.52184257]
------
Step:9, Action:North
State  208
Old Q Values:  [ 2143.02323133  1678.27028364  2789.35886932 -1455.65174173]
New Q values:  [ 1289.10919137  1678.27028364  2789.35886932 -1455.65174173]
Reward: -1  Episode Reward:  -599
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.44166633e+03 -3.22965309e-01  8.73878122e+02]
------
Step:10, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.44166633e+03 -3.22965309e-01  8.73878122e+02]
New Q values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  8.73878122e+02]
Reward: -1  Episode Reward:  -600
xxxxx
x.. x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1289.10919137  1678.27028364  2789.35886932 -1455.65174173]
------
Step:11, Action:East
State  208
Old Q Values:  [ 1289.10919137  1678.27028364  2789.35886932 -1455.65174173]
New Q values:  [ 1289.10919137  1678.27028364 -4228.04879148 -1455.65174173]
Reward: -10301  Episode Reward:  -10901
xxxxx
x.. x
x..gx
x ..x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 8.76451208e+03 2.91043938e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  9575.751294   1385.53768644  408.67479662]
New Q values:  [  62.8218634  9575.751294   1239.78307745  408.67479662]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1355.56211439   372.98523174 -8896.20691497  2267.22667625]
------
Step:2, Action:West
State  208
Old Q Values:  [ 1289.10919137  1678.27028364 -4228.04879148 -1455.65174173]
New Q values:  [ 1289.10919137  1678.27028364 -4228.04879148  2046.49292756]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 8.76451208e+03 2.91043938e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  1.03275449e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01 -3.71565138e+03  4.71790947e+03  1.20371620e+03]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1958.30506722 1455.70574015 1389.03186284    3.52184257]
------
Step:4, Action:North
State  210
Old Q Values:  [1958.30506722 1455.70574015 1389.03186284    3.52184257]
New Q values:  [2.08235907e+04 1.45570574e+03 1.38903186e+03 3.52184257e+00]
Reward: 9  Episode Reward:  16
xxxxx
x..ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 29105.33341548  -180.00807518 66782.89554618]
------
Step:5, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  8.73878122e+02]
New Q values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  4.01236393e+04]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   497.63216429    69.13064323]
New Q values:  [ -281.736      -3455.78276043   497.63216429   324.10651591]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  970.18086207   69.96901366 -252.78192178]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558  970.18086207   69.96901366 -252.78192178]
New Q values:  [-252.35169558  537.81750349   69.96901366 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[135.20477233   0.         501.15052887   0.        ]
------
Step:8, Action:East
State  187
Old Q Values:  [135.20477233   0.         501.15052887   0.        ]
New Q values:  [135.20477233   0.         589.29977683   0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458  1298.13188427     0.        ]
------
Step:9, Action:East
State  203
Old Q Values:  [  3.60604218 705.82716573 998.99146402   0.        ]
New Q values:  [   3.60604218  705.82716573 1909.98438219    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[5036.62598862  848.96225083    0.          429.03841886]
------
Step:10, Action:North
State  216
Old Q Values:  [ 1355.56211439   372.98523174 -8896.20691497  2267.22667625]
New Q values:  [12578.7166219    372.98523174 -8896.20691497  2267.22667625]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  4.01236393e+04]
------
Step:11, Action:West
State  136
Old Q Values:  [-1043.6707427   3034.34166436 -2383.80019164   849.1343834 ]
New Q values:  [-1043.6707427   3034.34166436 -2383.80019164   432.36795615]
Reward: -1  Episode Reward:  29
xxxxx
x agx
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:12, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2054.33521832  -880.32621716]
New Q values:  [-9594.56523706 -8069.05606225  2054.33521832  -208.89233445]
Reward: -1  Episode Reward:  28
xxxxx
xag x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  479.46050804   -8.57207238 -180.6       ]
------
Step:13, Action:South
State  109
Old Q Values:  [-241.10880094  479.46050804   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  862.15027873   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  881.83651799 2236.55358506  154.04646645]
------
Step:14, Action:East
State  189
Old Q Values:  [   9.84673294  881.83651799 2236.55358506  154.04646645]
New Q values:  [   9.84673294  881.83651799 3766.74682222  154.04646645]
Reward: -1  Episode Reward:  26
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  9575.751294   1239.78307745  408.67479662]
------
Step:15, Action:South
State  200
Old Q Values:  [  62.8218634  9575.751294   1239.78307745  408.67479662]
New Q values:  [  62.8218634  6419.42122296 1239.78307745  408.67479662]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2881.14103877  8612.40235119]
------
Step:16, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2678.78524487  1339.52931381]
New Q values:  [   16.82637525 -5807.06396197  2678.78524487  2666.51735245]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2278.98816136 -2735.46306511  7084.35208975 -2601.74710518]
------
Step:17, Action:East
State  260
Old Q Values:  [ 2278.98816136 -2735.46306511  7084.35208975 -2601.74710518]
New Q values:  [ 2278.98816136 -2735.46306511  3636.77640936 -2601.74710518]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2678.78524487  2666.51735245]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2881.14103877  8612.40235119]
New Q values:  [-2527.46239811 -8521.23367799 62117.98638806  8612.40235119]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1289.10919137  1678.27028364 -4228.04879148  2046.49292756]
------
Step:1, Action:West
State  208
Old Q Values:  [ 1289.10919137  1678.27028364 -4228.04879148  2046.49292756]
New Q values:  [ 1289.10919137  1678.27028364 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3154.20409145   438.52768459  1099.96026581]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831  3154.20409145   438.52768459  1099.96026581]
New Q values:  [-5922.26708831  3003.48037431   438.52768459  1099.96026581]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2141.63811263 5787.99579245]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 62117.98638806  8612.40235119]
New Q values:  [-2527.46239811 -8521.23367799 62117.98638806 11823.72014264]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[27911.19734055  2256.66526474  4520.89517899  1875.31501677]
------
Step:4, Action:North
State  260
Old Q Values:  [ 2278.98816136 -2735.46306511  3636.77640936 -2601.74710518]
New Q values:  [ 1512.58008571 -2735.46306511  3636.77640936 -2601.74710518]
Reward: 9  Episode Reward:  36
xxxxx
xg. x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[   99.04174147  1985.28273723  1116.79388132 -4966.32149798]
------
Step:5, Action:South
State  176
Old Q Values:  [76485.61294353  1327.79507613 45408.88473875     0.        ]
New Q values:  [76485.61294353  1621.55095326 45408.88473875     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x.. x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1512.58008571 -2735.46306511  3636.77640936 -2601.74710518]
------
Step:6, Action:East
State  260
Old Q Values:  [ 1512.58008571 -2735.46306511  3636.77640936 -2601.74710518]
New Q values:  [ 1512.58008571 -2735.46306511 20089.50648016 -2601.74710518]
Reward: -1  Episode Reward:  34
xxxxx
xg. x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 62117.98638806 11823.72014264]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 62117.98638806 11823.72014264]
New Q values:  [-2527.46239811 -8521.23367799 25812.72452778 11823.72014264]
Reward: 9  Episode Reward:  43
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1355.56204243 -1609.84182322 -8192.20126966  3200.43324184]
------
Step:8, Action:West
State  288
Old Q Values:  [ 1355.56204243 -1609.84182322 -8192.20126966  3200.43324184]
New Q values:  [ 1355.56204243 -1609.84182322 -8192.20126966  9023.39065507]
Reward: -1  Episode Reward:  42
xxxxx
x.. x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 25812.72452778 11823.72014264]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 25812.72452778 11823.72014264]
New Q values:  [-2527.46239811 -8521.23367799 13031.50700763 11823.72014264]
Reward: -1  Episode Reward:  41
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1355.56204243 -1609.84182322 -8192.20126966  9023.39065507]
------
Step:10, Action:West
State  288
Old Q Values:  [ 1355.56204243 -1609.84182322 -8192.20126966  9023.39065507]
New Q values:  [ 1355.56204243 -1609.84182322 -8192.20126966  7518.20836432]
Reward: -1  Episode Reward:  40
xxxxx
x.. x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 13031.50700763 11823.72014264]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 13031.50700763 11823.72014264]
New Q values:  [-2527.46239811 -8521.23367799  7467.46531235 11823.72014264]
Reward: -1  Episode Reward:  39
xxxxx
x.. x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1355.56204243 -1609.84182322 -8192.20126966  7518.20836432]
------
Step:12, Action:West
State  288
Old Q Values:  [ 1355.56204243 -1609.84182322 -8192.20126966  7518.20836432]
New Q values:  [ 1355.56204243 -1609.84182322 -8192.20126966  6553.79938852]
Reward: -1  Episode Reward:  38
xxxxx
x.. x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7467.46531235 11823.72014264]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7467.46531235 11823.72014264]
New Q values:  [-2527.46239811 -8521.23367799  4952.5259415  11823.72014264]
Reward: -1  Episode Reward:  37
xxxxx
x.. x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1355.56204243 -1609.84182322 -8192.20126966  6553.79938852]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1355.56204243 -1609.84182322 -8192.20126966  6553.79938852]
New Q values:  [ 1355.56204243 -1609.84182322 -8192.20126966  6168.0357982 ]
Reward: -1  Episode Reward:  36
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4952.5259415  11823.72014264]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4952.5259415  11823.72014264]
New Q values:  [-2527.46239811 -8521.23367799  4952.5259415  13102.24725922]
Reward: -1  Episode Reward:  35
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[27911.19734055  2256.66526474  4520.89517899  1875.31501677]
------
Step:16, Action:North
State  257
Old Q Values:  [27911.19734055  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [34682.22457972  2256.66526474  4520.89517899  1875.31501677]
Reward: -1  Episode Reward:  34
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  5161.65126242     0.        ]
------
Step:17, Action:North
State  181
Old Q Values:  [1358.28270375  242.76375976 2135.87853873  -30.99112081]
New Q values:  [1879.76962961  242.76375976 2135.87853873  -30.99112081]
Reward: 9  Episode Reward:  43
xxxxx
xa. x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        4436.85516038    5.4           0.        ]
------
Step:18, Action:South
State  101
Old Q Values:  [  0.         383.92064237   0.           0.        ]
New Q values:  [  0.         793.73181857   0.           0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1879.76962961  242.76375976 2135.87853873  -30.99112081]
------
Step:19, Action:East
State  177
Old Q Values:  [78394.48547832 22487.97851491  5161.65126242     0.        ]
New Q values:  [78394.48547832 22487.97851491  4693.41412922     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 8.76451208e+03 2.91043938e+03]
------
Step:20, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.34528215e+03 8.76451208e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.34528215e+03 4.03628235e+03 2.91043938e+03]
Reward: -1  Episode Reward:  40
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1289.10919137  1678.27028364 -4228.04879148  1770.25839846]
------
Step:21, Action:South
State  210
Old Q Values:  [2.08235907e+04 1.45570574e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [2.08235907e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1355.56204243 -1609.84182322 -8192.20126966  6168.0357982 ]
------
Step:22, Action:North
State  288
Old Q Values:  [ 1355.56204243 -1609.84182322 -8192.20126966  6168.0357982 ]
New Q values:  [ 6788.7020242  -1609.84182322 -8192.20126966  6168.0357982 ]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.08235907e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
------
Step:23, Action:North
State  210
Old Q Values:  [2.08235907e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [2.83637049e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
Reward: -1  Episode Reward:  37
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36041.91667283 29105.33341548  -180.00807518 66782.89554618]
------
Step:24, Action:West
State  130
Old Q Values:  [36041.91667283 29105.33341548  -180.00807518 66782.89554618]
New Q values:  [ 36041.91667283  29105.33341548   -180.00807518 126487.24622349]
Reward: 100009  Episode Reward:  100046
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1541.05798982    26.73544252 -2339.75315766   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1541.05798982    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1262.58675755    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1879.76962961  242.76375976 2135.87853873  -30.99112081]
------
Step:2, Action:North
State  183
Old Q Values:  [ 478.96073712  657.75889141 4183.16123993    0.        ]
New Q values:  [1528.04084296  657.75889141 4183.16123993    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xa..x
x . x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[-180.6        4436.85516038    5.4           0.        ]
------
Step:3, Action:South
State  110
Old Q Values:  [ -239.29051573 -6173.62236507   399.93689112  -180.6       ]
New Q values:  [-239.29051573 -979.06556491  399.93689112 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa. x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [1528.04084296  657.75889141 4183.16123993    0.        ]
New Q values:  [1528.04084296  657.75889141 3094.03733794    0.        ]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -3.71565138e+03  4.71790947e+03  1.20371620e+03]
------
Step:5, Action:East
State  195
Old Q Values:  [   38.85388605  2639.47551855 23527.39531294  1101.59744825]
New Q values:  [   38.85388605  2639.47551855 17919.46960722  1101.59744825]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.83637049e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [2.83637049e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [4.92970558e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
Reward: 9  Episode Reward:  34
xxxxx
x .ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283  29105.33341548   -180.00807518 126487.24622349]
------
Step:7, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  4.01236393e+04]
New Q values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  1.62041454e+04]
Reward: 9  Episode Reward:  43
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   497.63216429   324.10651591]
------
Step:8, Action:East
State  126
Old Q Values:  [   0.          331.64678262  214.99793907 1134.18629274]
New Q values:  [   0.          331.64678262 4946.64278087 1134.18629274]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  1.62041454e+04]
------
Step:9, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  1.62041454e+04]
New Q values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  6.63034779e+03]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   497.63216429   324.10651591]
------
Step:10, Action:East
State  126
Old Q Values:  [   0.          331.64678262 4946.64278087 1134.18629274]
New Q values:  [   0.          331.64678262 3967.16144923 1134.18629274]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  6.63034779e+03]
------
Step:11, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  6.63034779e+03]
New Q values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  2.80082877e+03]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   497.63216429   324.10651591]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   497.63216429   324.10651591]
New Q values:  [ -281.736      -3455.78276043  1038.70149525   324.10651591]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  2.80082877e+03]
------
Step:13, Action:West
State  136
Old Q Values:  [-1043.6707427   3034.34166436 -2383.80019164   432.36795615]
New Q values:  [-1043.6707427   3034.34166436 -2383.80019164   788.64774796]
Reward: -1  Episode Reward:  37
xxxxx
xga x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2054.33521832  -208.89233445]
------
Step:14, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2054.33521832  -208.89233445]
New Q values:  [-9594.56523706 -8069.05606225  1731.43658664  -208.89233445]
Reward: -1  Episode Reward:  36
xxxxx
x gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3034.34166436 -2383.80019164   788.64774796]
------
Step:15, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.41287419e+03 -3.22965309e-01  2.80082877e+03]
New Q values:  [ 3.67167427e+02  4.33816466e+03 -3.22965309e-01  2.80082877e+03]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[12578.7166219    372.98523174 -8896.20691497  2267.22667625]
------
Step:16, Action:North
State  216
Old Q Values:  [12578.7166219    372.98523174 -8896.20691497  2267.22667625]
New Q values:  [ 6332.33604784   372.98523174 -8896.20691497  2267.22667625]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  4.33816466e+03 -3.22965309e-01  2.80082877e+03]
------
Step:17, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  4.33816466e+03 -3.22965309e-01  2.80082877e+03]
New Q values:  [ 3.67167427e+02  3.63436668e+03 -3.22965309e-01  2.80082877e+03]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6332.33604784   372.98523174 -8896.20691497  2267.22667625]
------
Step:18, Action:North
State  218
Old Q Values:  [5036.62598862  848.96225083    0.          429.03841886]
New Q values:  [3104.36039939  848.96225083    0.          429.03841886]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  3.63436668e+03 -3.22965309e-01  2.80082877e+03]
------
Step:19, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  3.63436668e+03 -3.22965309e-01  2.80082877e+03]
New Q values:  [ 3.67167427e+02  2.38445479e+03 -3.22965309e-01  2.80082877e+03]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[3104.36039939  848.96225083    0.          429.03841886]
------
Step:20, Action:North
State  218
Old Q Values:  [3104.36039939  848.96225083    0.          429.03841886]
New Q values:  [2081.39278929  848.96225083    0.          429.03841886]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  2.38445479e+03 -3.22965309e-01  2.80082877e+03]
------
Step:21, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  2.38445479e+03 -3.22965309e-01  2.80082877e+03]
New Q values:  [ 3.67167427e+02  2.38445479e+03 -3.22965309e-01  1.43134195e+03]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1038.70149525   324.10651591]
------
Step:22, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1731.43658664  -208.89233445]
New Q values:  [-9594.56523706 -8069.05606225  1602.27713396  -208.89233445]
Reward: -1  Episode Reward:  28
xxxxx
x gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3034.34166436 -2383.80019164   788.64774796]
------
Step:23, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  2.38445479e+03 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  2.85288273e+03 -3.22965309e-01  1.43134195e+03]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6332.33604784   372.98523174 -8896.20691497  2267.22667625]
------
Step:24, Action:North
State  216
Old Q Values:  [ 6332.33604784   372.98523174 -8896.20691497  2267.22667625]
New Q values:  [ 3388.19923845   372.98523174 -8896.20691497  2267.22667625]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  2.85288273e+03 -3.22965309e-01  1.43134195e+03]
------
Step:25, Action:West
State  136
Old Q Values:  [-1043.6707427   3034.34166436 -2383.80019164   788.64774796]
New Q values:  [-1043.6707427   3034.34166436 -2383.80019164   408.17330197]
Reward: -1  Episode Reward:  25
xxxxx
x agx
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:26, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1602.27713396  -208.89233445]
New Q values:  [-9594.56523706 -8069.05606225  1602.27713396   174.48814984]
Reward: -1  Episode Reward:  24
xxxxx
xag x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  862.15027873   -8.57207238 -180.6       ]
------
Step:27, Action:South
State  111
Old Q Values:  [-177.44732869 3080.78472487  353.8832415  -120.29354603]
New Q values:  [-177.44732869 2361.73793661  353.8832415  -120.29354603]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  881.83651799 3766.74682222  154.04646645]
------
Step:28, Action:South
State  188
Old Q Values:  [-6523.78898263   386.37078626  1223.43046172     0.        ]
New Q values:  [-6523.78898263  6180.80025855  1223.43046172     0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1512.58008571 -2735.46306511 20089.50648016 -2601.74710518]
------
Step:29, Action:East
State  260
Old Q Values:  [ 1512.58008571 -2735.46306511 20089.50648016 -2601.74710518]
New Q values:  [ 1512.58008571 -2735.46306511  8844.83816553 -2601.74710518]
Reward: 9  Episode Reward:  31
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2678.78524487  2666.51735245]
------
Step:30, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4952.5259415  13102.24725922]
New Q values:  [-2527.46239811 -8521.23367799 64023.02098386 13102.24725922]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1289.10919137  1678.27028364 -4228.04879148  1770.25839846]
------
Step:1, Action:West
State  216
Old Q Values:  [ 3388.19923845   372.98523174 -8896.20691497  2267.22667625]
New Q values:  [ 3388.19923845   372.98523174 -8896.20691497  2123.17537606]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 4.03628235e+03 2.91043938e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.34528215e+03 4.03628235e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.34528215e+03 2.63037271e+03 2.91043938e+03]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3388.19923845   372.98523174 -8896.20691497  2123.17537606]
------
Step:3, Action:North
State  216
Old Q Values:  [ 3388.19923845   372.98523174 -8896.20691497  2123.17537606]
New Q values:  [ 2210.54451469   372.98523174 -8896.20691497  2123.17537606]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  2.85288273e+03 -3.22965309e-01  1.43134195e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  2.85288273e+03 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.59296698e+04 -3.22965309e-01  1.43134195e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.92970558e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
------
Step:5, Action:North
State  216
Old Q Values:  [ 2210.54451469   372.98523174 -8896.20691497  2123.17537606]
New Q values:  [ 5662.51875948   372.98523174 -8896.20691497  2123.17537606]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.59296698e+04 -3.22965309e-01  1.43134195e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.59296698e+04 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  6.90234546e+03 -3.22965309e-01  1.43134195e+03]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1289.10919137  1678.27028364 -4228.04879148  1770.25839846]
------
Step:7, Action:South
State  208
Old Q Values:  [ 1289.10919137  1678.27028364 -4228.04879148  1770.25839846]
New Q values:  [ 1289.10919137  2713.31872072 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  13
xxxxx
x.g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6788.7020242  -1609.84182322 -8192.20126966  6168.0357982 ]
------
Step:8, Action:North
State  288
Old Q Values:  [ 6788.7020242  -1609.84182322 -8192.20126966  6168.0357982 ]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  6168.0357982 ]
Reward: -1  Episode Reward:  12
xxxxx
xg. x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1289.10919137  2713.31872072 -4228.04879148  1770.25839846]
------
Step:9, Action:South
State  208
Old Q Values:  [ 1289.10919137  2713.31872072 -4228.04879148  1770.25839846]
New Q values:  [ 1289.10919137  2935.13822775 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  11
xxxxx
x.g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  6168.0357982 ]
------
Step:10, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  6168.0357982 ]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966 21679.52061444]
Reward: 9  Episode Reward:  20
xxxxx
xg. x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 64023.02098386 13102.24725922]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 64023.02098386 13102.24725922]
New Q values:  [-2527.46239811 -8521.23367799 32112.46457787 13102.24725922]
Reward: -1  Episode Reward:  19
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966 21679.52061444]
------
Step:12, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966 21679.52061444]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966 10407.60698351]
Reward: -1  Episode Reward:  18
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2141.63811263 5787.99579245]
------
Step:13, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2141.63811263 5787.99579245]
New Q values:  [   37.74111519  -168.92307549  2141.63811263 12725.2656909 ]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[34682.22457972  2256.66526474  4520.89517899  1875.31501677]
------
Step:14, Action:North
State  257
Old Q Values:  [34682.22457972  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [37396.63547538  2256.66526474  4520.89517899  1875.31501677]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  4693.41412922     0.        ]
------
Step:15, Action:North
State  181
Old Q Values:  [1879.76962961  242.76375976 2135.87853873  -30.99112081]
New Q values:  [2088.36439996  242.76375976 2135.87853873  -30.99112081]
Reward: 9  Episode Reward:  45
xxxxx
xa. x
x  gx
x   x
xxxxx
Step:16, Action:North
State  103
Old Q Values:  [-180.6        4436.85516038    5.4           0.        ]
New Q values:  [1078.21654811 4436.85516038    5.4           0.        ]
Reward: -301  Episode Reward:  -256
xxxxx
xa. x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[1078.21654811 4436.85516038    5.4           0.        ]
------
Step:17, Action:South
State  97
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [    0.        23517.7456435     0.            0.       ]
Reward: -1  Episode Reward:  -257
xxxxx
x g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  4693.41412922     0.        ]
------
Step:18, Action:North
State  181
Old Q Values:  [2088.36439996  242.76375976 2135.87853873  -30.99112081]
New Q values:  [1072.86530555  242.76375976 2135.87853873  -30.99112081]
Reward: -1  Episode Reward:  -258
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[  0.         793.73181857   0.           0.        ]
------
Step:19, Action:South
State  97
Old Q Values:  [    0.        23517.7456435     0.            0.       ]
New Q values:  [    0.        32924.8439009     0.            0.       ]
Reward: -1  Episode Reward:  -259
xxxxx
x g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  4693.41412922     0.        ]
------
Step:20, Action:North
State  181
Old Q Values:  [1072.86530555  242.76375976 2135.87853873  -30.99112081]
New Q values:  [1759.60267033  242.76375976 2135.87853873  -30.99112081]
Reward: -1  Episode Reward:  -260
xxxxx
xa. x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[1078.21654811 4436.85516038    5.4           0.        ]
------
Step:21, Action:South
State  97
Old Q Values:  [    0.        32924.8439009     0.            0.       ]
New Q values:  [    0.         36687.68320385     0.             0.        ]
Reward: -1  Episode Reward:  -261
xxxxx
x g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  4693.41412922     0.        ]
------
Step:22, Action:North
State  181
Old Q Values:  [1759.60267033  242.76375976 2135.87853873  -30.99112081]
New Q values:  [2034.29761625  242.76375976 2135.87853873  -30.99112081]
Reward: -1  Episode Reward:  -262
xxxxx
xa. x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[1078.21654811 4436.85516038    5.4           0.        ]
------
Step:23, Action:South
State  103
Old Q Values:  [1078.21654811 4436.85516038    5.4           0.        ]
New Q values:  [1078.21654811 2414.90562577    5.4           0.        ]
Reward: -1  Episode Reward:  -263
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2034.29761625  242.76375976 2135.87853873  -30.99112081]
------
Step:24, Action:East
State  181
Old Q Values:  [2034.29761625  242.76375976 2135.87853873  -30.99112081]
New Q values:  [2034.29761625  242.76375976 1754.79552779  -30.99112081]
Reward: -1  Episode Reward:  -264
xxxxx
x .gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3003.48037431   438.52768459  1099.96026581]
------
Step:25, Action:South
State  193
Old Q Values:  [-5922.26708831  3003.48037431   438.52768459  1099.96026581]
New Q values:  [-5922.26708831  5018.37185699   438.52768459  1099.96026581]
Reward: -1  Episode Reward:  -265
xxxxx
x . x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   37.74111519  -168.92307549  2141.63811263 12725.2656909 ]
------
Step:26, Action:West
State  273
Old Q Values:  [   37.74111519  -168.92307549  2141.63811263 12725.2656909 ]
New Q values:  [  37.74111519 -168.92307549 2141.63811263 5468.28230362]
Reward: -1  Episode Reward:  -266
xxxxx
x . x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1262.58675755    26.73544252 -2339.75315766   -35.88578819]
------
Step:27, Action:North
State  261
Old Q Values:  [ 1262.58675755    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1114.72398789    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  -267
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2034.29761625  242.76375976 1754.79552779  -30.99112081]
------
Step:28, Action:North
State  181
Old Q Values:  [2034.29761625  242.76375976 1754.79552779  -30.99112081]
New Q values:  [1051.23859207  242.76375976 1754.79552779  -30.99112081]
Reward: -1  Episode Reward:  -268
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[  0.         793.73181857   0.           0.        ]
------
Step:29, Action:South
State  103
Old Q Values:  [1078.21654811 2414.90562577    5.4           0.        ]
New Q values:  [1078.21654811 1491.80090864    5.4           0.        ]
Reward: -1  Episode Reward:  -269
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1051.23859207  242.76375976 1754.79552779  -30.99112081]
------
Step:30, Action:East
State  181
Old Q Values:  [1051.23859207  242.76375976 1754.79552779  -30.99112081]
New Q values:  [1051.23859207  242.76375976 2206.82976821  -30.99112081]
Reward: -1  Episode Reward:  -270
xxxxx
x .gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5018.37185699   438.52768459  1099.96026581]
------
Step:31, Action:South
State  193
Old Q Values:  [-5922.26708831  5018.37185699   438.52768459  1099.96026581]
New Q values:  [-5922.26708831  3647.23343388   438.52768459  1099.96026581]
Reward: -1  Episode Reward:  -271
xxxxx
x . x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 2141.63811263 5468.28230362]
------
Step:32, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 2141.63811263 5468.28230362]
New Q values:  [  37.74111519 -168.92307549 2141.63811263 2521.13011782]
Reward: -1  Episode Reward:  -272
xxxxx
x . x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1114.72398789    26.73544252 -2339.75315766   -35.88578819]
------
Step:33, Action:North
State  261
Old Q Values:  [ 1114.72398789    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1107.33852562    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  -273
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1051.23859207  242.76375976 2206.82976821  -30.99112081]
------
Step:34, Action:East
State  183
Old Q Values:  [1528.04084296  657.75889141 3094.03733794    0.        ]
New Q values:  [1528.04084296  657.75889141 6612.85581734    0.        ]
Reward: -1  Episode Reward:  -274
xxxxx
x . x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  2639.47551855 17919.46960722  1101.59744825]
------
Step:35, Action:East
State  193
Old Q Values:  [-5922.26708831  3647.23343388   438.52768459  1099.96026581]
New Q values:  [-5922.26708831  3647.23343388 -4944.64745784  1099.96026581]
Reward: -10001  Episode Reward:  -10275
xxxxx
x . x
x  gx
x   x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1107.33852562    26.73544252 -2339.75315766   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1107.33852562    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1110.38434071    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1051.23859207  242.76375976 2206.82976821  -30.99112081]
------
Step:2, Action:East
State  181
Old Q Values:  [1051.23859207  242.76375976 2206.82976821  -30.99112081]
New Q values:  [ 1051.23859207   242.76375976 -4244.73627866   -30.99112081]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x g.x
x ..x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[   99.04174147  1985.28273723  1116.79388132 -4966.32149798]
------
Step:1, Action:South
State  180
Old Q Values:  [   99.04174147  1985.28273723  1116.79388132 -4966.32149798]
New Q values:  [   99.04174147  3452.96454455  1116.79388132 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1512.58008571 -2735.46306511  8844.83816553 -2601.74710518]
------
Step:2, Action:East
State  260
Old Q Values:  [ 1512.58008571 -2735.46306511  8844.83816553 -2601.74710518]
New Q values:  [ 1512.58008571 -2735.46306511  4346.97083967 -2601.74710518]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2678.78524487  2666.51735245]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 32112.46457787 13102.24725922]
New Q values:  [-2527.46239811 -8521.23367799 15972.6679262  13102.24725922]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966 10407.60698351]
------
Step:4, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966 10407.60698351]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  8954.24317126]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 15972.6679262  13102.24725922]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 15972.6679262  13102.24725922]
New Q values:  [-2527.46239811 -8521.23367799  9074.74012186 13102.24725922]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  8954.24317126]
------
Step:6, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  8954.24317126]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  7511.77144627]
Reward: -1  Episode Reward:  24
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9074.74012186 13102.24725922]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9074.74012186 13102.24725922]
New Q values:  [-2527.46239811 -8521.23367799  9074.74012186  6544.39015559]
Reward: -1  Episode Reward:  23
xxxxx
xg..x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1512.58008571 -2735.46306511  4346.97083967 -2601.74710518]
------
Step:8, Action:East
State  260
Old Q Values:  [ 1512.58008571 -2735.46306511  4346.97083967 -2601.74710518]
New Q values:  [ 1512.58008571 -2735.46306511  4460.61037243 -2601.74710518]
Reward: -1  Episode Reward:  22
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9074.74012186  6544.39015559]
------
Step:9, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2678.78524487  2666.51735245]
New Q values:  [   16.82637525 -5807.06396197  3324.44553183  2666.51735245]
Reward: -1  Episode Reward:  21
xxxxx
x g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  7511.77144627]
------
Step:10, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  7511.77144627]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  5726.53061507]
Reward: -1  Episode Reward:  20
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9074.74012186  6544.39015559]
------
Step:11, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3324.44553183  2666.51735245]
New Q values:  [   16.82637525 -5807.06396197  3047.13739725  2666.51735245]
Reward: -1  Episode Reward:  19
xxxxx
x g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  5726.53061507]
------
Step:12, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  5726.53061507]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  5012.43428258]
Reward: -1  Episode Reward:  18
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9074.74012186  6544.39015559]
------
Step:13, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3047.13739725  2666.51735245]
New Q values:  [   16.82637525 -5807.06396197  2721.98524368  2666.51735245]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  5012.43428258]
------
Step:14, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  5012.43428258]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  4726.79574959]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9074.74012186  6544.39015559]
------
Step:15, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2721.98524368  2666.51735245]
New Q values:  [   16.82637525 -5807.06396197  2506.23282235  2666.51735245]
Reward: -1  Episode Reward:  15
xxxxx
x g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  4726.79574959]
------
Step:16, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  4726.79574959]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  2690.07350557]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2506.23282235  2666.51735245]
------
Step:17, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2506.23282235  2666.51735245]
New Q values:  [   16.82637525 -5807.06396197  2506.23282235  1399.12224319]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1110.38434071    26.73544252 -2339.75315766   -35.88578819]
------
Step:18, Action:North
State  261
Old Q Values:  [ 1110.38434071    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  758.92531391    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1051.23859207   242.76375976 -4244.73627866   -30.99112081]
------
Step:19, Action:North
State  181
Old Q Values:  [ 1051.23859207   242.76375976 -4244.73627866   -30.99112081]
New Q values:  [  678.54052045   242.76375976 -4244.73627866   -30.99112081]
Reward: -1  Episode Reward:  11
xxxxx
xag.x
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  862.15027873   -8.57207238 -180.6       ]
------
Step:20, Action:South
State  109
Old Q Values:  [-241.10880094  862.15027873   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  547.82226763   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x .gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  678.54052045   242.76375976 -4244.73627866   -30.99112081]
------
Step:21, Action:North
State  181
Old Q Values:  [  678.54052045   242.76375976 -4244.73627866   -30.99112081]
New Q values:  [  718.35648077   242.76375976 -4244.73627866   -30.99112081]
Reward: -1  Episode Reward:  9
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[1078.21654811 1491.80090864    5.4           0.        ]
------
Step:22, Action:South
State  103
Old Q Values:  [1078.21654811 1491.80090864    5.4           0.        ]
New Q values:  [1078.21654811  811.62730769    5.4           0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  718.35648077   242.76375976 -4244.73627866   -30.99112081]
------
Step:23, Action:North
State  183
Old Q Values:  [1528.04084296  657.75889141 6612.85581734    0.        ]
New Q values:  [ 934.08130162  657.75889141 6612.85581734    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
xa..x
x ..x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[1078.21654811  811.62730769    5.4           0.        ]
------
Step:24, Action:North
State  103
Old Q Values:  [1078.21654811  811.62730769    5.4           0.        ]
New Q values:  [574.15158368 811.62730769   5.4          0.        ]
Reward: -301  Episode Reward:  -294
xxxxx
xa..x
x ..x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[574.15158368 811.62730769   5.4          0.        ]
------
Step:25, Action:South
State  103
Old Q Values:  [574.15158368 811.62730769   5.4          0.        ]
New Q values:  [574.15158368 539.55786731   5.4          0.        ]
Reward: -1  Episode Reward:  -295
xxxxx
x ..x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  718.35648077   242.76375976 -4244.73627866   -30.99112081]
------
Step:26, Action:North
State  183
Old Q Values:  [ 934.08130162  657.75889141 6612.85581734    0.        ]
New Q values:  [ 545.27799575  657.75889141 6612.85581734    0.        ]
Reward: -1  Episode Reward:  -296
xxxxx
xa..x
x ..x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[574.15158368 539.55786731   5.4          0.        ]
------
Step:27, Action:North
State  103
Old Q Values:  [574.15158368 539.55786731   5.4          0.        ]
New Q values:  [221.30610858 539.55786731   5.4          0.        ]
Reward: -301  Episode Reward:  -597
xxxxx
xa..x
x ..x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[221.30610858 539.55786731   5.4          0.        ]
------
Step:28, Action:South
State  103
Old Q Values:  [221.30610858 539.55786731   5.4          0.        ]
New Q values:  [ 221.30610858 2199.07989213    5.4           0.        ]
Reward: -1  Episode Reward:  -598
xxxxx
x ..x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 545.27799575  657.75889141 6612.85581734    0.        ]
------
Step:29, Action:East
State  183
Old Q Values:  [ 545.27799575  657.75889141 6612.85581734    0.        ]
New Q values:  [ 545.27799575  657.75889141 6435.19423583    0.        ]
Reward: 9  Episode Reward:  -589
xxxxx
x ..x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -7.91881263e+03  1.26155064e+04  0.00000000e+00]
------
Step:30, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.34528215e+03 2.63037271e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.34528215e+03 1.93809055e+03 2.91043938e+03]
Reward: 9  Episode Reward:  -580
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1289.10919137  2935.13822775 -4228.04879148  1770.25839846]
------
Step:31, Action:South
State  208
Old Q Values:  [ 1289.10919137  2935.13822775 -4228.04879148  1770.25839846]
New Q values:  [ 1289.10919137  2232.11821887 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  -581
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  2690.07350557]
------
Step:32, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  2690.07350557]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  3797.85143879]
Reward: -1  Episode Reward:  -582
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9074.74012186  6544.39015559]
------
Step:33, Action:East
State  273
Old Q Values:  [  37.74111519 -168.92307549 2141.63811263 2521.13011782]
New Q values:  [  37.74111519 -168.92307549 1995.41067669 2521.13011782]
Reward: -1  Episode Reward:  -583
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  3797.85143879]
------
Step:34, Action:West
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  3797.85143879]
New Q values:  [ 3528.87642589 -1609.84182322 -8192.20126966  2274.87961086]
Reward: -1  Episode Reward:  -584
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1995.41067669 2521.13011782]
------
Step:35, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9074.74012186  6544.39015559]
New Q values:  [-2527.46239811 -8521.23367799  9074.74012186  2844.83365641]
Reward: -1  Episode Reward:  -585
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  758.92531391    26.73544252 -2339.75315766   -35.88578819]
------
Step:36, Action:North
State  260
Old Q Values:  [ 1512.58008571 -2735.46306511  4460.61037243 -2601.74710518]
New Q values:  [ 2095.4154154  -2735.46306511  4460.61037243 -2601.74710518]
Reward: -1  Episode Reward:  -586
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:37, Action:East
State  180
Old Q Values:  [   99.04174147  3452.96454455  1116.79388132 -4966.32149798]
New Q values:  [   99.04174147  3452.96454455  1319.24936659 -4966.32149798]
Reward: -1  Episode Reward:  -587
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34528215e+03 1.93809055e+03 2.91043938e+03]
------
Step:38, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.34528215e+03 1.93809055e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.65993489e+03 1.93809055e+03 2.91043938e+03]
Reward: -1  Episode Reward:  -588
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9074.74012186  2844.83365641]
------
Step:39, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2506.23282235  1399.12224319]
New Q values:  [   16.82637525 -5807.06396197  2060.55605671  1399.12224319]
Reward: -1  Episode Reward:  -589
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3528.87642589 -1609.84182322 -8192.20126966  2274.87961086]
------
Step:40, Action:North
State  288
Old Q Values:  [ 3528.87642589 -1609.84182322 -8192.20126966  2274.87961086]
New Q values:  [ 2080.58603602 -1609.84182322 -8192.20126966  2274.87961086]
Reward: -1  Episode Reward:  -590
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1289.10919137  2232.11821887 -4228.04879148  1770.25839846]
------
Step:41, Action:South
State  210
Old Q Values:  [4.92970558e+04 2.43209304e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [4.92970558e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -591
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2080.58603602 -1609.84182322 -8192.20126966  2274.87961086]
------
Step:42, Action:North
State  288
Old Q Values:  [ 2080.58603602 -1609.84182322 -8192.20126966  2274.87961086]
New Q values:  [15620.75116734 -1609.84182322 -8192.20126966  2274.87961086]
Reward: -1  Episode Reward:  -592
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.92970558e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:43, Action:North
State  208
Old Q Values:  [ 1289.10919137  2232.11821887 -4228.04879148  1770.25839846]
New Q values:  [38467.2175436   2232.11821887 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  -583
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36041.91667283  29105.33341548   -180.00807518 126487.24622349]
------
Step:44, Action:West
State  130
Old Q Values:  [ 36041.91667283  29105.33341548   -180.00807518 126487.24622349]
New Q values:  [ 36041.91667283  29105.33341548   -180.00807518 150368.98649442]
Reward: 100009  Episode Reward:  99426
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  6.90234546e+03 -3.22965309e-01  1.43134195e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  6.90234546e+03 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.43065034e+04 -3.22965309e-01  1.43134195e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[38467.2175436   2232.11821887 -4228.04879148  1770.25839846]
------
Step:2, Action:North
State  208
Old Q Values:  [38467.2175436   2232.11821887 -4228.04879148  1770.25839846]
New Q values:  [19678.23805128  2232.11821887 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.43065034e+04 -3.22965309e-01  1.43134195e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.43065034e+04 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  5.62547279e+03 -3.22965309e-01  1.43134195e+03]
Reward: -10001  Episode Reward:  -9993
xxxxx
x.. x
x..gx
x ..x
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1038.70149525   324.10651591]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  1038.70149525   324.10651591]
New Q values:  [ -281.736      -3455.78276043  2108.52243626   324.10651591]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  5.62547279e+03 -3.22965309e-01  1.43134195e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  5.62547279e+03 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.70447059e+04 -3.22965309e-01  1.43134195e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.92970558e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [4.92970558e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.70447059e+04 -3.22965309e-01  1.43134195e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.70447059e+04 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.27207538e+04 -3.22965309e-01  1.43134195e+03]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
xg.ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19678.23805128  2232.11821887 -4228.04879148  1770.25839846]
------
Step:5, Action:North
State  208
Old Q Values:  [19678.23805128  2232.11821887 -4228.04879148  1770.25839846]
New Q values:  [11686.92134959  2232.11821887 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.27207538e+04 -3.22965309e-01  1.43134195e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.27207538e+04 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.25371917e+04 -3.22965309e-01  1.43134195e+03]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:7, Action:North
State  208
Old Q Values:  [11686.92134959  2232.11821887 -4228.04879148  1770.25839846]
New Q values:  [ 8435.32606032  2232.11821887 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.25371917e+04 -3.22965309e-01  1.43134195e+03]
------
Step:8, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.25371917e+04 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.54487451e+03 -3.22965309e-01  1.43134195e+03]
Reward: -10001  Episode Reward:  -9988
xxxxx
x.  x
x..gx
x. .x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9074.74012186  2844.83365641]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9074.74012186  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799  8321.52139895  2844.83365641]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15620.75116734 -1609.84182322 -8192.20126966  2274.87961086]
------
Step:2, Action:North
State  288
Old Q Values:  [15620.75116734 -1609.84182322 -8192.20126966  2274.87961086]
New Q values:  [ 8784.29828503 -1609.84182322 -8192.20126966  2274.87961086]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8435.32606032  2232.11821887 -4228.04879148  1770.25839846]
------
Step:3, Action:South
State  208
Old Q Values:  [ 8435.32606032  2232.11821887 -4228.04879148  1770.25839846]
New Q values:  [ 8435.32606032  3527.53677306 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8784.29828503 -1609.84182322 -8192.20126966  2274.87961086]
------
Step:4, Action:West
State  288
Old Q Values:  [ 8784.29828503 -1609.84182322 -8192.20126966  2274.87961086]
New Q values:  [ 8784.29828503 -1609.84182322 -8192.20126966  1665.69087969]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1995.41067669 2521.13011782]
------
Step:5, Action:West
State  273
Old Q Values:  [  37.74111519 -168.92307549 1995.41067669 2521.13011782]
New Q values:  [  37.74111519 -168.92307549 1995.41067669 1241.5296413 ]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  758.92531391    26.73544252 -2339.75315766   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [  758.92531391    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  524.47706979    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  718.35648077   242.76375976 -4244.73627866   -30.99112081]
------
Step:7, Action:North
State  183
Old Q Values:  [ 545.27799575  657.75889141 6435.19423583    0.        ]
New Q values:  [ 877.23516594  657.75889141 6435.19423583    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
xa..x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2199.07989213    5.4           0.        ]
------
Step:8, Action:South
State  102
Old Q Values:  [ -180.6       -2257.0253383     5.16       -180.6      ]
New Q values:  [-180.6        587.5732458    5.16      -180.6      ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:9, Action:East
State  180
Old Q Values:  [   99.04174147  3452.96454455  1319.24936659 -4966.32149798]
New Q values:  [   99.04174147  3452.96454455  1631.08021511 -4966.32149798]
Reward: 9  Episode Reward:  41
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.65993489e+03 1.93809055e+03 2.91043938e+03]
------
Step:10, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -3.71565138e+03  4.71790947e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.00959587e+03  4.71790947e+03  1.20371620e+03]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8321.52139895  2844.83365641]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8321.52139895  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799  5963.29804509  2844.83365641]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8784.29828503 -1609.84182322 -8192.20126966  1665.69087969]
------
Step:12, Action:North
State  288
Old Q Values:  [ 8784.29828503 -1609.84182322 -8192.20126966  1665.69087969]
New Q values:  [ 6043.71713211 -1609.84182322 -8192.20126966  1665.69087969]
Reward: -1  Episode Reward:  38
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8435.32606032  3527.53677306 -4228.04879148  1770.25839846]
------
Step:13, Action:North
State  208
Old Q Values:  [ 8435.32606032  3527.53677306 -4228.04879148  1770.25839846]
New Q values:  [ 9303.74226399  3527.53677306 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  47
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068 19747.37279955 -8652.84       15717.21796752]
------
Step:14, Action:South
State  128
Old Q Values:  [ 8775.70846068 19747.37279955 -8652.84       15717.21796752]
New Q values:  [ 8775.70846068 10689.47179902 -8652.84       15717.21796752]
Reward: -1  Episode Reward:  46
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9303.74226399  3527.53677306 -4228.04879148  1770.25839846]
------
Step:15, Action:North
State  208
Old Q Values:  [ 9303.74226399  3527.53677306 -4228.04879148  1770.25839846]
New Q values:  [ 8436.06229585  3527.53677306 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  45
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068 10689.47179902 -8652.84       15717.21796752]
------
Step:16, Action:South
State  128
Old Q Values:  [ 8775.70846068 10689.47179902 -8652.84       15717.21796752]
New Q values:  [ 8775.70846068  6806.00740836 -8652.84       15717.21796752]
Reward: -1  Episode Reward:  44
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8436.06229585  3527.53677306 -4228.04879148  1770.25839846]
------
Step:17, Action:South
State  208
Old Q Values:  [ 8436.06229585  3527.53677306 -4228.04879148  1770.25839846]
New Q values:  [ 8436.06229585  3223.52984885 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  43
xxxxx
x . x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6043.71713211 -1609.84182322 -8192.20126966  1665.69087969]
------
Step:18, Action:West
State  288
Old Q Values:  [ 6043.71713211 -1609.84182322 -8192.20126966  1665.69087969]
New Q values:  [ 6043.71713211 -1609.84182322 -8192.20126966  1264.29955488]
Reward: -1  Episode Reward:  42
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  37.74111519 -168.92307549 1995.41067669 1241.5296413 ]
------
Step:19, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5963.29804509  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799  4197.83435767  2844.83365641]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6043.71713211 -1609.84182322 -8192.20126966  1264.29955488]
------
Step:20, Action:North
State  288
Old Q Values:  [ 6043.71713211 -1609.84182322 -8192.20126966  1264.29955488]
New Q values:  [ 4947.7055416  -1609.84182322 -8192.20126966  1264.29955488]
Reward: -1  Episode Reward:  40
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8436.06229585  3223.52984885 -4228.04879148  1770.25839846]
------
Step:21, Action:South
State  208
Old Q Values:  [ 8436.06229585  3223.52984885 -4228.04879148  1770.25839846]
New Q values:  [ 8436.06229585  2773.12360202 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4947.7055416  -1609.84182322 -8192.20126966  1264.29955488]
------
Step:22, Action:North
State  288
Old Q Values:  [ 4947.7055416  -1609.84182322 -8192.20126966  1264.29955488]
New Q values:  [ 4509.3009054  -1609.84182322 -8192.20126966  1264.29955488]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8436.06229585  2773.12360202 -4228.04879148  1770.25839846]
------
Step:23, Action:North
State  208
Old Q Values:  [ 8436.06229585  2773.12360202 -4228.04879148  1770.25839846]
New Q values:  [ 8088.9903086   2773.12360202 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  37
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068  6806.00740836 -8652.84       15717.21796752]
------
Step:24, Action:North
State  130
Old Q Values:  [ 36041.91667283  29105.33341548   -180.00807518 150368.98649442]
New Q values:  [ 59346.86261746  29105.33341548   -180.00807518 150368.98649442]
Reward: -301  Episode Reward:  -264
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 59346.86261746  29105.33341548   -180.00807518 150368.98649442]
------
Step:25, Action:West
State  130
Old Q Values:  [ 59346.86261746  29105.33341548   -180.00807518 150368.98649442]
New Q values:  [ 59346.86261746  29105.33341548   -180.00807518 159921.68260279]
Reward: 100009  Episode Reward:  99745
xxxxx
x a x
x   x
x g x
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  524.47706979    26.73544252 -2339.75315766   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [  524.47706979    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  430.69777215    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  718.35648077   242.76375976 -4244.73627866   -30.99112081]
------
Step:2, Action:North
State  180
Old Q Values:  [   99.04174147  3452.96454455  1631.08021511 -4966.32149798]
New Q values:  [  164.99776392  3452.96454455  1631.08021511 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 -979.06556491  399.93689112 -180.6       ]
------
Step:3, Action:East
State  108
Old Q Values:  [-8463.16477134   935.65494908   897.8432354      0.        ]
New Q values:  [-8463.16477134   935.65494908   839.22043435     0.        ]
Reward: -1  Episode Reward:  17
xxxxx
xga.x
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1602.27713396   174.48814984]
------
Step:4, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1602.27713396   174.48814984]
New Q values:  [-9594.56523706 -8069.05606225  1556.61335289   174.48814984]
Reward: 9  Episode Reward:  26
xxxxx
x gax
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   3034.34166436 -2383.80019164   408.17330197]
------
Step:5, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.54487451e+03 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  2.32210543e+03 -3.22965309e-01  1.43134195e+03]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5662.51875948   372.98523174 -8896.20691497  2123.17537606]
------
Step:6, Action:North
State  216
Old Q Values:  [ 5662.51875948   372.98523174 -8896.20691497  2123.17537606]
New Q values:  [ 2961.0391336    372.98523174 -8896.20691497  2123.17537606]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  2.32210543e+03 -3.22965309e-01  1.43134195e+03]
------
Step:7, Action:South
State  136
Old Q Values:  [-1043.6707427   3034.34166436 -2383.80019164   408.17330197]
New Q values:  [-1043.6707427   2101.44840582 -2383.80019164   408.17330197]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x .ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2961.0391336    372.98523174 -8896.20691497  2123.17537606]
------
Step:8, Action:North
State  216
Old Q Values:  [ 2961.0391336    372.98523174 -8896.20691497  2123.17537606]
New Q values:  [ 1880.44728324   372.98523174 -8896.20691497  2123.17537606]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  2.32210543e+03 -3.22965309e-01  1.43134195e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  2.32210543e+03 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.55266001e+03 -3.22965309e-01  1.43134195e+03]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2081.39278929  848.96225083    0.          429.03841886]
------
Step:10, Action:North
State  216
Old Q Values:  [ 1880.44728324   372.98523174 -8896.20691497  2123.17537606]
New Q values:  [ 1217.37691625   372.98523174 -8896.20691497  2123.17537606]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.55266001e+03 -3.22965309e-01  1.43134195e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.55266001e+03 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.43134195e+03]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2081.39278929  848.96225083    0.          429.03841886]
------
Step:12, Action:North
State  216
Old Q Values:  [ 1217.37691625   372.98523174 -8896.20691497  2123.17537606]
New Q values:  [  915.75335289   372.98523174 -8896.20691497  2123.17537606]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.43134195e+03]
------
Step:13, Action:West
State  136
Old Q Values:  [-1043.6707427   2101.44840582 -2383.80019164   408.17330197]
New Q values:  [-1043.6707427   2101.44840582 -2383.80019164   629.65332666]
Reward: -1  Episode Reward:  27
xxxxx
xga x
x . x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1556.61335289   174.48814984]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  2108.52243626   324.10651591]
New Q values:  [ -281.736      -3455.78276043  1272.21156089   324.10651591]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.43134195e+03]
------
Step:15, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.43134195e+03]
New Q values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  9.53600250e+02]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1272.21156089   324.10651591]
------
Step:16, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  1272.21156089   324.10651591]
New Q values:  [ -281.736      -3455.78276043   881.74917658   324.10651591]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  9.53600250e+02]
------
Step:17, Action:South
State  136
Old Q Values:  [-1043.6707427   2101.44840582 -2383.80019164   629.65332666]
New Q values:  [-1043.6707427   1476.93197515 -2383.80019164   629.65332666]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x .ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  915.75335289   372.98523174 -8896.20691497  2123.17537606]
------
Step:18, Action:West
State  216
Old Q Values:  [  915.75335289   372.98523174 -8896.20691497  2123.17537606]
New Q values:  [  915.75335289   372.98523174 -8896.20691497  2780.49651731]
Reward: 9  Episode Reward:  32
xxxxx
x   x
xga x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  6419.42122296 1239.78307745  408.67479662]
------
Step:19, Action:South
State  204
Old Q Values:  [  0.         687.59443642 815.31186329 441.58769553]
New Q values:  [  0.         898.60459158 815.31186329 441.58769553]
Reward: 9  Episode Reward:  41
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2060.55605671  1399.12224319]
------
Step:20, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4197.83435767  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799 63037.32401469  2844.83365641]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  2639.47551855 17919.46960722  1101.59744825]
------
Step:1, Action:East
State  193
Old Q Values:  [-5922.26708831  3647.23343388 -4944.64745784  1099.96026581]
New Q values:  [-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.00959587e+03  4.71790947e+03  1.20371620e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.00959587e+03  4.71790947e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.00959587e+03  9.34205402e+03  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:2, Action:North
State  208
Old Q Values:  [ 8088.9903086   2773.12360202 -4228.04879148  1770.25839846]
New Q values:  [51217.50090428  2773.12360202 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 59346.86261746  29105.33341548   -180.00807518 159921.68260279]
------
Step:3, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  9.53600250e+02]
New Q values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  4.01555281e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   881.74917658   324.10651591]
New Q values:  [ -281.736      -3455.78276043   881.74917658   170.18339097]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   117.13594869  -180.6       ]
------
Step:5, Action:East
State  106
Old Q Values:  [ -180.6        -8952.15415062   117.13594869  -180.6       ]
New Q values:  [ -180.6        -8952.15415062   310.77913245  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   881.74917658   170.18339097]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   881.74917658   170.18339097]
New Q values:  [ -281.736      -3455.78276043 12398.75810215   170.18339097]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  4.01555281e+04]
------
Step:7, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  4.01555281e+04]
New Q values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.66981747e+04]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2121.87817861   453.58409043]
------
Step:8, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2121.87817861   453.58409043]
New Q values:  [ -253.44886264 -1902.20915811  5857.60368013   453.58409043]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.66981747e+04]
------
Step:9, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.66981747e+04]
New Q values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.03982973e+04]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043 12398.75810215   170.18339097]
------
Step:10, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  5857.60368013   453.58409043]
New Q values:  [ -253.44886264 -1902.20915811  5461.93066472   453.58409043]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.03982973e+04]
------
Step:11, Action:West
State  136
Old Q Values:  [-1043.6707427   1476.93197515 -2383.80019164   629.65332666]
New Q values:  [-1043.6707427   1476.93197515 -2383.80019164   344.57553345]
Reward: -1  Episode Reward:  29
xxxxx
x agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:12, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  5461.93066472   453.58409043]
New Q values:  [ -253.44886264 -1902.20915811  5461.93066472   342.17888722]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  537.81750349   69.96901366 -252.78192178]
------
Step:13, Action:South
State  107
Old Q Values:  [-252.35169558  537.81750349   69.96901366 -252.78192178]
New Q values:  [-252.35169558  872.42862025   69.96901366 -252.78192178]
Reward: 9  Episode Reward:  37
xxxxx
x   x
xag x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[   9.40190913    0.         2173.00539618 -178.98      ]
------
Step:14, Action:North
State  185
Old Q Values:  [   9.40190913    0.         2173.00539618 -178.98      ]
New Q values:  [  46.66637643    0.         2173.00539618 -178.98      ]
Reward: -1  Episode Reward:  36
xxxxx
xag x
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         145.01870926   65.14560537    0.        ]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558  872.42862025   69.96901366 -252.78192178]
New Q values:  [-252.35169558 1000.27306695   69.96901366 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  46.66637643    0.         2173.00539618 -178.98      ]
------
Step:16, Action:North
State  185
Old Q Values:  [  46.66637643    0.         2173.00539618 -178.98      ]
New Q values:  [ 318.14847066    0.         2173.00539618 -178.98      ]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1000.27306695   69.96901366 -252.78192178]
------
Step:17, Action:South
State  107
Old Q Values:  [-252.35169558 1000.27306695   69.96901366 -252.78192178]
New Q values:  [-252.35169558 1051.41084563   69.96901366 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xag x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 318.14847066    0.         2173.00539618 -178.98      ]
------
Step:18, Action:North
State  185
Old Q Values:  [ 318.14847066    0.         2173.00539618 -178.98      ]
New Q values:  [ 170.16500104    0.         2173.00539618 -178.98      ]
Reward: -1  Episode Reward:  32
xxxxx
xag x
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         145.01870926   65.14560537    0.        ]
------
Step:19, Action:South
State  104
Old Q Values:  [-8652.84         685.67580675  1078.33520689 -8652.84      ]
New Q values:  [-8652.84        4492.09836327  1078.33520689 -8652.84      ]
Reward: -1  Episode Reward:  31
xxxxx
xg  x
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[-2081.09028721     0.         14061.4268019      0.        ]
------
Step:20, Action:East
State  185
Old Q Values:  [ 170.16500104    0.         2173.00539618 -178.98      ]
New Q values:  [ 170.16500104    0.         2794.42852536 -178.98      ]
Reward: -1  Episode Reward:  30
xxxxx
x g x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  6419.42122296 1239.78307745  408.67479662]
------
Step:21, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  5.63706517e+03 -8.94356769e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  2.85284927e+03 -8.94356769e+03  2.00341972e+02]
Reward: -1  Episode Reward:  29
xxxxx
x  gx
x   x
x.a.x
xxxxx
Step:22, Action:North
State  273
Old Q Values:  [  37.74111519 -168.92307549 1995.41067669 1241.5296413 ]
New Q values:  [ 870.35122762 -168.92307549 1995.41067669 1241.5296413 ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.85284927e+03 -8.94356769e+03  2.00341972e+02]
------
Step:23, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  2.85284927e+03 -8.94356769e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.73916291e+03 -8.94356769e+03  2.00341972e+02]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 1995.41067669 1241.5296413 ]
------
Step:24, Action:East
State  273
Old Q Values:  [ 870.35122762 -168.92307549 1995.41067669 1241.5296413 ]
New Q values:  [ 870.35122762 -168.92307549 2156.35454229 1241.5296413 ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4509.3009054  -1609.84182322 -8192.20126966  1264.29955488]
------
Step:25, Action:West
State  288
Old Q Values:  [ 4509.3009054  -1609.84182322 -8192.20126966  1264.29955488]
New Q values:  [ 4509.3009054  -1609.84182322 -8192.20126966 19416.31702636]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 63037.32401469  2844.83365641]
------
Step:26, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 63037.32401469  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799 31039.22471378  2844.83365641]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4509.3009054  -1609.84182322 -8192.20126966 19416.31702636]
------
Step:27, Action:North
State  288
Old Q Values:  [ 4509.3009054  -1609.84182322 -8192.20126966 19416.31702636]
New Q values:  [ 2637.26931735 -1609.84182322 -8192.20126966 19416.31702636]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  915.75335289   372.98523174 -8896.20691497  2780.49651731]
------
Step:28, Action:North
State  218
Old Q Values:  [2081.39278929  848.96225083    0.          429.03841886]
New Q values:  [3951.44630838  848.96225083    0.          429.03841886]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.03982973e+04]
------
Step:29, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  1.03982973e+04]
New Q values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  7.87834635e+03]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043 12398.75810215   170.18339097]
------
Step:30, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043 12398.75810215   170.18339097]
New Q values:  [ -281.736      -3455.78276043  7322.40714712   170.18339097]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  7.87834635e+03]
------
Step:31, Action:West
State  130
Old Q Values:  [ 59346.86261746  29105.33341548   -180.00807518 159921.68260279]
New Q values:  [ 59346.86261746  29105.33341548   -180.00807518 103736.76104614]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:32, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043  7322.40714712   170.18339097]
New Q values:  [ -281.736      -3455.78276043  7322.40714712   160.70709612]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   310.77913245  -180.6       ]
------
Step:33, Action:East
State  107
Old Q Values:  [-252.35169558 1051.41084563   69.96901366 -252.78192178]
New Q values:  [-252.35169558 1051.41084563 2224.1097496  -252.78192178]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  7322.40714712   160.70709612]
------
Step:34, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  5461.93066472   342.17888722]
New Q values:  [ -253.44886264 -1902.20915811  4547.67617215   342.17888722]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  7.87834635e+03]
------
Step:35, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  7.87834635e+03]
New Q values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  4.51504139e+03]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  4547.67617215   342.17888722]
------
Step:36, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  7322.40714712   160.70709612]
New Q values:  [ -281.736      -3455.78276043  4282.87527685   160.70709612]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  4.51504139e+03]
------
Step:37, Action:West
State  130
Old Q Values:  [ 59346.86261746  29105.33341548   -180.00807518 103736.76104614]
New Q values:  [59346.86261746 29105.33341548  -180.00807518 81262.79242348]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:38, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043  4282.87527685   160.70709612]
New Q values:  [ -281.736      -3455.78276043  4282.87527685   730.91576333]
Reward: -1  Episode Reward:  22
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 2224.1097496  -252.78192178]
------
Step:39, Action:East
State  107
Old Q Values:  [-252.35169558 1051.41084563 2224.1097496  -252.78192178]
New Q values:  [-252.35169558 1051.41084563 2253.34675149 -252.78192178]
Reward: -1  Episode Reward:  21
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  4547.67617215   342.17888722]
------
Step:40, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  4547.67617215   342.17888722]
New Q values:  [ -253.44886264 -1902.20915811  3172.98288686   342.17888722]
Reward: -1  Episode Reward:  20
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  4.51504139e+03]
------
Step:41, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  4.51504139e+03]
New Q values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  2.75731142e+03]
Reward: -1  Episode Reward:  19
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3172.98288686   342.17888722]
------
Step:42, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  4282.87527685   730.91576333]
New Q values:  [ -281.736      -3455.78276043  2539.74353775   730.91576333]
Reward: -1  Episode Reward:  18
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  2.75731142e+03]
------
Step:43, Action:West
State  130
Old Q Values:  [59346.86261746 29105.33341548  -180.00807518 81262.79242348]
New Q values:  [59346.86261746 29105.33341548  -180.00807518 72273.20497441]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:44, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043  2539.74353775   730.91576333]
New Q values:  [ -281.736      -3455.78276043  2539.74353775   967.77033078]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 2253.34675149 -252.78192178]
------
Step:45, Action:East
State  107
Old Q Values:  [-252.35169558 1051.41084563 2253.34675149 -252.78192178]
New Q values:  [-252.35169558 1051.41084563 1852.63356665 -252.78192178]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3172.98288686   342.17888722]
------
Step:46, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  2539.74353775   967.77033078]
New Q values:  [ -281.736      -3455.78276043  1842.49084212   967.77033078]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  2.75731142e+03]
------
Step:47, Action:West
State  130
Old Q Values:  [59346.86261746 29105.33341548  -180.00807518 72273.20497441]
New Q values:  [59346.86261746 29105.33341548  -180.00807518 68677.36999479]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:48, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043  1842.49084212   967.77033078]
New Q values:  [ -281.736      -3455.78276043  1842.49084212   942.29820231]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1852.63356665 -252.78192178]
------
Step:49, Action:East
State  98
Old Q Values:  [     0.          41136.94667617 121614.08308908      0.        ]
New Q values:  [    0.         41136.94667617 88413.72124065     0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:50, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043  1842.49084212   942.29820231]
New Q values:  [ -281.736      -3455.78276043  1842.49084212   932.10935092]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1852.63356665 -252.78192178]
------
Step:51, Action:East
State  107
Old Q Values:  [-252.35169558 1051.41084563 1852.63356665 -252.78192178]
New Q values:  [-252.35169558 1051.41084563 1293.2006793  -252.78192178]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1842.49084212   932.10935092]
------
Step:52, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1556.61335289   174.48814984]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337    174.48814984]
Reward: -1  Episode Reward:  8
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   1476.93197515 -2383.80019164   344.57553345]
------
Step:53, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.24488184e+03 -3.22965309e-01  2.75731142e+03]
New Q values:  [ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  2.75731142e+03]
Reward: -1  Episode Reward:  7
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  915.75335289   372.98523174 -8896.20691497  2780.49651731]
------
Step:54, Action:North
State  216
Old Q Values:  [  915.75335289   372.98523174 -8896.20691497  2780.49651731]
New Q values:  [  808.7809337    372.98523174 -8896.20691497  2780.49651731]
Reward: -1  Episode Reward:  6
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-1043.6707427   1476.93197515 -2383.80019164   344.57553345]
------
Step:55, Action:South
State  136
Old Q Values:  [-1043.6707427   1476.93197515 -2383.80019164   344.57553345]
New Q values:  [-1043.6707427   1424.32174525 -2383.80019164   344.57553345]
Reward: -1  Episode Reward:  5
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  808.7809337    372.98523174 -8896.20691497  2780.49651731]
------
Step:56, Action:West
State  216
Old Q Values:  [  808.7809337    372.98523174 -8896.20691497  2780.49651731]
New Q values:  [  808.7809337    372.98523174 -8896.20691497  3037.42497381]
Reward: -1  Episode Reward:  4
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  6419.42122296 1239.78307745  408.67479662]
------
Step:57, Action:South
State  194
Old Q Values:  [-6.00000000e-01  1.00959587e+03  9.34205402e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  9.71500576e+03  9.34205402e+03  1.20371620e+03]
Reward: -1  Episode Reward:  3
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 31039.22471378  2844.83365641]
------
Step:58, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 31039.22471378  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799 18239.98499342  2844.83365641]
Reward: -1  Episode Reward:  2
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2637.26931735 -1609.84182322 -8192.20126966 19416.31702636]
------
Step:59, Action:West
State  288
Old Q Values:  [ 2637.26931735 -1609.84182322 -8192.20126966 19416.31702636]
New Q values:  [ 2637.26931735 -1609.84182322 -8192.20126966 13237.92230857]
Reward: -1  Episode Reward:  1
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 18239.98499342  2844.83365641]
------
Step:60, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 18239.98499342  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799 11266.77068994  2844.83365641]
Reward: -1  Episode Reward:  0
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2637.26931735 -1609.84182322 -8192.20126966 13237.92230857]
------
Step:61, Action:West
State  288
Old Q Values:  [ 2637.26931735 -1609.84182322 -8192.20126966 13237.92230857]
New Q values:  [ 2637.26931735 -1609.84182322 -8192.20126966  8674.60013041]
Reward: -1  Episode Reward:  -1
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 11266.77068994  2844.83365641]
------
Step:62, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11266.77068994  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799  7108.4883151   2844.83365641]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2637.26931735 -1609.84182322 -8192.20126966  8674.60013041]
------
Step:63, Action:North
State  288
Old Q Values:  [ 2637.26931735 -1609.84182322 -8192.20126966  8674.60013041]
New Q values:  [ 1965.53521908 -1609.84182322 -8192.20126966  8674.60013041]
Reward: -1  Episode Reward:  -3
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  808.7809337    372.98523174 -8896.20691497  3037.42497381]
------
Step:64, Action:North
State  218
Old Q Values:  [3951.44630838  848.96225083    0.          429.03841886]
New Q values:  [2407.17195037  848.96225083    0.          429.03841886]
Reward: -1  Episode Reward:  -4
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  2.75731142e+03]
------
Step:65, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  2.75731142e+03]
New Q values:  [ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.65507182e+03]
Reward: -1  Episode Reward:  -5
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1842.49084212   932.10935092]
------
Step:66, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  1842.49084212   932.10935092]
New Q values:  [ -281.736      -3455.78276043  1232.91788344   932.10935092]
Reward: -1  Episode Reward:  -6
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.65507182e+03]
------
Step:67, Action:West
State  138
Old Q Values:  [ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.65507182e+03]
New Q values:  [ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.03130409e+03]
Reward: -1  Episode Reward:  -7
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043  1232.91788344   932.10935092]
------
Step:68, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043  1232.91788344   932.10935092]
New Q values:  [ -281.736      -3455.78276043   892.01766082   932.10935092]
Reward: -1  Episode Reward:  -8
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.03130409e+03]
------
Step:69, Action:South
State  130
Old Q Values:  [59346.86261746 29105.33341548  -180.00807518 68677.36999479]
New Q values:  [59346.86261746 19091.02359571  -180.00807518 68677.36999479]
Reward: -1  Episode Reward:  -9
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:70, Action:North
State  218
Old Q Values:  [2407.17195037  848.96225083    0.          429.03841886]
New Q values:  [1361.71928759  848.96225083    0.          429.03841886]
Reward: -1  Episode Reward:  -10
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.03130409e+03]
------
Step:71, Action:South
State  130
Old Q Values:  [59346.86261746 19091.02359571  -180.00807518 68677.36999479]
New Q values:  [59346.86261746 15085.2996678   -180.00807518 68677.36999479]
Reward: -1  Episode Reward:  -11
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:72, Action:North
State  216
Old Q Values:  [  808.7809337    372.98523174 -8896.20691497  3037.42497381]
New Q values:  [  722.36288093   372.98523174 -8896.20691497  3037.42497381]
Reward: -1  Episode Reward:  -12
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.03130409e+03]
------
Step:73, Action:South
State  130
Old Q Values:  [59346.86261746 15085.2996678   -180.00807518 68677.36999479]
New Q values:  [59346.86261746 13483.01009663  -180.00807518 68677.36999479]
Reward: -1  Episode Reward:  -13
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:74, Action:North
State  218
Old Q Values:  [1361.71928759  848.96225083    0.          429.03841886]
New Q values:  [943.53822248 848.96225083   0.         429.03841886]
Reward: -1  Episode Reward:  -14
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.03130409e+03]
------
Step:75, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.33150169e+03 -3.22965309e-01  1.03130409e+03]
New Q values:  [ 3.67167427e+02  1.44322817e+03 -3.22965309e-01  1.03130409e+03]
Reward: -1  Episode Reward:  -15
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  722.36288093   372.98523174 -8896.20691497  3037.42497381]
------
Step:76, Action:North
State  218
Old Q Values:  [943.53822248 848.96225083   0.         429.03841886]
New Q values:  [809.78373961 848.96225083   0.         429.03841886]
Reward: -1  Episode Reward:  -16
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02  1.44322817e+03 -3.22965309e-01  1.03130409e+03]
------
Step:77, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  1.44322817e+03 -3.22965309e-01  1.03130409e+03]
New Q values:  [ 3.67167427e+02  8.31379943e+02 -3.22965309e-01  1.03130409e+03]
Reward: -1  Episode Reward:  -17
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[809.78373961 848.96225083   0.         429.03841886]
------
Step:78, Action:North
State  216
Old Q Values:  [  722.36288093   372.98523174 -8896.20691497  3037.42497381]
New Q values:  [  597.73638052   372.98523174 -8896.20691497  3037.42497381]
Reward: -1  Episode Reward:  -18
xxxxx
x  ax
x  gx
x.  x
xxxxx
Step:79, Action:South
State  138
Old Q Values:  [ 3.67167427e+02  8.31379943e+02 -3.22965309e-01  1.03130409e+03]
New Q values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  1.03130409e+03]
Reward: -10001  Episode Reward:  -10019
xxxxx
x  ax
x g x
x.  x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:1, Action:North
State  208
Old Q Values:  [51217.50090428  2773.12360202 -4228.04879148  1770.25839846]
New Q values:  [20801.79158986  2773.12360202 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  1.03130409e+03]
------
Step:2, Action:West
State  136
Old Q Values:  [-1043.6707427   1424.32174525 -2383.80019164   344.57553345]
New Q values:  [-1043.6707427   1424.32174525 -2383.80019164 -5537.23230651]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.g x
x.. x
x. .x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7108.4883151   2844.83365641]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7108.4883151   2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799  5451.17536516  2844.83365641]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1965.53521908 -1609.84182322 -8192.20126966  8674.60013041]
------
Step:2, Action:West
State  288
Old Q Values:  [ 1965.53521908 -1609.84182322 -8192.20126966  8674.60013041]
New Q values:  [ 1965.53521908 -1609.84182322 -8192.20126966  5104.59266171]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5451.17536516  2844.83365641]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5451.17536516  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799  3711.24794458  2844.83365641]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1965.53521908 -1609.84182322 -8192.20126966  5104.59266171]
------
Step:4, Action:West
State  288
Old Q Values:  [ 1965.53521908 -1609.84182322 -8192.20126966  5104.59266171]
New Q values:  [ 1965.53521908 -1609.84182322 -8192.20126966  3154.61144806]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x. .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3711.24794458  2844.83365641]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3711.24794458  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799  2430.28261225  2844.83365641]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x. .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1965.53521908 -1609.84182322 -8192.20126966  3154.61144806]
------
Step:6, Action:North
State  288
Old Q Values:  [ 1965.53521908 -1609.84182322 -8192.20126966  3154.61144806]
New Q values:  [ 7032.15156459 -1609.84182322 -8192.20126966  3154.61144806]
Reward: 9  Episode Reward:  14
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20801.79158986  2773.12360202 -4228.04879148  1770.25839846]
------
Step:7, Action:North
State  208
Old Q Values:  [20801.79158986  2773.12360202 -4228.04879148  1770.25839846]
New Q values:  [28929.32763438  2773.12360202 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  23
xxxxx
x..ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59346.86261746 13483.01009663  -180.00807518 68677.36999479]
------
Step:8, Action:West
State  138
Old Q Values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  1.03130409e+03]
New Q values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  6.97554443e+02]
Reward: 9  Episode Reward:  32
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   892.01766082   932.10935092]
------
Step:9, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337    174.48814984]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337    118.70087271]
Reward: 9  Episode Reward:  41
xxxxx
xag x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         145.01870926   65.14560537    0.        ]
------
Step:10, Action:South
State  104
Old Q Values:  [-8652.84        4492.09836327  1078.33520689 -8652.84      ]
New Q values:  [-8652.84        6020.66738588  1078.33520689 -8652.84      ]
Reward: 9  Episode Reward:  50
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[-2081.09028721     0.         14061.4268019      0.        ]
------
Step:11, Action:East
State  185
Old Q Values:  [ 170.16500104    0.         2794.42852536 -178.98      ]
New Q values:  [ 170.16500104    0.         3042.99777703 -178.98      ]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  6419.42122296 1239.78307745  408.67479662]
------
Step:12, Action:South
State  200
Old Q Values:  [  62.8218634  6419.42122296 1239.78307745  408.67479662]
New Q values:  [  62.8218634  3420.61858611 1239.78307745  408.67479662]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2430.28261225  2844.83365641]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2430.28261225  2844.83365641]
New Q values:  [-2527.46239811 -8521.23367799  2430.28261225 72362.32410518]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2361.73793661  353.8832415  -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 2199.07989213    5.4           0.        ]
New Q values:  [ 221.30610858 2815.5902276     5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  657.75889141 6435.19423583    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 877.23516594  657.75889141 6435.19423583    0.        ]
New Q values:  [ 877.23516594  657.75889141 6364.12960322    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -7.91881263e+03  1.26155064e+04  0.00000000e+00]
------
Step:3, Action:East
State  195
Old Q Values:  [   38.85388605  2639.47551855 17919.46960722  1101.59744825]
New Q values:  [   38.85388605  2639.47551855 14622.6780724   1101.59744825]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:4, Action:North
State  218
Old Q Values:  [809.78373961 848.96225083   0.         429.03841886]
New Q values:  [538.57982869 848.96225083   0.         429.03841886]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  6.97554443e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  6.97554443e+02]
New Q values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  1.47457021e+03]
Reward: 9  Episode Reward:  45
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 3967.16144923 1134.18629274]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   892.01766082   932.10935092]
New Q values:  [ -281.736      -3455.78276043   798.5781279    932.10935092]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  1.47457021e+03]
------
Step:7, Action:West
State  138
Old Q Values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  1.47457021e+03]
New Q values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  8.68860890e+02]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   798.5781279    932.10935092]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3172.98288686   342.17888722]
New Q values:  [ -253.44886264 -1902.20915811  3172.98288686   524.23175868]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1293.2006793  -252.78192178]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558 1051.41084563 1293.2006793  -252.78192178]
New Q values:  [-252.35169558 1051.41084563 1468.57513778 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3172.98288686   524.23175868]
------
Step:10, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3172.98288686   524.23175868]
New Q values:  [ -253.44886264 -1902.20915811  1529.25142175   524.23175868]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  8.68860890e+02]
------
Step:11, Action:West
State  136
Old Q Values:  [-1043.6707427   1424.32174525 -2383.80019164 -5537.23230651]
New Q values:  [-1043.6707427   1424.32174525 -2383.80019164 -2122.17871981]
Reward: -1  Episode Reward:  39
xxxxx
x agx
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:12, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337    118.70087271]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337     90.38596186]
Reward: -1  Episode Reward:  38
xxxxx
xag x
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         145.01870926   65.14560537    0.        ]
------
Step:13, Action:South
State  105
Old Q Values:  [-180.6         145.01870926   65.14560537    0.        ]
New Q values:  [-180.6         970.30681681   65.14560537    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 170.16500104    0.         3042.99777703 -178.98      ]
------
Step:14, Action:East
State  185
Old Q Values:  [ 170.16500104    0.         3042.99777703 -178.98      ]
New Q values:  [ 170.16500104    0.         1738.34798433 -178.98      ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.73916291e+03 -8.94356769e+03  2.00341972e+02]
------
Step:15, Action:South
State  203
Old Q Values:  [   3.60604218  705.82716573 1909.98438219    0.        ]
New Q values:  [   3.60604218  928.63722898 1909.98438219    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 2156.35454229 1241.5296413 ]
------
Step:16, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2060.55605671  1399.12224319]
New Q values:  [   16.82637525 -5807.06396197  2060.55605671   694.25822892]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  430.69777215    26.73544252 -2339.75315766   -35.88578819]
------
Step:17, Action:North
State  257
Old Q Values:  [37396.63547538  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [20707.9849204   2256.66526474  4520.89517899  1875.31501677]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17203.58890013 19166.43576748     0.        ]
------
Step:18, Action:East
State  191
Old Q Values:  [  3.06655861 206.84750324 482.93632693   0.        ]
New Q values:  [  3.06655861 206.84750324 650.32545056   0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.          135.48456638 1525.83639927    0.        ]
------
Step:19, Action:East
State  206
Old Q Values:  [   0.          135.48456638 1525.83639927    0.        ]
New Q values:  [  0.         135.48456638 864.42323496   0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[538.57982869 848.96225083   0.         429.03841886]
------
Step:20, Action:South
State  208
Old Q Values:  [28929.32763438  2773.12360202 -4228.04879148  1770.25839846]
New Q values:  [28929.32763438 63224.29491019 -4228.04879148  1770.25839846]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.65993489e+03 1.93809055e+03 2.91043938e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144   471.55277317  4092.85112914   239.04887894]
New Q values:  [-2469.90645144   812.18792628  4092.85112914   239.04887894]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2060.55605671   694.25822892]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2430.28261225 72362.32410518]
New Q values:  [-2527.46239811 -8521.23367799  3087.15851428 72362.32410518]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7032.15156459 -1609.84182322 -8192.20126966  3154.61144806]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7032.15156459 -1609.84182322 -8192.20126966  3154.61144806]
New Q values:  [21785.54909889 -1609.84182322 -8192.20126966  3154.61144806]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28929.32763438 63224.29491019 -4228.04879148  1770.25839846]
------
Step:4, Action:South
State  208
Old Q Values:  [28929.32763438 63224.29491019 -4228.04879148  1770.25839846]
New Q values:  [28929.32763438 31824.78269374 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  26
xxxxx
x. .x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21785.54909889 -1609.84182322 -8192.20126966  3154.61144806]
------
Step:5, Action:North
State  288
Old Q Values:  [21785.54909889 -1609.84182322 -8192.20126966  3154.61144806]
New Q values:  [16163.10986907 -1609.84182322 -8192.20126966  3154.61144806]
Reward: -1  Episode Reward:  25
xxxxx
x. .x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [2.48316341e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [1.01987119e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
Reward: 9  Episode Reward:  34
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  8.68860890e+02]
------
Step:7, Action:West
State  130
Old Q Values:  [59346.86261746 13483.01009663  -180.00807518 68677.36999479]
New Q values:  [59346.86261746 13483.01009663  -180.00807518 67239.03600293]
Reward: -1  Episode Reward:  33
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   798.5781279    932.10935092]
New Q values:  [ -281.736      -3455.78276043   798.5781279    818.8162817 ]
Reward: 9  Episode Reward:  42
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1468.57513778 -252.78192178]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558 1051.41084563 1468.57513778 -252.78192178]
New Q values:  [-252.35169558 1051.41084563 1045.60548164 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1529.25142175   524.23175868]
------
Step:10, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1529.25142175   524.23175868]
New Q values:  [ -253.44886264 -1902.20915811   871.75883571   524.23175868]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  8.68860890e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  8.68860890e+02]
New Q values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  6.08472007e+02]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   871.75883571   524.23175868]
------
Step:12, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   871.75883571   524.23175868]
New Q values:  [ -253.44886264 -1902.20915811   530.6451363    524.23175868]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  6.08472007e+02]
------
Step:13, Action:West
State  136
Old Q Values:  [-1043.6707427   1424.32174525 -2383.80019164 -2122.17871981]
New Q values:  [-1043.6707427   1424.32174525 -2383.80019164  -756.15728513]
Reward: -1  Episode Reward:  37
xxxxx
x agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   530.6451363    524.23175868]
New Q values:  [ -253.44886264 -1902.20915811   530.6451363    524.51595716]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1045.60548164 -252.78192178]
------
Step:15, Action:South
State  105
Old Q Values:  [-180.6         970.30681681   65.14560537    0.        ]
New Q values:  [-180.6         915.02712203   65.14560537    0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 170.16500104    0.         1738.34798433 -178.98      ]
------
Step:16, Action:East
State  185
Old Q Values:  [ 170.16500104    0.         1738.34798433 -178.98      ]
New Q values:  [ 170.16500104    0.         1216.48806725 -178.98      ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.73916291e+03 -8.94356769e+03  2.00341972e+02]
------
Step:17, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.73916291e+03 -8.94356769e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.34197153e+03 -8.94356769e+03  2.00341972e+02]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 2156.35454229 1241.5296413 ]
------
Step:18, Action:East
State  273
Old Q Values:  [ 870.35122762 -168.92307549 2156.35454229 1241.5296413 ]
New Q values:  [ 870.35122762 -168.92307549 5710.87477764 1241.5296413 ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16163.10986907 -1609.84182322 -8192.20126966  3154.61144806]
------
Step:19, Action:West
State  288
Old Q Values:  [16163.10986907 -1609.84182322 -8192.20126966  3154.61144806]
New Q values:  [16163.10986907 -1609.84182322 -8192.20126966  2974.50701252]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 5710.87477764 1241.5296413 ]
------
Step:20, Action:East
State  273
Old Q Values:  [ 870.35122762 -168.92307549 5710.87477764 1241.5296413 ]
New Q values:  [ 870.35122762 -168.92307549 7132.68287178 1241.5296413 ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16163.10986907 -1609.84182322 -8192.20126966  2974.50701252]
------
Step:21, Action:West
State  288
Old Q Values:  [16163.10986907 -1609.84182322 -8192.20126966  2974.50701252]
New Q values:  [16163.10986907 -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 7132.68287178 1241.5296413 ]
------
Step:22, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 7132.68287178 1241.5296413 ]
New Q values:  [  870.35122762  -168.92307549  7132.68287178 66714.40733264]
Reward: 100009  Episode Reward:  100048
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 -979.06556491  399.93689112 -180.6       ]
------
Step:1, Action:East
State  110
Old Q Values:  [-239.29051573 -979.06556491  399.93689112 -180.6       ]
New Q values:  [-239.29051573 -979.06556491  411.01964096 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   798.5781279    818.8162817 ]
------
Step:2, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337     90.38596186]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337  -5683.74913053]
Reward: -10001  Episode Reward:  -9992
xxxxx
xg .x
x...x
x ..x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  430.69777215    26.73544252 -2339.75315766   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [  430.69777215    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  393.18605309    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  718.35648077   242.76375976 -4244.73627866   -30.99112081]
------
Step:2, Action:North
State  181
Old Q Values:  [  718.35648077   242.76375976 -4244.73627866   -30.99112081]
New Q values:  [ 1001.26397329   242.76375976 -4244.73627866   -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
xa..x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2361.73793661  353.8832415  -120.29354603]
------
Step:3, Action:South
State  103
Old Q Values:  [ 221.30610858 2815.5902276     5.4           0.        ]
New Q values:  [ 221.30610858 3034.87497201    5.4           0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  657.75889141 6364.12960322    0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [ 1001.26397329   242.76375976 -4244.73627866   -30.99112081]
New Q values:  [ 1001.26397329   242.76375976 -6470.63917272   -30.99112081]
Reward: -10001  Episode Reward:  -9984
xxxxx
x ..x
x g.x
x ..x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  393.18605309    26.73544252 -2339.75315766   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [  393.18605309    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  463.05361322    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1001.26397329   242.76375976 -6470.63917272   -30.99112081]
------
Step:2, Action:North
State  181
Old Q Values:  [ 1001.26397329   242.76375976 -6470.63917272   -30.99112081]
New Q values:  [ 1114.4269703    242.76375976 -6470.63917272   -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
xa..x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2361.73793661  353.8832415  -120.29354603]
------
Step:3, Action:South
State  103
Old Q Values:  [ 221.30610858 3034.87497201    5.4           0.        ]
New Q values:  [ 221.30610858 1547.67807989    5.4           0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1114.4269703    242.76375976 -6470.63917272   -30.99112081]
------
Step:4, Action:North
State  181
Old Q Values:  [ 1114.4269703    242.76375976 -6470.63917272   -30.99112081]
New Q values:  [ 1153.6921691    242.76375976 -6470.63917272   -30.99112081]
Reward: -1  Episode Reward:  16
xxxxx
xa..x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2361.73793661  353.8832415  -120.29354603]
------
Step:5, Action:South
State  110
Old Q Values:  [-239.29051573 -979.06556491  411.01964096 -180.6       ]
New Q values:  [ -239.29051573 -5356.3368626    411.01964096  -180.6       ]
Reward: -10001  Episode Reward:  -9985
xxxxx
x ..x
xg .x
x ..x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   798.5781279    818.8162817 ]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   798.5781279    818.8162817 ]
New Q values:  [ -281.736      -3455.78276043   798.5781279    456.23240497]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -5356.3368626    411.01964096  -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134   935.65494908   839.22043435     0.        ]
New Q values:  [-8463.16477134   935.65494908   654.62565385     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xga.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1065.1249337  -5683.74913053]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   798.5781279    456.23240497]
New Q values:  [ -281.736      -3455.78276043   507.37285318   456.23240497]
Reward: 9  Episode Reward:  17
xxxxx
x  ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  6.08472007e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  6.08472007e+02]
New Q values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  3.95000659e+02]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   507.37285318   456.23240497]
------
Step:5, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   530.6451363    524.51595716]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   524.51595716]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  3.95000659e+02]
------
Step:6, Action:West
State  138
Old Q Values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  3.95000659e+02]
New Q values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  3.09612119e+02]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   507.37285318   456.23240497]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   507.37285318   456.23240497]
New Q values:  [ -281.736      -3455.78276043   312.49936929   456.23240497]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  3.09612119e+02]
------
Step:8, Action:North
State  136
Old Q Values:  [-1043.6707427   1424.32174525 -2383.80019164  -756.15728513]
New Q values:  [ -170.77177351  1424.32174525 -2383.80019164  -756.15728513]
Reward: -301  Episode Reward:  -288
xxxxx
xg ax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1424.32174525 -2383.80019164  -756.15728513]
------
Step:9, Action:South
State  138
Old Q Values:  [ 3.67167427e+02 -5.35865679e+03 -3.22965309e-01  3.09612119e+02]
New Q values:  [ 3.67167427e+02 -1.22683523e+03 -3.22965309e-01  3.09612119e+02]
Reward: 9  Episode Reward:  -279
xxxxx
x   x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  597.73638052   372.98523174 -8896.20691497  3037.42497381]
------
Step:10, Action:West
State  216
Old Q Values:  [  597.73638052   372.98523174 -8896.20691497  3037.42497381]
New Q values:  [  597.73638052   372.98523174 -8896.20691497  2318.350458  ]
Reward: 9  Episode Reward:  -270
xxxxx
xg  x
x.a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.65993489e+03 1.93809055e+03 2.91043938e+03]
------
Step:11, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.65993489e+03 1.93809055e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.31780712e+04 1.93809055e+03 2.91043938e+03]
Reward: 9  Episode Reward:  -261
xxxxx
x g x
x.  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3087.15851428 72362.32410518]
------
Step:12, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  7132.68287178 66714.40733264]
New Q values:  [  870.35122762  -168.92307549  7132.68287178 32897.55840918]
Reward: -1  Episode Reward:  -262
xxxxx
x  gx
x.  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[20707.9849204   2256.66526474  4520.89517899  1875.31501677]
------
Step:13, Action:North
State  261
Old Q Values:  [  463.05361322    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1320.64549196    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  -253
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[   9.84673294  881.83651799 3766.74682222  154.04646645]
------
Step:14, Action:East
State  189
Old Q Values:  [   9.84673294  881.83651799 3766.74682222  154.04646645]
New Q values:  [   9.84673294  881.83651799 1597.38224697  154.04646645]
Reward: -1  Episode Reward:  -254
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         304.2783936    0.         198.38683706]
------
Step:15, Action:South
State  204
Old Q Values:  [  0.         898.60459158 815.31186329 441.58769553]
New Q values:  [  0.         977.00865364 815.31186329 441.58769553]
Reward: -1  Episode Reward:  -255
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2060.55605671   694.25822892]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3087.15851428 72362.32410518]
New Q values:  [-2527.46239811 -8521.23367799 66089.19636643 72362.32410518]
Reward: 100009  Episode Reward:  99754
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1065.1249337  -5683.74913053]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   312.49936929   456.23240497]
New Q values:  [ -281.736      -3455.78276043   240.54997574   456.23240497]
Reward: 9  Episode Reward:  9
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.67167427e+02 -1.22683523e+03 -3.22965309e-01  3.09612119e+02]
------
Step:2, Action:North
State  138
Old Q Values:  [ 3.67167427e+02 -1.22683523e+03 -3.22965309e-01  3.09612119e+02]
New Q values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.09612119e+02]
Reward: -301  Episode Reward:  -292
xxxxx
x  ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.09612119e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.09612119e+02]
New Q values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  2.80599635e+02]
Reward: -1  Episode Reward:  -293
xxxxx
x a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   524.51595716]
------
Step:4, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   524.51595716]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   917.72776385]
Reward: -1  Episode Reward:  -294
xxxxx
xa  x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2361.73793661  353.8832415  -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 2361.73793661  353.8832415  -120.29354603]
New Q values:  [-177.44732869 2859.33405561  353.8832415  -120.29354603]
Reward: 9  Episode Reward:  -285
xxxxx
x   x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  657.75889141 6364.12960322    0.        ]
------
Step:6, Action:East
State  183
Old Q Values:  [ 877.23516594  657.75889141 6364.12960322    0.        ]
New Q values:  [ 877.23516594  657.75889141 3124.04715595    0.        ]
Reward: 9  Episode Reward:  -276
xxxxx
x   x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[   3.60604218  928.63722898 1909.98438219    0.        ]
------
Step:7, Action:East
State  201
Old Q Values:  [ 2.33354578e+00  1.34197153e+03 -8.94356769e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.34197153e+03 -8.87652194e+03  2.00341972e+02]
Reward: -9991  Episode Reward:  -10267
xxxxx
x   x
x  gx
x...x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28929.32763438 31824.78269374 -4228.04879148  1770.25839846]
------
Step:1, Action:South
State  208
Old Q Values:  [28929.32763438 31824.78269374 -4228.04879148  1770.25839846]
New Q values:  [28929.32763438 17584.24603822 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16163.10986907 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:2, Action:North
State  288
Old Q Values:  [16163.10986907 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 9524.25751954 -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.01987119e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [1.01987119e+04 1.65470110e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [4.16906465e+03 1.65470110e+03 1.38903186e+03 3.52184257e+00]
Reward: 9  Episode Reward:  17
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  2.80599635e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  2.80599635e+02]
New Q values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.86958183e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   917.72776385]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   240.54997574   456.23240497]
New Q values:  [ -281.736      -3455.78276043   240.54997574   503.31621568]
Reward: 9  Episode Reward:  25
xxxxx
xa  x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1045.60548164 -252.78192178]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 2859.33405561  353.8832415  -120.29354603]
New Q values:  [-177.44732869 1495.24127298  353.8832415  -120.29354603]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1153.6921691    242.76375976 -6470.63917272   -30.99112081]
------
Step:7, Action:North
State  181
Old Q Values:  [ 1153.6921691    242.76375976 -6470.63917272   -30.99112081]
New Q values:  [  625.22354793   242.76375976 -6470.63917272   -30.99112081]
Reward: -1  Episode Reward:  33
xxxxx
xa gx
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  547.82226763   -8.57207238 -180.6       ]
------
Step:8, Action:South
State  109
Old Q Values:  [-241.10880094  547.82226763   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  406.09597143   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x g x
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  625.22354793   242.76375976 -6470.63917272   -30.99112081]
------
Step:9, Action:North
State  185
Old Q Values:  [ 170.16500104    0.         1216.48806725 -178.98      ]
New Q values:  [ 382.88925411    0.         1216.48806725 -178.98      ]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1045.60548164 -252.78192178]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869 1495.24127298  353.8832415  -120.29354603]
New Q values:  [-177.44732869  785.06357357  353.8832415  -120.29354603]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  625.22354793   242.76375976 -6470.63917272   -30.99112081]
------
Step:11, Action:North
State  181
Old Q Values:  [  625.22354793   242.76375976 -6470.63917272   -30.99112081]
New Q values:  [  371.3182106    242.76375976 -6470.63917272   -30.99112081]
Reward: -1  Episode Reward:  29
xxxxx
xa gx
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  406.09597143   -8.57207238 -180.6       ]
------
Step:12, Action:South
State  111
Old Q Values:  [-177.44732869  785.06357357  353.8832415  -120.29354603]
New Q values:  [-177.44732869  424.82089261  353.8832415  -120.29354603]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  371.3182106    242.76375976 -6470.63917272   -30.99112081]
------
Step:13, Action:North
State  185
Old Q Values:  [ 382.88925411    0.         1216.48806725 -178.98      ]
New Q values:  [ 467.97895533    0.         1216.48806725 -178.98      ]
Reward: -1  Episode Reward:  27
xxxxx
xa  x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1045.60548164 -252.78192178]
------
Step:14, Action:South
State  109
Old Q Values:  [-241.10880094  406.09597143   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  273.23385175   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x g x
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  371.3182106    242.76375976 -6470.63917272   -30.99112081]
------
Step:15, Action:North
State  180
Old Q Values:  [  164.99776392  3452.96454455  1631.08021511 -4966.32149798]
New Q values:  [-5653.90440971  3452.96454455  1631.08021511 -4966.32149798]
Reward: -10001  Episode Reward:  -9975
xxxxx
xg  x
x . x
x.. x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.31780712e+04 1.93809055e+03 2.91043938e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144   812.18792628  4092.85112914   239.04887894]
New Q values:  [-2469.90645144   948.44198752  4092.85112914   239.04887894]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2060.55605671   694.25822892]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 66089.19636643 72362.32410518]
New Q values:  [-2527.46239811 -8521.23367799 29298.35580243 72362.32410518]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9524.25751954 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:3, Action:North
State  288
Old Q Values:  [ 9524.25751954 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [12493.90129813 -1609.84182322 -8192.20126966  3329.00766654]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28929.32763438 17584.24603822 -4228.04879148  1770.25839846]
------
Step:4, Action:North
State  208
Old Q Values:  [28929.32763438 17584.24603822 -4228.04879148  1770.25839846]
New Q values:  [11693.21850869 17584.24603822 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.86958183e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.86958183e+02]
New Q values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.11178138e+02]
Reward: 9  Episode Reward:  45
xxxxx
x a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   240.54997574   503.31621568]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   240.54997574   503.31621568]
New Q values:  [ -281.736      -3455.78276043   240.54997574   516.14973996]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563 1045.60548164 -252.78192178]
------
Step:7, Action:South
State  105
Old Q Values:  [-180.6         915.02712203   65.14560537    0.        ]
New Q values:  [-180.6         736.35726899   65.14560537    0.        ]
Reward: 9  Episode Reward:  53
xxxxx
x g x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 467.97895533    0.         1216.48806725 -178.98      ]
------
Step:8, Action:East
State  185
Old Q Values:  [ 467.97895533    0.         1216.48806725 -178.98      ]
New Q values:  [  467.97895533     0.         -4487.81919727  -178.98      ]
Reward: -10001  Episode Reward:  -9948
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12493.90129813 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:1, Action:North
State  288
Old Q Values:  [12493.90129813 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [10278.23433072 -1609.84182322 -8192.20126966  3329.00766654]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11693.21850869 17584.24603822 -4228.04879148  1770.25839846]
------
Step:2, Action:South
State  210
Old Q Values:  [4.16906465e+03 1.65470110e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [4.16906465e+03 3.74475074e+03 1.38903186e+03 3.52184257e+00]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10278.23433072 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:3, Action:North
State  288
Old Q Values:  [10278.23433072 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 5361.41312819 -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.16906465e+03 3.74475074e+03 1.38903186e+03 3.52184257e+00]
------
Step:4, Action:North
State  208
Old Q Values:  [11693.21850869 17584.24603822 -4228.04879148  1770.25839846]
New Q values:  [24854.39820436 17584.24603822 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  16
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59346.86261746 13483.01009663  -180.00807518 67239.03600293]
------
Step:5, Action:West
State  136
Old Q Values:  [ -170.77177351  1424.32174525 -2383.80019164  -756.15728513]
New Q values:  [ -170.77177351  1424.32174525 -2383.80019164    22.47456606]
Reward: 9  Episode Reward:  25
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1065.1249337  -5683.74913053]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   240.54997574   516.14973996]
New Q values:  [ -281.736      -3455.78276043   188.97343168   516.14973996]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.11178138e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  3.11178138e+02]
New Q values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  2.78716177e+02]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   188.97343168   516.14973996]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   188.97343168   516.14973996]
New Q values:  [ -281.736      -3455.78276043   188.97343168   305.09363572]
Reward: 9  Episode Reward:  32
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   310.77913245  -180.6       ]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558 1051.41084563 1045.60548164 -252.78192178]
New Q values:  [-252.35169558 1051.41084563  509.17028337 -252.78192178]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   188.97343168   305.09363572]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   188.97343168   305.09363572]
New Q values:  [ -281.736      -3455.78276043   188.97343168   436.86070798]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1051.41084563  509.17028337 -252.78192178]
------
Step:11, Action:South
State  107
Old Q Values:  [-252.35169558 1051.41084563  509.17028337 -252.78192178]
New Q values:  [-252.35169558 1363.17848504  509.17028337 -252.78192178]
Reward: 9  Episode Reward:  39
xxxxx
x   x
xa. x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  657.75889141 3124.04715595    0.        ]
------
Step:12, Action:East
State  187
Old Q Values:  [135.20477233   0.         589.29977683   0.        ]
New Q values:  [135.20477233   0.         630.55947601   0.        ]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458  1298.13188427     0.        ]
------
Step:13, Action:East
State  200
Old Q Values:  [  62.8218634  3420.61858611 1239.78307745  408.67479662]
New Q values:  [  62.8218634  3420.61858611 1190.81836838  408.67479662]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  597.73638052   372.98523174 -8896.20691497  2318.350458  ]
------
Step:14, Action:North
State  216
Old Q Values:  [  597.73638052   372.98523174 -8896.20691497  2318.350458  ]
New Q values:  [  665.79107578   372.98523174 -8896.20691497  2318.350458  ]
Reward: -1  Episode Reward:  46
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1424.32174525 -2383.80019164    22.47456606]
------
Step:15, Action:South
State  138
Old Q Values:  [ 7.64171987e+01 -1.22683523e+03 -3.22965309e-01  2.78716177e+02]
New Q values:  [ 76.41719872 204.1710471   -0.32296531 278.71617717]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  665.79107578   372.98523174 -8896.20691497  2318.350458  ]
------
Step:16, Action:North
State  218
Old Q Values:  [538.57982869 848.96225083   0.         429.03841886]
New Q values:  [298.44678463 848.96225083   0.         429.03841886]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 76.41719872 204.1710471   -0.32296531 278.71617717]
------
Step:17, Action:West
State  138
Old Q Values:  [ 76.41719872 204.1710471   -0.32296531 278.71617717]
New Q values:  [ 76.41719872 204.1710471   -0.32296531 241.94468326]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   188.97343168   436.86070798]
------
Step:18, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   917.72776385]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   775.44465105]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1363.17848504  509.17028337 -252.78192178]
------
Step:19, Action:South
State  107
Old Q Values:  [-252.35169558 1363.17848504  509.17028337 -252.78192178]
New Q values:  [-252.35169558  685.06508062  509.17028337 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  467.97895533     0.         -4487.81919727  -178.98      ]
------
Step:20, Action:North
State  185
Old Q Values:  [  467.97895533     0.         -4487.81919727  -178.98      ]
New Q values:  [  392.11110632     0.         -4487.81919727  -178.98      ]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  685.06508062  509.17028337 -252.78192178]
------
Step:21, Action:South
State  107
Old Q Values:  [-252.35169558  685.06508062  509.17028337 -252.78192178]
New Q values:  [-252.35169558  391.05936414  509.17028337 -252.78192178]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  392.11110632     0.         -4487.81919727  -178.98      ]
------
Step:22, Action:North
State  187
Old Q Values:  [135.20477233   0.         630.55947601   0.        ]
New Q values:  [206.23299394   0.         630.55947601   0.        ]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  391.05936414  509.17028337 -252.78192178]
------
Step:23, Action:East
State  98
Old Q Values:  [    0.         41136.94667617 88413.72124065     0.        ]
New Q values:  [    0.         41136.94667617 75133.57650128     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:24, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   188.97343168   436.86070798]
New Q values:  [ -281.736      -3455.78276043   188.97343168   267.37802293]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   310.77913245  -180.6       ]
------
Step:25, Action:East
State  107
Old Q Values:  [-252.35169558  391.05936414  509.17028337 -252.78192178]
New Q values:  [-252.35169558  391.05936414  283.28152023 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   188.97343168   267.37802293]
------
Step:26, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337  -5683.74913053]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337  -2053.19247152]
Reward: -1  Episode Reward:  34
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         736.35726899   65.14560537    0.        ]
------
Step:27, Action:South
State  107
Old Q Values:  [-252.35169558  391.05936414  283.28152023 -252.78192178]
New Q values:  [-252.35169558  273.45707755  283.28152023 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  392.11110632     0.         -4487.81919727  -178.98      ]
------
Step:28, Action:North
State  184
Old Q Values:  [-2081.09028721     0.         14061.4268019      0.        ]
New Q values:  [ -739.80237515     0.         14061.4268019      0.        ]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   310.77913245  -180.6       ]
------
Step:29, Action:East
State  98
Old Q Values:  [    0.         41136.94667617 75133.57650128     0.        ]
New Q values:  [    0.         41136.94667617 69821.51860553     0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:30, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   188.97343168   267.37802293]
New Q values:  [ -281.736      -3455.78276043   188.97343168   191.33566524]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  273.45707755  283.28152023 -252.78192178]
------
Step:31, Action:East
State  107
Old Q Values:  [-252.35169558  273.45707755  283.28152023 -252.78192178]
New Q values:  [-252.35169558  273.45707755  170.11330766 -252.78192178]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   188.97343168   191.33566524]
------
Step:32, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   775.44465105]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   391.61498369]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  273.45707755  170.11330766 -252.78192178]
------
Step:33, Action:South
State  105
Old Q Values:  [-180.6         736.35726899   65.14560537    0.        ]
New Q values:  [-180.6         411.57623949   65.14560537    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  392.11110632     0.         -4487.81919727  -178.98      ]
------
Step:34, Action:North
State  185
Old Q Values:  [  392.11110632     0.         -4487.81919727  -178.98      ]
New Q values:  [  279.71731437     0.         -4487.81919727  -178.98      ]
Reward: -1  Episode Reward:  26
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         411.57623949   65.14560537    0.        ]
------
Step:35, Action:South
State  105
Old Q Values:  [-180.6         411.57623949   65.14560537    0.        ]
New Q values:  [-180.6         247.94569011   65.14560537    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  279.71731437     0.         -4487.81919727  -178.98      ]
------
Step:36, Action:North
State  185
Old Q Values:  [  279.71731437     0.         -4487.81919727  -178.98      ]
New Q values:  [  193.32404902     0.         -4487.81919727  -178.98      ]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  273.45707755  170.11330766 -252.78192178]
------
Step:37, Action:South
State  105
Old Q Values:  [-180.6         247.94569011   65.14560537    0.        ]
New Q values:  [-180.6         156.57549075   65.14560537    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  193.32404902     0.         -4487.81919727  -178.98      ]
------
Step:38, Action:North
State  185
Old Q Values:  [  193.32404902     0.         -4487.81919727  -178.98      ]
New Q values:  [  123.70226683     0.         -4487.81919727  -178.98      ]
Reward: -1  Episode Reward:  22
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         156.57549075   65.14560537    0.        ]
------
Step:39, Action:South
State  104
Old Q Values:  [-8652.84        6020.66738588  1078.33520689 -8652.84      ]
New Q values:  [-8652.84        6626.09499492  1078.33520689 -8652.84      ]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[ -739.80237515     0.         14061.4268019      0.        ]
------
Step:40, Action:East
State  185
Old Q Values:  [  123.70226683     0.         -4487.81919727  -178.98      ]
New Q values:  [ 123.70226683    0.         -769.54210308 -178.98      ]
Reward: -1  Episode Reward:  20
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3420.61858611 1190.81836838  408.67479662]
------
Step:41, Action:South
State  200
Old Q Values:  [  62.8218634  3420.61858611 1190.81836838  408.67479662]
New Q values:  [   62.8218634  23076.344666    1190.81836838   408.67479662]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 29298.35580243 72362.32410518]
------
Step:42, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  7132.68287178 32897.55840918]
New Q values:  [  870.35122762  -168.92307549  7132.68287178 79376.81883979]
Reward: 100009  Episode Reward:  100028
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  424.82089261  353.8832415  -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 1547.67807989    5.4           0.        ]
New Q values:  [ 221.30610858 1561.68537874    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  657.75889141 3124.04715595    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [  371.3182106    242.76375976 -6470.63917272   -30.99112081]
New Q values:  [  371.3182106    242.76375976 -7361.00033035   -30.99112081]
Reward: -10001  Episode Reward:  -9992
xxxxx
x ..x
x g.x
x...x
xxxxx
Episode # 700
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.71500576e+03  9.34205402e+03  1.20371620e+03]
------
Step:1, Action:South
State  194
Old Q Values:  [-6.00000000e-01  9.71500576e+03  9.34205402e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  9.34205402e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x g.x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   391.61498369]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   188.97343168   191.33566524]
New Q values:  [ -281.736      -3455.78276043   188.97343168   163.97138936]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  273.45707755  170.11330766 -252.78192178]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  424.82089261  353.8832415  -120.29354603]
New Q values:  [-177.44732869  286.72382022  353.8832415  -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xag.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  371.3182106    242.76375976 -7361.00033035   -30.99112081]
------
Step:3, Action:North
State  181
Old Q Values:  [  371.3182106    242.76375976 -7361.00033035   -30.99112081]
New Q values:  [  254.09225669   242.76375976 -7361.00033035   -30.99112081]
Reward: -1  Episode Reward:  17
xxxxx
xa .x
x .gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  286.72382022  353.8832415  -120.29354603]
------
Step:4, Action:East
State  111
Old Q Values:  [-177.44732869  286.72382022  353.8832415  -120.29354603]
New Q values:  [-177.44732869  286.72382022  258.43779171 -120.29354603]
Reward: -1  Episode Reward:  16
xxxxx
x a.x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   391.61498369]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   391.61498369]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   242.06313954]
Reward: -1  Episode Reward:  15
xxxxx
xa .x
x .gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  286.72382022  258.43779171 -120.29354603]
------
Step:6, Action:South
State  109
Old Q Values:  [-241.10880094  273.23385175   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  184.92121771   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
xa..x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  254.09225669   242.76375976 -7361.00033035   -30.99112081]
------
Step:7, Action:North
State  181
Old Q Values:  [  254.09225669   242.76375976 -7361.00033035   -30.99112081]
New Q values:  [  156.51326799   242.76375976 -7361.00033035   -30.99112081]
Reward: -1  Episode Reward:  13
xxxxx
xag.x
x ..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  184.92121771   -8.57207238 -180.6       ]
------
Step:8, Action:South
State  108
Old Q Values:  [-8463.16477134   935.65494908   654.62565385     0.        ]
New Q values:  [-8463.16477134  1409.551343     654.62565385     0.        ]
Reward: -1  Episode Reward:  12
xxxxx
xg .x
xa..x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5653.90440971  3452.96454455  1631.08021511 -4966.32149798]
------
Step:9, Action:South
State  180
Old Q Values:  [-5653.90440971  3452.96454455  1631.08021511 -4966.32149798]
New Q values:  [-5653.90440971  2724.76892955  1631.08021511 -4966.32149798]
Reward: 9  Episode Reward:  21
xxxxx
x  .x
xg..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2095.4154154  -2735.46306511  4460.61037243 -2601.74710518]
------
Step:10, Action:East
State  260
Old Q Values:  [ 2095.4154154  -2735.46306511  4460.61037243 -2601.74710518]
New Q values:  [ 2095.4154154  -2735.46306511  2407.81096598 -2601.74710518]
Reward: 9  Episode Reward:  30
xxxxx
xg .x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2060.55605671   694.25822892]
------
Step:11, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2060.55605671   694.25822892]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114   694.25822892]
Reward: -1  Episode Reward:  29
xxxxx
x g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5361.41312819 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:12, Action:North
State  288
Old Q Values:  [ 5361.41312819 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 9606.28471258 -1609.84182322 -8192.20126966  3329.00766654]
Reward: 9  Episode Reward:  38
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24854.39820436 17584.24603822 -4228.04879148  1770.25839846]
------
Step:13, Action:North
State  208
Old Q Values:  [24854.39820436 17584.24603822 -4228.04879148  1770.25839846]
New Q values:  [10019.74268672 17584.24603822 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  47
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 76.41719872 204.1710471   -0.32296531 241.94468326]
------
Step:14, Action:West
State  136
Old Q Values:  [ -170.77177351  1424.32174525 -2383.80019164    22.47456606]
New Q values:  [ -170.77177351  1424.32174525 -2383.80019164   327.92730653]
Reward: -1  Episode Reward:  46
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1065.1249337  -2053.19247152]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   188.97343168   163.97138936]
New Q values:  [ -281.736      -3455.78276043   147.57277765   163.97138936]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 76.41719872 204.1710471   -0.32296531 241.94468326]
------
Step:16, Action:West
State  136
Old Q Values:  [ -170.77177351  1424.32174525 -2383.80019164   327.92730653]
New Q values:  [ -170.77177351  1424.32174525 -2383.80019164   450.10840272]
Reward: -1  Episode Reward:  44
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1065.1249337  -2053.19247152]
------
Step:17, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   147.57277765   163.97138936]
New Q values:  [ -281.736      -3455.78276043   131.01251604   163.97138936]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 76.41719872 204.1710471   -0.32296531 241.94468326]
------
Step:18, Action:West
State  130
Old Q Values:  [59346.86261746 13483.01009663  -180.00807518 67239.03600293]
New Q values:  [59346.86261746 13483.01009663  -180.00807518 66663.70240619]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:19, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   242.06313954]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   182.24240188]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  286.72382022  258.43779171 -120.29354603]
------
Step:20, Action:South
State  111
Old Q Values:  [-177.44732869  286.72382022  258.43779171 -120.29354603]
New Q values:  [-177.44732869 1051.30367487  258.43779171 -120.29354603]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  657.75889141 3124.04715595    0.        ]
------
Step:21, Action:East
State  179
Old Q Values:  [    0.         17203.58890013 19166.43576748     0.        ]
New Q values:  [    0.         17203.58890013 73552.00416784     0.        ]
Reward: 100009  Episode Reward:  100049
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10019.74268672 17584.24603822 -4228.04879148  1770.25839846]
------
Step:1, Action:South
State  208
Old Q Values:  [10019.74268672 17584.24603822 -4228.04879148  1770.25839846]
New Q values:  [10019.74268672  9920.98382906 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9606.28471258 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:2, Action:North
State  288
Old Q Values:  [ 9606.28471258 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 6847.83669105 -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10019.74268672  9920.98382906 -4228.04879148  1770.25839846]
------
Step:3, Action:North
State  208
Old Q Values:  [10019.74268672  9920.98382906 -4228.04879148  1770.25839846]
New Q values:  [ 4440.59359826  9920.98382906 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  17
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1424.32174525 -2383.80019164   450.10840272]
------
Step:4, Action:South
State  136
Old Q Values:  [ -170.77177351  1424.32174525 -2383.80019164   450.10840272]
New Q values:  [ -170.77177351  3545.42384682 -2383.80019164   450.10840272]
Reward: -1  Episode Reward:  16
xxxxx
xg. x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4440.59359826  9920.98382906 -4228.04879148  1770.25839846]
------
Step:5, Action:South
State  208
Old Q Values:  [ 4440.59359826  9920.98382906 -4228.04879148  1770.25839846]
New Q values:  [ 4440.59359826  6022.14453894 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6847.83669105 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:6, Action:North
State  288
Old Q Values:  [ 6847.83669105 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 4545.1780381  -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4440.59359826  6022.14453894 -4228.04879148  1770.25839846]
------
Step:7, Action:South
State  208
Old Q Values:  [ 4440.59359826  6022.14453894 -4228.04879148  1770.25839846]
New Q values:  [ 4440.59359826  3771.81122701 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4545.1780381  -1609.84182322 -8192.20126966  3329.00766654]
------
Step:8, Action:North
State  288
Old Q Values:  [ 4545.1780381  -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 3068.19061115 -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.16906465e+03 3.74475074e+03 1.38903186e+03 3.52184257e+00]
------
Step:9, Action:North
State  208
Old Q Values:  [ 4440.59359826  3771.81122701 -4228.04879148  1770.25839846]
New Q values:  [21774.74816116  3771.81122701 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  11
xxxxx
x..ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59346.86261746 13483.01009663  -180.00807518 66663.70240619]
------
Step:10, Action:West
State  138
Old Q Values:  [ 76.41719872 204.1710471   -0.32296531 241.94468326]
New Q values:  [ 76.41719872 204.1710471   -0.32296531 151.36929011]
Reward: 9  Episode Reward:  20
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   163.97138936]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   163.97138936]
New Q values:  [ -281.736      -3455.78276043   131.01251604   153.02567901]
Reward: 9  Episode Reward:  29
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  273.45707755  170.11330766 -252.78192178]
------
Step:12, Action:South
State  107
Old Q Values:  [-252.35169558  273.45707755  170.11330766 -252.78192178]
New Q values:  [-252.35169558  303.95067382  170.11330766 -252.78192178]
Reward: 9  Episode Reward:  38
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[206.23299394   0.         630.55947601   0.        ]
------
Step:13, Action:East
State  185
Old Q Values:  [ 123.70226683    0.         -769.54210308 -178.98      ]
New Q values:  [ 123.70226683    0.           94.17461698 -178.98      ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.34197153e+03 -8.87652194e+03  2.00341972e+02]
------
Step:14, Action:South
State  203
Old Q Values:  [   3.60604218  928.63722898 1909.98438219    0.        ]
New Q values:  [3.60604218e+00 2.41899005e+04 1.90998438e+03 0.00000000e+00]
Reward: 9  Episode Reward:  46
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  7132.68287178 79376.81883979]
------
Step:15, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  7132.68287178 79376.81883979]
New Q values:  [  870.35122762  -168.92307549  7132.68287178 97968.52301204]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.31780712e+04 1.93809055e+03 2.91043938e+03]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.31780712e+04 1.93809055e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.09853257e+04 1.93809055e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 29298.35580243 72362.32410518]
------
Step:2, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  7132.68287178 97968.52301204]
New Q values:  [  870.35122762  -168.92307549  7132.68287178 39589.0028524 ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1320.64549196    26.73544252 -2339.75315766   -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [ 1320.64549196    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1464.87234357    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  657.75889141 3124.04715595    0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [  156.51326799   242.76375976 -7361.00033035   -30.99112081]
New Q values:  [  156.51326799   242.76375976 -1850.83010197   -30.99112081]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.09853257e+04 1.93809055e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.31231442e+04 1.93809055e+03 2.91043938e+03]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114   694.25822892]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 29298.35580243 72362.32410518]
New Q values:  [-2527.46239811 -8521.23367799 12723.44462094 72362.32410518]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3068.19061115 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:7, Action:North
State  288
Old Q Values:  [ 3068.19061115 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 7765.10069281 -1609.84182322 -8192.20126966  3329.00766654]
Reward: 9  Episode Reward:  33
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21774.74816116  3771.81122701 -4228.04879148  1770.25839846]
------
Step:8, Action:North
State  208
Old Q Values:  [21774.74816116  3771.81122701 -4228.04879148  1770.25839846]
New Q values:  [28714.40998632  3771.81122701 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  42
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59346.86261746 13483.01009663  -180.00807518 66663.70240619]
------
Step:9, Action:West
State  130
Old Q Values:  [59346.86261746 13483.01009663  -180.00807518 66663.70240619]
New Q values:  [59346.86261746 13483.01009663  -180.00807518 66439.5689675 ]
Reward: 9  Episode Reward:  51
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132562.29335007]
------
Step:10, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   32083.9571164  132562.29335007]
New Q values:  [  -180.6          3557.6642036   32083.9571164  132740.45624872]
Reward: 100009  Episode Reward:  100060
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  156.51326799   242.76375976 -1850.83010197   -30.99112081]
------
Step:1, Action:South
State  181
Old Q Values:  [  156.51326799   242.76375976 -1850.83010197   -30.99112081]
New Q values:  [  156.51326799   541.96720697 -1850.83010197   -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1464.87234357    26.73544252 -2339.75315766   -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [ 1464.87234357    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  747.93909952    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  156.51326799   541.96720697 -1850.83010197   -30.99112081]
------
Step:3, Action:South
State  181
Old Q Values:  [  156.51326799   541.96720697 -1850.83010197   -30.99112081]
New Q values:  [  156.51326799   440.56861264 -1850.83010197   -30.99112081]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  747.93909952    26.73544252 -2339.75315766   -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [  747.93909952    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  430.7462236     26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  156.51326799   440.56861264 -1850.83010197   -30.99112081]
------
Step:5, Action:South
State  181
Old Q Values:  [  156.51326799   440.56861264 -1850.83010197   -30.99112081]
New Q values:  [  156.51326799   304.85131214 -1850.83010197   -30.99112081]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  430.7462236     26.73544252 -2339.75315766   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [  430.7462236     26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1108.91263622    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  657.75889141 3124.04715595    0.        ]
------
Step:7, Action:East
State  189
Old Q Values:  [   9.84673294  881.83651799 1597.38224697  154.04646645]
New Q values:  [   9.84673294  881.83651799 1738.52292895  154.04646645]
Reward: 9  Episode Reward:  13
xxxxx
x.. x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:8, Action:South
State  195
Old Q Values:  [   38.85388605  2639.47551855 14622.6780724   1101.59744825]
New Q values:  [   38.85388605 12937.89106314 14622.6780724   1101.59744825]
Reward: 9  Episode Reward:  22
xxxxx
x.. x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  7132.68287178 39589.0028524 ]
------
Step:9, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  7132.68287178 39589.0028524 ]
New Q values:  [  870.35122762  -168.92307549  7132.68287178 16167.67493183]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1108.91263622    26.73544252 -2339.75315766   -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [ 1108.91263622    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  534.42044813    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  156.51326799   304.85131214 -1850.83010197   -30.99112081]
------
Step:11, Action:South
State  180
Old Q Values:  [-5653.90440971  2724.76892955  1631.08021511 -4966.32149798]
New Q values:  [-5653.90440971  1811.65086161  1631.08021511 -4966.32149798]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2095.4154154  -2735.46306511  2407.81096598 -2601.74710518]
------
Step:12, Action:East
State  260
Old Q Values:  [ 2095.4154154  -2735.46306511  2407.81096598 -2601.74710518]
New Q values:  [ 2095.4154154  -2735.46306511 22671.22161795 -2601.74710518]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 12723.44462094 72362.32410518]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12723.44462094 72362.32410518]
New Q values:  [-2527.46239811 -8521.23367799  7424.30805622 72362.32410518]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7765.10069281 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:14, Action:North
State  288
Old Q Values:  [ 7765.10069281 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 4362.15967303 -1609.84182322 -8192.20126966  3329.00766654]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.16906465e+03 3.74475074e+03 1.38903186e+03 3.52184257e+00]
------
Step:15, Action:North
State  208
Old Q Values:  [28714.40998632  3771.81122701 -4228.04879148  1770.25839846]
New Q values:  [31417.03468478  3771.81122701 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  35
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59346.86261746 13483.01009663  -180.00807518 66439.5689675 ]
------
Step:16, Action:West
State  128
Old Q Values:  [ 8775.70846068  6806.00740836 -8652.84       15717.21796752]
New Q values:  [ 8775.70846068  6806.00740836 -8652.84        7146.42050625]
Reward: 9  Episode Reward:  44
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.          2847.11106414 -5999.38454759     0.        ]
------
Step:17, Action:South
State  115
Old Q Values:  [-1.80600000e+02  5.97898639e+01  6.47656656e+00  1.32644343e+05]
New Q values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32644343e+05]
Reward: -1  Episode Reward:  43
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:18, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.31231442e+04 1.93809055e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.69573549e+04 1.93809055e+03 2.91043938e+03]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7424.30805622 72362.32410518]
------
Step:19, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7424.30805622 72362.32410518]
New Q values:  [-2527.46239811 -8521.23367799  7424.30805622 35156.72511819]
Reward: -1  Episode Reward:  41
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[20707.9849204   2256.66526474  4520.89517899  1875.31501677]
------
Step:20, Action:North
State  256
Old Q Values:  [48225.76268973 15557.34758455  5576.40109469   644.94785455]
New Q values:  [42235.38895895 15557.34758455  5576.40109469   644.94785455]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NW
[76485.61294353  1621.55095326 45408.88473875     0.        ]
------
Step:21, Action:East
State  177
Old Q Values:  [78394.48547832 22487.97851491  4693.41412922     0.        ]
New Q values:  [78394.48547832 22487.97851491  9963.97212411     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.69573549e+04 1.93809055e+03 2.91043938e+03]
------
Step:22, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.69573549e+04 1.93809055e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.13293595e+04 1.93809055e+03 2.91043938e+03]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7424.30805622 35156.72511819]
------
Step:23, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7424.30805622 35156.72511819]
New Q values:  [-2527.46239811 -8521.23367799  7424.30805622 20274.4855234 ]
Reward: -1  Episode Reward:  37
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[20707.9849204   2256.66526474  4520.89517899  1875.31501677]
------
Step:24, Action:North
State  257
Old Q Values:  [20707.9849204   2256.66526474  4520.89517899  1875.31501677]
New Q values:  [31800.93961166  2256.66526474  4520.89517899  1875.31501677]
Reward: -1  Episode Reward:  36
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[78394.48547832 22487.97851491  9963.97212411     0.        ]
------
Step:25, Action:North
State  177
Old Q Values:  [78394.48547832 22487.97851491  9963.97212411     0.        ]
New Q values:  [111073.33310002  22487.97851491   9963.97212411      0.        ]
Reward: 100009  Episode Reward:  100045
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  156.51326799   304.85131214 -1850.83010197   -30.99112081]
------
Step:1, Action:South
State  181
Old Q Values:  [  156.51326799   304.85131214 -1850.83010197   -30.99112081]
New Q values:  [  156.51326799   287.66665929 -1850.83010197   -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x  .x
xa..x
xxxxx
Step:2, Action:North
State  260
Old Q Values:  [ 2095.4154154  -2735.46306511 22671.22161795 -2601.74710518]
New Q values:  [ 1381.06142465 -2735.46306511 22671.22161795 -2601.74710518]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5653.90440971  1811.65086161  1631.08021511 -4966.32149798]
------
Step:3, Action:South
State  181
Old Q Values:  [  156.51326799   287.66665929 -1850.83010197   -30.99112081]
New Q values:  [  156.51326799   274.79279816 -1850.83010197   -30.99112081]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  534.42044813    26.73544252 -2339.75315766   -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [  534.42044813    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  295.6060187     26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  156.51326799   274.79279816 -1850.83010197   -30.99112081]
------
Step:5, Action:South
State  183
Old Q Values:  [ 877.23516594  657.75889141 3124.04715595    0.        ]
New Q values:  [ 877.23516594  351.18536217 3124.04715595    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  295.6060187     26.73544252 -2339.75315766   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [  295.6060187     26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  200.08024693    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  156.51326799   274.79279816 -1850.83010197   -30.99112081]
------
Step:7, Action:South
State  183
Old Q Values:  [ 877.23516594  351.18536217 3124.04715595    0.        ]
New Q values:  [ 877.23516594  199.89821895 3124.04715595    0.        ]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  200.08024693    26.73544252 -2339.75315766   -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [  200.08024693    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1016.64624556    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  2
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  199.89821895 3124.04715595    0.        ]
------
Step:9, Action:East
State  181
Old Q Values:  [  156.51326799   274.79279816 -1850.83010197   -30.99112081]
New Q values:  [156.51326799 274.79279816 353.23798938 -30.99112081]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:10, Action:South
State  195
Old Q Values:  [   38.85388605 12937.89106314 14622.6780724   1101.59744825]
New Q values:  [   38.85388605 10030.8589048  14622.6780724   1101.59744825]
Reward: 9  Episode Reward:  10
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  7132.68287178 16167.67493183]
------
Step:11, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  7132.68287178 16167.67493183]
New Q values:  [ 870.35122762 -168.92307549 7132.68287178 6771.4638464 ]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1016.64624556    26.73544252 -2339.75315766   -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [ 1016.64624556    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1343.27264501    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  199.89821895 3124.04715595    0.        ]
------
Step:13, Action:East
State  181
Old Q Values:  [156.51326799 274.79279816 353.23798938 -30.99112081]
New Q values:  [ 156.51326799  274.79279816 1234.86522592  -30.99112081]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:14, Action:South
State  195
Old Q Values:  [   38.85388605 10030.8589048  14622.6780724   1101.59744825]
New Q values:  [   38.85388605  6151.54842346 14622.6780724   1101.59744825]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 7132.68287178 6771.4638464 ]
------
Step:15, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 7132.68287178 6771.4638464 ]
New Q values:  [ 870.35122762 -168.92307549 7132.68287178 3110.96733206]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1343.27264501    26.73544252 -2339.75315766   -35.88578819]
------
Step:16, Action:North
State  261
Old Q Values:  [ 1343.27264501    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  907.16862578    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 156.51326799  274.79279816 1234.86522592  -30.99112081]
------
Step:17, Action:East
State  181
Old Q Values:  [ 156.51326799  274.79279816 1234.86522592  -30.99112081]
New Q values:  [ 156.51326799  274.79279816 1721.20142911  -30.99112081]
Reward: -1  Episode Reward:  3
xxxxx
x.g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   948.44198752  4092.85112914   239.04887894]
------
Step:18, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.13293595e+04 1.93809055e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.13293595e+04 1.02057466e+04 2.91043938e+03]
Reward: 9  Episode Reward:  12
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31417.03468478  3771.81122701 -4228.04879148  1770.25839846]
------
Step:19, Action:North
State  208
Old Q Values:  [31417.03468478  3771.81122701 -4228.04879148  1770.25839846]
New Q values:  [15204.92641211  3771.81122701 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  21
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068  6806.00740836 -8652.84        7146.42050625]
------
Step:20, Action:North
State  130
Old Q Values:  [59346.86261746 13483.01009663  -180.00807518 66439.5689675 ]
New Q values:  [43490.01573723 13483.01009663  -180.00807518 66439.5689675 ]
Reward: -301  Episode Reward:  -280
xxxxx
x..ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[43490.01573723 13483.01009663  -180.00807518 66439.5689675 ]
------
Step:21, Action:West
State  128
Old Q Values:  [ 8775.70846068  6806.00740836 -8652.84        7146.42050625]
New Q values:  [ 8775.70846068  6806.00740836 -8652.84       26817.51177882]
Reward: -9991  Episode Reward:  -10271
xxxxx
x.g x
x   x
x  .x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.13293595e+04 1.02057466e+04 2.91043938e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144   948.44198752  4092.85112914   239.04887894]
New Q values:  [-2469.90645144  1114.39070335  4092.85112914   239.04887894]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114   694.25822892]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7424.30805622 20274.4855234 ]
New Q values:  [-2527.46239811 -8521.23367799  4283.7711244  20274.4855234 ]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4362.15967303 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:3, Action:North
State  288
Old Q Values:  [ 4362.15967303 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 6311.74179285 -1609.84182322 -8192.20126966  3329.00766654]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15204.92641211  3771.81122701 -4228.04879148  1770.25839846]
------
Step:4, Action:North
State  208
Old Q Values:  [15204.92641211  3771.81122701 -4228.04879148  1770.25839846]
New Q values:  [26019.2412551   3771.81122701 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  36
xxxxx
x. ax
xg  x
x.  x
xxxxx
Step:5, Action:North
State  130
Old Q Values:  [43490.01573723 13483.01009663  -180.00807518 66439.5689675 ]
New Q values:  [37147.27698514 13483.01009663  -180.00807518 66439.5689675 ]
Reward: -301  Episode Reward:  -265
xxxxx
x. ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[37147.27698514 13483.01009663  -180.00807518 66439.5689675 ]
------
Step:6, Action:West
State  130
Old Q Values:  [37147.27698514 13483.01009663  -180.00807518 66439.5689675 ]
New Q values:  [37147.27698514 13483.01009663  -180.00807518 66397.36446162]
Reward: -1  Episode Reward:  -266
xxxxx
x.a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132740.45624872]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337  -2053.19247152]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337  -4828.04849013]
Reward: -9991  Episode Reward:  -10257
xxxxx
xg  x
x.  x
x.  x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26019.2412551   3771.81122701 -4228.04879148  1770.25839846]
------
Step:1, Action:North
State  216
Old Q Values:  [  665.79107578   372.98523174 -8896.20691497  2318.350458  ]
New Q values:  [  332.96774444   372.98523174 -8896.20691497  2318.350458  ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 76.41719872 204.1710471   -0.32296531 151.36929011]
------
Step:2, Action:South
State  138
Old Q Values:  [ 76.41719872 204.1710471   -0.32296531 151.36929011]
New Q values:  [ 7.64171987e+01  1.33178781e+03 -3.22965309e-01  1.51369290e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4.16906465e+03 3.74475074e+03 1.38903186e+03 3.52184257e+00]
------
Step:3, Action:North
State  210
Old Q Values:  [4.16906465e+03 3.74475074e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [2.06656221e+03 3.74475074e+03 1.38903186e+03 3.52184257e+00]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.33178781e+03 -3.22965309e-01  1.51369290e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.33178781e+03 -3.22965309e-01  1.51369290e+02]
New Q values:  [ 7.64171987e+01  1.65554035e+03 -3.22965309e-01  1.51369290e+02]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x..ax
x.g.x
xxxxx
Step:5, Action:East
State  210
Old Q Values:  [2.06656221e+03 3.74475074e+03 1.38903186e+03 3.52184257e+00]
New Q values:  [2.06656221e+03 3.74475074e+03 1.49843797e+03 3.52184257e+00]
Reward: -301  Episode Reward:  -295
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2.06656221e+03 3.74475074e+03 1.49843797e+03 3.52184257e+00]
------
Step:6, Action:North
State  210
Old Q Values:  [2.06656221e+03 3.74475074e+03 1.49843797e+03 3.52184257e+00]
New Q values:  [1.32268699e+03 3.74475074e+03 1.49843797e+03 3.52184257e+00]
Reward: -1  Episode Reward:  -296
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.65554035e+03 -3.22965309e-01  1.51369290e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.65554035e+03 -3.22965309e-01  1.51369290e+02]
New Q values:  [ 7.64171987e+01  1.78504136e+03 -3.22965309e-01  1.51369290e+02]
Reward: -1  Episode Reward:  -297
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.32268699e+03 3.74475074e+03 1.49843797e+03 3.52184257e+00]
------
Step:8, Action:East
State  210
Old Q Values:  [1.32268699e+03 3.74475074e+03 1.49843797e+03 3.52184257e+00]
New Q values:  [1.32268699e+03 3.74475074e+03 1.54220041e+03 3.52184257e+00]
Reward: -301  Episode Reward:  -598
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1.32268699e+03 3.74475074e+03 1.54220041e+03 3.52184257e+00]
------
Step:9, Action:South
State  210
Old Q Values:  [1.32268699e+03 3.74475074e+03 1.54220041e+03 3.52184257e+00]
New Q values:  [ 1322.68698646 -2603.17716687  1542.2004081      3.52184257]
Reward: -9991  Episode Reward:  -10589
xxxxx
x.  x
x.. x
x..gx
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26019.2412551   3771.81122701 -4228.04879148  1770.25839846]
------
Step:1, Action:North
State  216
Old Q Values:  [  332.96774444   372.98523174 -8896.20691497  2318.350458  ]
New Q values:  [ 1202.21425182   372.98523174 -8896.20691497  2318.350458  ]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3545.42384682 -2383.80019164   450.10840272]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.78504136e+03 -3.22965309e-01  1.51369290e+02]
New Q values:  [ 7.64171987e+01  1.40892168e+03 -3.22965309e-01  1.51369290e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1202.21425182   372.98523174 -8896.20691497  2318.350458  ]
------
Step:3, Action:West
State  210
Old Q Values:  [ 1322.68698646 -2603.17716687  1542.2004081      3.52184257]
New Q values:  [ 1322.68698646 -2603.17716687  1542.2004081   5886.83859788]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x.a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  9.34205402e+03  1.20371620e+03]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.13293595e+04 1.02057466e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.46194895e+04 1.02057466e+04 2.91043938e+03]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4283.7711244  20274.4855234 ]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4283.7711244  20274.4855234 ]
New Q values:  [-2527.46239811 -8521.23367799  4283.7711244  20785.81089704]
Reward: 9  Episode Reward:  35
xxxxx
xg  x
x.  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[42235.38895895 15557.34758455  5576.40109469   644.94785455]
------
Step:6, Action:North
State  257
Old Q Values:  [31800.93961166  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [46047.77577467  2256.66526474  4520.89517899  1875.31501677]
Reward: 9  Episode Reward:  44
xxxxx
x.g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[111073.33310002  22487.97851491   9963.97212411      0.        ]
------
Step:7, Action:North
State  189
Old Q Values:  [   9.84673294  881.83651799 1738.52292895  154.04646645]
New Q values:  [  64.81505849  881.83651799 1738.52292895  154.04646645]
Reward: 9  Episode Reward:  53
xxxxx
xa gx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  184.92121771   -8.57207238 -180.6       ]
------
Step:8, Action:South
State  109
Old Q Values:  [-241.10880094  184.92121771   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  594.92536577   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  52
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849  881.83651799 1738.52292895  154.04646645]
------
Step:9, Action:East
State  189
Old Q Values:  [  64.81505849  881.83651799 1738.52292895  154.04646645]
New Q values:  [ 64.81505849 881.83651799 786.09268966 154.04646645]
Reward: -1  Episode Reward:  51
xxxxx
x  gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         304.2783936    0.         198.38683706]
------
Step:10, Action:South
State  205
Old Q Values:  [  0.         304.2783936    0.         198.38683706]
New Q values:  [  0.         406.24297738   0.         198.38683706]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 950.43873314]
------
Step:11, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 950.43873314]
New Q values:  [  1.64433      0.         -29.77444073 651.72608099]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  907.16862578    26.73544252 -2339.75315766   -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [  907.16862578    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  626.81840571    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 64.81505849 881.83651799 786.09268966 154.04646645]
------
Step:13, Action:South
State  189
Old Q Values:  [ 64.81505849 881.83651799 786.09268966 154.04646645]
New Q values:  [ 64.81505849 540.18012891 786.09268966 154.04646645]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  626.81840571    26.73544252 -2339.75315766   -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [  626.81840571    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  445.22499745    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 206.84750324 650.32545056   0.        ]
------
Step:15, Action:East
State  190
Old Q Values:  [ 1.04129094e+00 -5.70379540e+03  2.01712456e+01  0.00000000e+00]
New Q values:  [ 1.04129094e+00 -5.70379540e+03  2.66795469e+02  0.00000000e+00]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638 864.42323496   0.        ]
------
Step:16, Action:East
State  204
Old Q Values:  [  0.         977.00865364 815.31186329 441.58769553]
New Q values:  [   0.          977.00865364 1021.02988271  441.58769553]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1202.21425182   372.98523174 -8896.20691497  2318.350458  ]
------
Step:17, Action:West
State  218
Old Q Values:  [298.44678463 848.96225083   0.         429.03841886]
New Q values:  [298.44678463 848.96225083   0.         430.34233803]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         135.48456638 864.42323496   0.        ]
------
Step:18, Action:East
State  206
Old Q Values:  [  0.         135.48456638 864.42323496   0.        ]
New Q values:  [  0.         135.48456638 599.85796923   0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[298.44678463 848.96225083   0.         430.34233803]
------
Step:19, Action:South
State  208
Old Q Values:  [26019.2412551   3771.81122701 -4228.04879148  1770.25839846]
New Q values:  [26019.2412551  63407.64702866 -4228.04879148  1770.25839846]
Reward: 100009  Episode Reward:  100051
xxxxx
x   x
x g x
x  ax
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.40892168e+03 -3.22965309e-01  1.51369290e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.40892168e+03 -3.22965309e-01  1.51369290e+02]
New Q values:  [ 7.64171987e+01  1.35912628e+04 -3.22965309e-01  1.51369290e+02]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x..gx
x.. x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26019.2412551  63407.64702866 -4228.04879148  1770.25839846]
------
Step:1, Action:South
State  208
Old Q Values:  [26019.2412551  63407.64702866 -4228.04879148  1770.25839846]
New Q values:  [26019.2412551  27261.98134932 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6311.74179285 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:2, Action:North
State  288
Old Q Values:  [ 6311.74179285 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [10702.69112193 -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26019.2412551  27261.98134932 -4228.04879148  1770.25839846]
------
Step:3, Action:South
State  208
Old Q Values:  [26019.2412551  27261.98134932 -4228.04879148  1770.25839846]
New Q values:  [26019.2412551  14114.99987631 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10702.69112193 -1609.84182322 -8192.20126966  3329.00766654]
------
Step:4, Action:North
State  288
Old Q Values:  [10702.69112193 -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [12086.2488253  -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26019.2412551  14114.99987631 -4228.04879148  1770.25839846]
------
Step:5, Action:North
State  208
Old Q Values:  [26019.2412551  14114.99987631 -4228.04879148  1770.25839846]
New Q values:  [30332.30584052 14114.99987631 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  15
xxxxx
x..ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[37147.27698514 13483.01009663  -180.00807518 66397.36446162]
------
Step:6, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  1.35912628e+04 -3.22965309e-01  1.51369290e+02]
New Q values:  [ 7.64171987e+01  1.35912628e+04 -3.22965309e-01  1.11855420e+02]
Reward: 9  Episode Reward:  24
xxxxx
x.a x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   153.02567901]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   153.02567901]
New Q values:  [ -281.736      -3455.78276043   131.01251604   157.79547375]
Reward: 9  Episode Reward:  33
xxxxx
xa  x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  303.95067382  170.11330766 -252.78192178]
------
Step:8, Action:South
State  110
Old Q Values:  [ -239.29051573 -5356.3368626    411.01964096  -180.6       ]
New Q values:  [ -239.29051573 -7599.63948656   411.01964096  -180.6       ]
Reward: -10001  Episode Reward:  -9968
xxxxx
x   x
xg. x
x.. x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  9.34205402e+03  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.46194895e+04 1.02057466e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.46194895e+04 1.31873904e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30332.30584052 14114.99987631 -4228.04879148  1770.25839846]
------
Step:2, Action:North
State  208
Old Q Values:  [30332.30584052 14114.99987631 -4228.04879148  1770.25839846]
New Q values:  [13201.94949026 14114.99987631 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3545.42384682 -2383.80019164   450.10840272]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.35912628e+04 -3.22965309e-01  1.11855420e+02]
New Q values:  [ 7.64171987e+01  9.67040508e+03 -3.22965309e-01  1.11855420e+02]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13201.94949026 14114.99987631 -4228.04879148  1770.25839846]
------
Step:4, Action:South
State  208
Old Q Values:  [13201.94949026 14114.99987631 -4228.04879148  1770.25839846]
New Q values:  [13201.94949026  9277.27459811 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  26
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12086.2488253  -1609.84182322 -8192.20126966  3329.00766654]
------
Step:5, Action:North
State  288
Old Q Values:  [12086.2488253  -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 8794.4843772  -1609.84182322 -8192.20126966  3329.00766654]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13201.94949026  9277.27459811 -4228.04879148  1770.25839846]
------
Step:6, Action:North
State  208
Old Q Values:  [13201.94949026  9277.27459811 -4228.04879148  1770.25839846]
New Q values:  [25199.38913459  9277.27459811 -4228.04879148  1770.25839846]
Reward: -1  Episode Reward:  24
xxxxx
x..ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[37147.27698514 13483.01009663  -180.00807518 66397.36446162]
------
Step:7, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  9.67040508e+03 -3.22965309e-01  1.11855420e+02]
New Q values:  [ 7.64171987e+01  9.67040508e+03 -3.22965309e-01  9.74808100e+01]
Reward: 9  Episode Reward:  33
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   157.79547375]
------
Step:8, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337  -4828.04849013]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337  -1878.84674883]
Reward: 9  Episode Reward:  42
xxxxx
xag x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         156.57549075   65.14560537    0.        ]
------
Step:9, Action:South
State  105
Old Q Values:  [-180.6         156.57549075   65.14560537    0.        ]
New Q values:  [-180.6         105.14087635   65.14560537    0.        ]
Reward: 9  Episode Reward:  51
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 123.70226683    0.           94.17461698 -178.98      ]
------
Step:10, Action:North
State  185
Old Q Values:  [ 123.70226683    0.           94.17461698 -178.98      ]
New Q values:  [  80.42316964    0.           94.17461698 -178.98      ]
Reward: -1  Episode Reward:  50
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         105.14087635   65.14560537    0.        ]
------
Step:11, Action:South
State  105
Old Q Values:  [-180.6         105.14087635   65.14560537    0.        ]
New Q values:  [-180.6          69.70873563   65.14560537    0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  80.42316964    0.           94.17461698 -178.98      ]
------
Step:12, Action:East
State  185
Old Q Values:  [  80.42316964    0.           94.17461698 -178.98      ]
New Q values:  [  80.42316964    0.         6959.97324659 -178.98      ]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  23076.344666    1190.81836838   408.67479662]
------
Step:13, Action:South
State  200
Old Q Values:  [   62.8218634  23076.344666    1190.81836838   408.67479662]
New Q values:  [   62.8218634  15465.68113551  1190.81836838   408.67479662]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4283.7711244  20785.81089704]
------
Step:14, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 7132.68287178 3110.96733206]
New Q values:  [  870.35122762  -168.92307549  7132.68287178 75064.11966523]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -7599.63948656   411.01964096  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869 1051.30367487  258.43779171 -120.29354603]
New Q values:  [-177.44732869 1051.30367487  156.11375881 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   157.79547375]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   157.79547375]
New Q values:  [ -281.736      -3455.78276043   131.01251604   377.90929196]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1051.30367487  156.11375881 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 1051.30367487  156.11375881 -120.29354603]
New Q values:  [-177.44732869  942.28189868  156.11375881 -120.29354603]
Reward: 9  Episode Reward:  17
xxxxx
x  .x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 156.51326799  274.79279816 1721.20142911  -30.99112081]
------
Step:4, Action:East
State  181
Old Q Values:  [ 156.51326799  274.79279816 1721.20142911  -30.99112081]
New Q values:  [  156.51326799   274.79279816 -4078.26408962   -30.99112081]
Reward: -9991  Episode Reward:  -9974
xxxxx
x  .x
x g.x
x ..x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [-5653.90440971  1811.65086161  1631.08021511 -4966.32149798]
New Q values:  [-5653.90440971  1811.65086161  5043.67892299 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.46194895e+04 1.31873904e+04 2.91043938e+03]
------
Step:2, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.46194895e+04 1.31873904e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 6.58280969e+03 1.31873904e+04 2.91043938e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114   694.25822892]
------
Step:3, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549  7132.68287178 75064.11966523]
New Q values:  [  870.35122762  -168.92307549  5496.81846187 75064.11966523]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8794.4843772  -1609.84182322 -8192.20126966  3329.00766654]
------
Step:4, Action:West
State  288
Old Q Values:  [ 8794.4843772  -1609.84182322 -8192.20126966  3329.00766654]
New Q values:  [ 8794.4843772  -1609.84182322 -8192.20126966 23850.23896618]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  5496.81846187 75064.11966523]
------
Step:5, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  5496.81846187 75064.11966523]
New Q values:  [  870.35122762  -168.92307549  5496.81846187 30158.61536532]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  445.22499745    26.73544252 -2339.75315766   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [  445.22499745    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1114.70414576    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  199.89821895 3124.04715595    0.        ]
------
Step:7, Action:East
State  181
Old Q Values:  [  156.51326799   274.79279816 -4078.26408962   -30.99112081]
New Q values:  [ 156.51326799  274.79279816 -537.73560568  -30.99112081]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:8, Action:South
State  195
Old Q Values:  [   38.85388605  6151.54842346 14622.6780724   1101.59744825]
New Q values:  [   38.85388605 11507.60397898 14622.6780724   1101.59744825]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  5496.81846187 30158.61536532]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4283.7711244  20785.81089704]
New Q values:  [-2527.46239811 -8521.23367799  4283.7711244   8648.13560255]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1114.70414576    26.73544252 -2339.75315766   -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [ 1114.70414576    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1382.49580509    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  199.89821895 3124.04715595    0.        ]
------
Step:11, Action:East
State  181
Old Q Values:  [ 156.51326799  274.79279816 -537.73560568  -30.99112081]
New Q values:  [156.51326799 274.79279816 878.47578789 -30.99112081]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:12, Action:South
State  195
Old Q Values:  [   38.85388605 11507.60397898 14622.6780724   1101.59744825]
New Q values:  [   38.85388605 13650.02620119 14622.6780724   1101.59744825]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  5496.81846187 30158.61536532]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4283.7711244   8648.13560255]
New Q values:  [-2527.46239811 -8521.23367799  4283.7711244   3873.40298255]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1382.49580509    26.73544252 -2339.75315766   -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [ 1382.49580509    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 1489.61246882    26.73544252 -2339.75315766   -35.88578819]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  199.89821895 3124.04715595    0.        ]
------
Step:15, Action:East
State  183
Old Q Values:  [ 877.23516594  199.89821895 3124.04715595    0.        ]
New Q values:  [ 877.23516594  199.89821895 7129.04872323    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  9.34205402e+03  1.20371620e+03]
------
Step:16, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.58280969e+03 1.31873904e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25199.38913459  9277.27459811 -4228.04879148  1770.25839846]
------
Step:17, Action:North
State  208
Old Q Values:  [25199.38913459  9277.27459811 -4228.04879148  1770.25839846]
New Q values:  [18130.40918748  9277.27459811 -4228.04879148  1770.25839846]
Reward: 9  Episode Reward:  33
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068  6806.00740836 -8652.84       26817.51177882]
------
Step:18, Action:North
State  130
Old Q Values:  [37147.27698514 13483.01009663  -180.00807518 66397.36446162]
New Q values:  [34597.52013254 13483.01009663  -180.00807518 66397.36446162]
Reward: -301  Episode Reward:  -268
xxxxx
x..ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254 13483.01009663  -180.00807518 66397.36446162]
------
Step:19, Action:West
State  130
Old Q Values:  [34597.52013254 13483.01009663  -180.00807518 66397.36446162]
New Q values:  [34597.52013254 13483.01009663  -180.00807518 66357.64859707]
Reward: 9  Episode Reward:  -259
xxxxx
x.a x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32644343e+05]
------
Step:20, Action:West
State  113
Old Q Values:  [    0.          2847.11106414 -5999.38454759     0.        ]
New Q values:  [    0.          2847.11106414 -5999.38454759 71011.70496116]
Reward: 100009  Episode Reward:  99750
xxxxx
xa gx
x   x
x   x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -7599.63948656   411.01964096  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869  942.28189868  156.11375881 -120.29354603]
New Q values:  [-177.44732869  942.28189868  181.21829111 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   377.90929196]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   182.24240188]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   354.98153036]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  942.28189868  181.21829111 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869  942.28189868  181.21829111 -120.29354603]
New Q values:  [-177.44732869 2521.02737644  181.21829111 -120.29354603]
Reward: 9  Episode Reward:  17
xxxxx
x  .x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  199.89821895 7129.04872323    0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [156.51326799 274.79279816 878.47578789 -30.99112081]
New Q values:  [  156.51326799   274.79279816 -4415.3543461    -30.99112081]
Reward: -9991  Episode Reward:  -9974
xxxxx
x  .x
x g.x
x ..x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  9.34205402e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  5.50827319e+03  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   5886.83859788]
------
Step:2, Action:West
State  208
Old Q Values:  [18130.40918748  9277.27459811 -4228.04879148  1770.25839846]
New Q values:  [18130.40918748  9277.27459811 -4228.04879148  4559.55522984]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  5.50827319e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  3.96876085e+03  1.20371620e+03]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   5886.83859788]
------
Step:4, Action:West
State  208
Old Q Values:  [18130.40918748  9277.27459811 -4228.04879148  4559.55522984]
New Q values:  [18130.40918748  9277.27459811 -4228.04879148  5675.27396239]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
------
Step:5, Action:East
State  200
Old Q Values:  [   62.8218634  15465.68113551  1190.81836838   408.67479662]
New Q values:  [   62.8218634  15465.68113551  1171.23248475   408.67479662]
Reward: -1  Episode Reward:  5
xxxxx
xg..x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1202.21425182   372.98523174 -8896.20691497  2318.350458  ]
------
Step:6, Action:West
State  208
Old Q Values:  [18130.40918748  9277.27459811 -4228.04879148  5675.27396239]
New Q values:  [18130.40918748  9277.27459811 -4228.04879148  6121.56145541]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  3.96876085e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  3.35295592e+03  1.20371620e+03]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   5886.83859788]
------
Step:8, Action:West
State  208
Old Q Values:  [18130.40918748  9277.27459811 -4228.04879148  6121.56145541]
New Q values:  [18130.40918748  9277.27459811 -4228.04879148  6300.07645262]
Reward: -1  Episode Reward:  2
xxxxx
x...x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  3.35295592e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  3.10663395e+03  1.20371620e+03]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   5886.83859788]
------
Step:10, Action:West
State  208
Old Q Values:  [18130.40918748  9277.27459811 -4228.04879148  6300.07645262]
New Q values:  [18130.40918748  9277.27459811 -4228.04879148  6371.4824515 ]
Reward: -1  Episode Reward:  0
xxxxx
x...x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
------
Step:11, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  3.10663395e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  3.00810516e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -1
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   5886.83859788]
------
Step:12, Action:West
State  208
Old Q Values:  [18130.40918748  9277.27459811 -4228.04879148  6371.4824515 ]
New Q values:  [18130.40918748  9277.27459811 -4228.04879148  6400.04485106]
Reward: -1  Episode Reward:  -2
xxxxx
x...x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  3.00810516e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  2.96869364e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -3
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   5886.83859788]
------
Step:14, Action:West
State  210
Old Q Values:  [ 1322.68698646 -2603.17716687  1542.2004081   5886.83859788]
New Q values:  [ 1322.68698646 -2603.17716687  1542.2004081   8234.1653    ]
Reward: -1  Episode Reward:  -4
xxxxx
x...x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  2.96869364e+03  1.20371620e+03]
------
Step:15, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  2.96869364e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  3.65712705e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -5
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   8234.1653    ]
------
Step:16, Action:West
State  210
Old Q Values:  [ 1322.68698646 -2603.17716687  1542.2004081   8234.1653    ]
New Q values:  [ 1322.68698646 -2603.17716687  1542.2004081   9173.09598085]
Reward: -1  Episode Reward:  -6
xxxxx
x...x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  3.65712705e+03  1.20371620e+03]
------
Step:17, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  3.65712705e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  4.21417961e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -7
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   9173.09598085]
------
Step:18, Action:West
State  210
Old Q Values:  [ 1322.68698646 -2603.17716687  1542.2004081   9173.09598085]
New Q values:  [ 1322.68698646 -2603.17716687  1542.2004081   9548.66825319]
Reward: -1  Episode Reward:  -8
xxxxx
x...x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.21417961e+03  1.20371620e+03]
------
Step:19, Action:East
State  195
Old Q Values:  [   38.85388605 13650.02620119 14622.6780724   1101.59744825]
New Q values:  [   38.85388605 13650.02620119  8713.07170492  1101.59744825]
Reward: -1  Episode Reward:  -9
xxxxx
x...x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   9548.66825319]
------
Step:20, Action:West
State  210
Old Q Values:  [ 1322.68698646 -2603.17716687  1542.2004081   9548.66825319]
New Q values:  [ 1322.68698646 -2603.17716687  1542.2004081   9698.89716213]
Reward: -1  Episode Reward:  -10
xxxxx
x...x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.21417961e+03  1.20371620e+03]
------
Step:21, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.58280969e+03 1.28401729e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 6.58280969e+03 1.05745919e+04 2.91043938e+03]
Reward: -1  Episode Reward:  -11
xxxxx
x...x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18130.40918748  9277.27459811 -4228.04879148  6400.04485106]
------
Step:22, Action:North
State  216
Old Q Values:  [ 1202.21425182   372.98523174 -8896.20691497  2318.350458  ]
New Q values:  [ 3387.40722334   372.98523174 -8896.20691497  2318.350458  ]
Reward: 9  Episode Reward:  -2
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.67040508e+03 -3.22965309e-01  9.74808100e+01]
------
Step:23, Action:South
State  130
Old Q Values:  [34597.52013254 13483.01009663  -180.00807518 66357.64859707]
New Q values:  [34597.52013254  8302.27318729  -180.00807518 66357.64859707]
Reward: -1  Episode Reward:  -3
xxxxx
x.. x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1322.68698646 -2603.17716687  1542.2004081   9698.89716213]
------
Step:24, Action:West
State  210
Old Q Values:  [ 1322.68698646 -2603.17716687  1542.2004081   9698.89716213]
New Q values:  [ 1322.68698646 -2603.17716687  1542.2004081   9758.9887257 ]
Reward: -1  Episode Reward:  -4
xxxxx
x.. x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.21417961e+03  1.20371620e+03]
------
Step:25, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.58280969e+03 1.05745919e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 6.58280969e+03 5.24545893e+03 2.91043938e+03]
Reward: -1  Episode Reward:  -5
xxxxx
x.. x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3387.40722334   372.98523174 -8896.20691497  2318.350458  ]
------
Step:26, Action:North
State  216
Old Q Values:  [ 3387.40722334   372.98523174 -8896.20691497  2318.350458  ]
New Q values:  [ 2417.99004338   372.98523174 -8896.20691497  2318.350458  ]
Reward: -1  Episode Reward:  -6
xxxxx
x.gax
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3545.42384682 -2383.80019164   450.10840272]
------
Step:27, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  9.67040508e+03 -3.22965309e-01  9.74808100e+01]
New Q values:  [ 7.64171987e+01  4.59295904e+03 -3.22965309e-01  9.74808100e+01]
Reward: -1  Episode Reward:  -7
xxxxx
x.. x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2417.99004338   372.98523174 -8896.20691497  2318.350458  ]
------
Step:28, Action:North
State  210
Old Q Values:  [ 1322.68698646 -2603.17716687  1542.2004081   9758.9887257 ]
New Q values:  [20435.7693737  -2603.17716687  1542.2004081   9758.9887257 ]
Reward: -1  Episode Reward:  -8
xxxxx
x..ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254  8302.27318729  -180.00807518 66357.64859707]
------
Step:29, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.59295904e+03 -3.22965309e-01  9.74808100e+01]
New Q values:  [ 7.64171987e+01  4.59295904e+03 -3.22965309e-01  1.57765112e+02]
Reward: 9  Episode Reward:  1
xxxxx
x.a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   377.90929196]
------
Step:30, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   354.98153036]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   903.70082508]
Reward: 9  Episode Reward:  10
xxxxx
xa  x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2521.02737644  181.21829111 -120.29354603]
------
Step:31, Action:South
State  107
Old Q Values:  [-252.35169558  303.95067382  170.11330766 -252.78192178]
New Q values:  [-252.35169558  310.14811233  170.11330766 -252.78192178]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[206.23299394   0.         630.55947601   0.        ]
------
Step:32, Action:East
State  187
Old Q Values:  [206.23299394   0.         630.55947601   0.        ]
New Q values:  [206.23299394   0.         641.06335568   0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5884.35407458  1298.13188427     0.        ]
------
Step:33, Action:East
State  202
Old Q Values:  [    0.         -5884.35407458  1298.13188427     0.        ]
New Q values:  [    0.         -5884.35407458   773.34142896     0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[298.44678463 848.96225083   0.         430.34233803]
------
Step:34, Action:South
State  218
Old Q Values:  [298.44678463 848.96225083   0.         430.34233803]
New Q values:  [ 298.44678463 7500.05659019    0.          430.34233803]
Reward: 9  Episode Reward:  16
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8794.4843772  -1609.84182322 -8192.20126966 23850.23896618]
------
Step:35, Action:North
State  288
Old Q Values:  [ 8794.4843772  -1609.84182322 -8192.20126966 23850.23896618]
New Q values:  [ 5767.21072794 -1609.84182322 -8192.20126966 23850.23896618]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 298.44678463 7500.05659019    0.          430.34233803]
------
Step:36, Action:South
State  218
Old Q Values:  [ 298.44678463 7500.05659019    0.          430.34233803]
New Q values:  [  298.44678463 10154.49432593     0.           430.34233803]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5767.21072794 -1609.84182322 -8192.20126966 23850.23896618]
------
Step:37, Action:North
State  288
Old Q Values:  [ 5767.21072794 -1609.84182322 -8192.20126966 23850.23896618]
New Q values:  [ 5352.63258895 -1609.84182322 -8192.20126966 23850.23896618]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  298.44678463 10154.49432593     0.           430.34233803]
------
Step:38, Action:South
State  218
Old Q Values:  [  298.44678463 10154.49432593     0.           430.34233803]
New Q values:  [  298.44678463 11216.26942023     0.           430.34233803]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5352.63258895 -1609.84182322 -8192.20126966 23850.23896618]
------
Step:39, Action:North
State  288
Old Q Values:  [ 5352.63258895 -1609.84182322 -8192.20126966 23850.23896618]
New Q values:  [ 2865.8500486  -1609.84182322 -8192.20126966 23850.23896618]
Reward: -1  Episode Reward:  11
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2417.99004338   372.98523174 -8896.20691497  2318.350458  ]
------
Step:40, Action:North
State  216
Old Q Values:  [ 2417.99004338   372.98523174 -8896.20691497  2318.350458  ]
New Q values:  [ 2344.4837303    372.98523174 -8896.20691497  2318.350458  ]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.59295904e+03 -3.22965309e-01  1.57765112e+02]
------
Step:41, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.59295904e+03 -3.22965309e-01  1.57765112e+02]
New Q values:  [ 7.64171987e+01  4.59295904e+03 -3.22965309e-01  3.33616292e+02]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   903.70082508]
------
Step:42, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   903.70082508]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   453.92476373]
Reward: -1  Episode Reward:  8
xxxxx
xa  x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  310.14811233  170.11330766 -252.78192178]
------
Step:43, Action:South
State  107
Old Q Values:  [-252.35169558  310.14811233  170.11330766 -252.78192178]
New Q values:  [-252.35169558 2211.45121891  170.11330766 -252.78192178]
Reward: -1  Episode Reward:  7
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  80.42316964    0.         6959.97324659 -178.98      ]
------
Step:44, Action:North
State  185
Old Q Values:  [  80.42316964    0.         6959.97324659 -178.98      ]
New Q values:  [ 695.00463353    0.         6959.97324659 -178.98      ]
Reward: -1  Episode Reward:  6
xxxxx
xa  x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2211.45121891  170.11330766 -252.78192178]
------
Step:45, Action:South
State  107
Old Q Values:  [-252.35169558 2211.45121891  170.11330766 -252.78192178]
New Q values:  [-252.35169558 2971.97246154  170.11330766 -252.78192178]
Reward: -1  Episode Reward:  5
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 695.00463353    0.         6959.97324659 -178.98      ]
------
Step:46, Action:North
State  185
Old Q Values:  [ 695.00463353    0.         6959.97324659 -178.98      ]
New Q values:  [1168.99359187    0.         6959.97324659 -178.98      ]
Reward: -1  Episode Reward:  4
xxxxx
xa  x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2971.97246154  170.11330766 -252.78192178]
------
Step:47, Action:South
State  107
Old Q Values:  [-252.35169558 2971.97246154  170.11330766 -252.78192178]
New Q values:  [-252.35169558 1380.50799132  170.11330766 -252.78192178]
Reward: -1  Episode Reward:  3
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[206.23299394   0.         641.06335568   0.        ]
------
Step:48, Action:East
State  185
Old Q Values:  [1168.99359187    0.         6959.97324659 -178.98      ]
New Q values:  [1168.99359187    0.         3185.98075685 -178.98      ]
Reward: -1  Episode Reward:  2
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.34197153e+03 -8.87652194e+03  2.00341972e+02]
------
Step:49, Action:South
State  203
Old Q Values:  [3.60604218e+00 2.41899005e+04 1.90998438e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.87289448e+04 1.90998438e+03 0.00000000e+00]
Reward: 9  Episode Reward:  11
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  5496.81846187 30158.61536532]
------
Step:50, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  5496.81846187 30158.61536532]
New Q values:  [  870.35122762  -168.92307549  5496.81846187 85883.17887853]
Reward: 100009  Episode Reward:  100020
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18130.40918748  9277.27459811 -4228.04879148  6400.04485106]
------
Step:1, Action:North
State  216
Old Q Values:  [ 2344.4837303    372.98523174 -8896.20691497  2318.350458  ]
New Q values:  [ 2006.82064617   372.98523174 -8896.20691497  2318.350458  ]
Reward: 9  Episode Reward:  9
xxxxx
x.gax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3545.42384682 -2383.80019164   450.10840272]
------
Step:2, Action:South
State  136
Old Q Values:  [ -170.77177351  3545.42384682 -2383.80019164   450.10840272]
New Q values:  [ -170.77177351  2113.07467613 -2383.80019164   450.10840272]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2006.82064617   372.98523174 -8896.20691497  2318.350458  ]
------
Step:3, Action:West
State  216
Old Q Values:  [ 2006.82064617   372.98523174 -8896.20691497  2318.350458  ]
New Q values:  [ 2006.82064617   372.98523174 -8896.20691497  2901.58309048]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.58280969e+03 5.24545893e+03 2.91043938e+03]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.58280969e+03 5.24545893e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.92365521e+03 5.24545893e+03 2.91043938e+03]
Reward: 9  Episode Reward:  16
xxxxx
xg. x
x.  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4283.7711244   3873.40298255]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4283.7711244   3873.40298255]
New Q values:  [-2527.46239811 -8521.23367799  8873.98013961  3873.40298255]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2865.8500486  -1609.84182322 -8192.20126966 23850.23896618]
------
Step:6, Action:West
State  288
Old Q Values:  [ 2865.8500486  -1609.84182322 -8192.20126966 23850.23896618]
New Q values:  [ 2865.8500486  -1609.84182322 -8192.20126966 12201.68962836]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8873.98013961  3873.40298255]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8873.98013961  3873.40298255]
New Q values:  [-2527.46239811 -8521.23367799  7209.49894435  3873.40298255]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2865.8500486  -1609.84182322 -8192.20126966 12201.68962836]
------
Step:8, Action:West
State  288
Old Q Values:  [ 2865.8500486  -1609.84182322 -8192.20126966 12201.68962836]
New Q values:  [ 2865.8500486  -1609.84182322 -8192.20126966  7042.92553465]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7209.49894435  3873.40298255]
------
Step:9, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549  5496.81846187 85883.17887853]
New Q values:  [  870.35122762  -168.92307549  4311.00504514 85883.17887853]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2865.8500486  -1609.84182322 -8192.20126966  7042.92553465]
------
Step:10, Action:West
State  288
Old Q Values:  [ 2865.8500486  -1609.84182322 -8192.20126966  7042.92553465]
New Q values:  [ 2865.8500486  -1609.84182322 -8192.20126966  4979.41989717]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7209.49894435  3873.40298255]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7209.49894435  3873.40298255]
New Q values:  [-2527.46239811 -8521.23367799  4377.02554689  3873.40298255]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2865.8500486  -1609.84182322 -8192.20126966  4979.41989717]
------
Step:12, Action:North
State  288
Old Q Values:  [ 2865.8500486  -1609.84182322 -8192.20126966  4979.41989717]
New Q values:  [ 6584.86277568 -1609.84182322 -8192.20126966  4979.41989717]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18130.40918748  9277.27459811 -4228.04879148  6400.04485106]
------
Step:13, Action:North
State  208
Old Q Values:  [18130.40918748  9277.27459811 -4228.04879148  6400.04485106]
New Q values:  [27158.85825411  9277.27459811 -4228.04879148  6400.04485106]
Reward: -1  Episode Reward:  17
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254  8302.27318729  -180.00807518 66357.64859707]
------
Step:14, Action:West
State  136
Old Q Values:  [ -170.77177351  2113.07467613 -2383.80019164   450.10840272]
New Q values:  [ -170.77177351  2113.07467613 -2383.80019164   278.75756388]
Reward: 9  Episode Reward:  26
xxxxx
x.agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:15, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   453.92476373]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   601.12230289]
Reward: 9  Episode Reward:  35
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1380.50799132  170.11330766 -252.78192178]
------
Step:16, Action:South
State  107
Old Q Values:  [-252.35169558 1380.50799132  170.11330766 -252.78192178]
New Q values:  [-252.35169558  749.92220323  170.11330766 -252.78192178]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[206.23299394   0.         641.06335568   0.        ]
------
Step:17, Action:East
State  187
Old Q Values:  [206.23299394   0.         641.06335568   0.        ]
New Q values:  [ 206.23299394    0.         5874.50879038    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x a x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 1.87289448e+04 1.90998438e+03 0.00000000e+00]
------
Step:18, Action:South
State  202
Old Q Values:  [    0.         -5884.35407458   773.34142896     0.        ]
New Q values:  [    0.         -7041.23396577   773.34142896     0.        ]
Reward: -10001  Episode Reward:  -9958
xxxxx
x   x
x   x
x.g x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27158.85825411  9277.27459811 -4228.04879148  6400.04485106]
------
Step:1, Action:North
State  216
Old Q Values:  [ 2006.82064617   372.98523174 -8896.20691497  2901.58309048]
New Q values:  [ 2186.01597142   372.98523174 -8896.20691497  2901.58309048]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.59295904e+03 -3.22965309e-01  3.33616292e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.59295904e+03 -3.22965309e-01  3.33616292e+02]
New Q values:  [ 7.64171987e+01  3.98424109e+03 -3.22965309e-01  3.33616292e+02]
Reward: -10001  Episode Reward:  -9992
xxxxx
x.. x
x .gx
x...x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6584.86277568 -1609.84182322 -8192.20126966  4979.41989717]
------
Step:1, Action:North
State  288
Old Q Values:  [ 6584.86277568 -1609.84182322 -8192.20126966  4979.41989717]
New Q values:  [10787.00258651 -1609.84182322 -8192.20126966  4979.41989717]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27158.85825411  9277.27459811 -4228.04879148  6400.04485106]
------
Step:2, Action:North
State  208
Old Q Values:  [27158.85825411  9277.27459811 -4228.04879148  6400.04485106]
New Q values:  [12064.2156297   9277.27459811 -4228.04879148  6400.04485106]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.98424109e+03 -3.22965309e-01  3.33616292e+02]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351  2113.07467613 -2383.80019164   278.75756388]
New Q values:  [ -170.77177351  4463.89455936 -2383.80019164   278.75756388]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12064.2156297   9277.27459811 -4228.04879148  6400.04485106]
------
Step:4, Action:North
State  208
Old Q Values:  [12064.2156297   9277.27459811 -4228.04879148  6400.04485106]
New Q values:  [ 6164.25461969  9277.27459811 -4228.04879148  6400.04485106]
Reward: -1  Episode Reward:  16
xxxxx
xg ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4463.89455936 -2383.80019164   278.75756388]
------
Step:5, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.98424109e+03 -3.22965309e-01  3.33616292e+02]
New Q values:  [ 7.64171987e+01  4.37627882e+03 -3.22965309e-01  3.33616292e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6164.25461969  9277.27459811 -4228.04879148  6400.04485106]
------
Step:6, Action:South
State  210
Old Q Values:  [20435.7693737  -2603.17716687  1542.2004081   9758.9887257 ]
New Q values:  [20435.7693737   2194.22990921  1542.2004081   9758.9887257 ]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10787.00258651 -1609.84182322 -8192.20126966  4979.41989717]
------
Step:7, Action:North
State  288
Old Q Values:  [10787.00258651 -1609.84182322 -8192.20126966  4979.41989717]
New Q values:  [10444.93184671 -1609.84182322 -8192.20126966  4979.41989717]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[20435.7693737   2194.22990921  1542.2004081   9758.9887257 ]
------
Step:8, Action:North
State  210
Old Q Values:  [20435.7693737   2194.22990921  1542.2004081   9758.9887257 ]
New Q values:  [9486.59139453 2194.22990921 1542.2004081  9758.9887257 ]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.37627882e+03 -3.22965309e-01  3.33616292e+02]
------
Step:9, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.37627882e+03 -3.22965309e-01  3.33616292e+02]
New Q values:  [ 7.64171987e+01  4.53309391e+03 -3.22965309e-01  3.33616292e+02]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6164.25461969  9277.27459811 -4228.04879148  6400.04485106]
------
Step:10, Action:South
State  208
Old Q Values:  [ 6164.25461969  9277.27459811 -4228.04879148  6400.04485106]
New Q values:  [ 6164.25461969  6843.78939326 -4228.04879148  6400.04485106]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10444.93184671 -1609.84182322 -8192.20126966  4979.41989717]
------
Step:11, Action:North
State  288
Old Q Values:  [10444.93184671 -1609.84182322 -8192.20126966  4979.41989717]
New Q values:  [ 6230.50955666 -1609.84182322 -8192.20126966  4979.41989717]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6164.25461969  6843.78939326 -4228.04879148  6400.04485106]
------
Step:12, Action:South
State  210
Old Q Values:  [9486.59139453 2194.22990921 1542.2004081  9758.9887257 ]
New Q values:  [9486.59139453 2746.24483068 1542.2004081  9758.9887257 ]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6230.50955666 -1609.84182322 -8192.20126966  4979.41989717]
------
Step:13, Action:North
State  288
Old Q Values:  [ 6230.50955666 -1609.84182322 -8192.20126966  4979.41989717]
New Q values:  [ 4544.74064064 -1609.84182322 -8192.20126966  4979.41989717]
Reward: -1  Episode Reward:  7
xxxxx
x.  x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6164.25461969  6843.78939326 -4228.04879148  6400.04485106]
------
Step:14, Action:South
State  210
Old Q Values:  [9486.59139453 2746.24483068 1542.2004081  9758.9887257 ]
New Q values:  [9486.59139453 2591.72390142 1542.2004081  9758.9887257 ]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4544.74064064 -1609.84182322 -8192.20126966  4979.41989717]
------
Step:15, Action:West
State  288
Old Q Values:  [ 4544.74064064 -1609.84182322 -8192.20126966  4979.41989717]
New Q values:  [ 4544.74064064 -1609.84182322 -8192.20126966  3310.27562293]
Reward: 9  Episode Reward:  15
xxxxx
x.  x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4377.02554689  3873.40298255]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4377.02554689  3873.40298255]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095  3873.40298255]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4544.74064064 -1609.84182322 -8192.20126966  3310.27562293]
------
Step:17, Action:North
State  288
Old Q Values:  [ 4544.74064064 -1609.84182322 -8192.20126966  3310.27562293]
New Q values:  [ 3870.43307424 -1609.84182322 -8192.20126966  3310.27562293]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6164.25461969  6843.78939326 -4228.04879148  6400.04485106]
------
Step:18, Action:South
State  210
Old Q Values:  [9486.59139453 2591.72390142 1542.2004081  9758.9887257 ]
New Q values:  [9486.59139453 2197.21948284 1542.2004081  9758.9887257 ]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3870.43307424 -1609.84182322 -8192.20126966  3310.27562293]
------
Step:19, Action:North
State  288
Old Q Values:  [ 3870.43307424 -1609.84182322 -8192.20126966  3310.27562293]
New Q values:  [ 4475.2698474  -1609.84182322 -8192.20126966  3310.27562293]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9486.59139453 2197.21948284 1542.2004081  9758.9887257 ]
------
Step:20, Action:West
State  208
Old Q Values:  [ 6164.25461969  6843.78939326 -4228.04879148  6400.04485106]
New Q values:  [ 6164.25461969  6843.78939326 -4228.04879148 -1860.94437945]
Reward: -9991  Episode Reward:  -9980
xxxxx
x.  x
x.g x
x.  x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095  3873.40298255]
------
Step:1, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4311.00504514 85883.17887853]
New Q values:  [  870.35122762  -168.92307549  4311.00504514 34805.55529206]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1489.61246882    26.73544252 -2339.75315766   -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [ 1489.61246882    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [  683.68282698    26.73544252 -2339.75315766   -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  156.51326799   274.79279816 -4415.3543461    -30.99112081]
------
Step:3, Action:South
State  183
Old Q Values:  [ 877.23516594  199.89821895 7129.04872323    0.        ]
New Q values:  [ 877.23516594  284.46413567 7129.04872323    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  683.68282698    26.73544252 -2339.75315766   -35.88578819]
------
Step:4, Action:North
State  260
Old Q Values:  [ 1381.06142465 -2735.46306511 22671.22161795 -2601.74710518]
New Q values:  [ 2042.80795098 -2735.46306511 22671.22161795 -2601.74710518]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  4969.94460373     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 877.23516594  284.46413567 7129.04872323    0.        ]
New Q values:  [ 877.23516594  284.46413567 8731.04935014    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.21417961e+03  1.20371620e+03]
------
Step:6, Action:East
State  195
Old Q Values:  [   38.85388605 13650.02620119  8713.07170492  1101.59744825]
New Q values:  [   38.85388605 13650.02620119  6418.32529968  1101.59744825]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9486.59139453 2197.21948284 1542.2004081  9758.9887257 ]
------
Step:7, Action:West
State  208
Old Q Values:  [ 6164.25461969  6843.78939326 -4228.04879148 -1860.94437945]
New Q values:  [ 6164.25461969  6843.78939326 -4228.04879148   349.19227838]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.92365521e+03 5.24545893e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.73088298e+03 5.24545893e+03 2.91043938e+03]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095  3873.40298255]
------
Step:9, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114   694.25822892]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114   482.20813966]
Reward: -1  Episode Reward:  21
xxxxx
x.g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  683.68282698    26.73544252 -2339.75315766   -35.88578819]
------
Step:10, Action:North
State  260
Old Q Values:  [ 2042.80795098 -2735.46306511 22671.22161795 -2601.74710518]
New Q values:  [ 2329.62685729 -2735.46306511 22671.22161795 -2601.74710518]
Reward: -1  Episode Reward:  20
xxxxx
xg..x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5653.90440971  1811.65086161  5043.67892299 -4966.32149798]
------
Step:11, Action:East
State  177
Old Q Values:  [111073.33310002  22487.97851491   9963.97212411      0.        ]
New Q values:  [111073.33310002  22487.97851491   5558.62652977      0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x.g.x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.73088298e+03 5.24545893e+03 2.91043938e+03]
------
Step:12, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.73088298e+03 5.24545893e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.73088298e+03 4.15072039e+03 2.91043938e+03]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6164.25461969  6843.78939326 -4228.04879148   349.19227838]
------
Step:13, Action:South
State  208
Old Q Values:  [ 6164.25461969  6843.78939326 -4228.04879148   349.19227838]
New Q values:  [ 6164.25461969  4085.49671153 -4228.04879148   349.19227838]
Reward: 9  Episode Reward:  27
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4475.2698474  -1609.84182322 -8192.20126966  3310.27562293]
------
Step:14, Action:North
State  288
Old Q Values:  [ 4475.2698474  -1609.84182322 -8192.20126966  3310.27562293]
New Q values:  [ 3638.78432487 -1609.84182322 -8192.20126966  3310.27562293]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6164.25461969  4085.49671153 -4228.04879148   349.19227838]
------
Step:15, Action:South
State  208
Old Q Values:  [ 6164.25461969  4085.49671153 -4228.04879148   349.19227838]
New Q values:  [ 6164.25461969  2725.23398207 -4228.04879148   349.19227838]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3638.78432487 -1609.84182322 -8192.20126966  3310.27562293]
------
Step:16, Action:West
State  288
Old Q Values:  [ 3638.78432487 -1609.84182322 -8192.20126966  3310.27562293]
New Q values:  [ 3638.78432487 -1609.84182322 -8192.20126966 11765.17683679]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4311.00504514 34805.55529206]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095  3873.40298255]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095 15363.09392542]
Reward: -1  Episode Reward:  23
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[46047.77577467  2256.66526474  4520.89517899  1875.31501677]
------
Step:18, Action:North
State  260
Old Q Values:  [ 2329.62685729 -2735.46306511 22671.22161795 -2601.74710518]
New Q values:  [ 2444.35441981 -2735.46306511 22671.22161795 -2601.74710518]
Reward: -1  Episode Reward:  22
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5653.90440971  1811.65086161  5043.67892299 -4966.32149798]
------
Step:19, Action:East
State  180
Old Q Values:  [-5653.90440971  1811.65086161  5043.67892299 -4966.32149798]
New Q values:  [-5653.90440971  1811.65086161  3262.08768664 -4966.32149798]
Reward: -1  Episode Reward:  21
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.73088298e+03 4.15072039e+03 2.91043938e+03]
------
Step:20, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.73088298e+03 4.15072039e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.73088298e+03 3.50896454e+03 2.91043938e+03]
Reward: -1  Episode Reward:  20
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6164.25461969  2725.23398207 -4228.04879148   349.19227838]
------
Step:21, Action:North
State  208
Old Q Values:  [ 6164.25461969  2725.23398207 -4228.04879148   349.19227838]
New Q values:  [22378.396427    2725.23398207 -4228.04879148   349.19227838]
Reward: 9  Episode Reward:  29
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254  8302.27318729  -180.00807518 66357.64859707]
------
Step:22, Action:West
State  130
Old Q Values:  [34597.52013254  8302.27318729  -180.00807518 66357.64859707]
New Q values:  [34597.52013254  8302.27318729  -180.00807518 66370.59631344]
Reward: 9  Episode Reward:  38
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132740.45624872]
------
Step:23, Action:West
State  112
Old Q Values:  [    0.          3629.92591876  6789.02994987 99845.1452544 ]
New Q values:  [     0.           3629.92591876   6789.02994987 110949.76306292]
Reward: 100009  Episode Reward:  100047
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.53309391e+03 -3.22965309e-01  3.33616292e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.53309391e+03 -3.22965309e-01  3.33616292e+02]
New Q values:  [ 7.64171987e+01  4.74633418e+03 -3.22965309e-01  3.33616292e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9486.59139453 2197.21948284 1542.2004081  9758.9887257 ]
------
Step:2, Action:West
State  210
Old Q Values:  [9486.59139453 2197.21948284 1542.2004081  9758.9887257 ]
New Q values:  [9486.59139453 2197.21948284 1542.2004081  9789.02535113]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.21417961e+03  1.20371620e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  4.21417961e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  4.62177945e+03  1.20371620e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9486.59139453 2197.21948284 1542.2004081  9789.02535113]
------
Step:4, Action:West
State  210
Old Q Values:  [9486.59139453 2197.21948284 1542.2004081  9789.02535113]
New Q values:  [9486.59139453 2197.21948284 1542.2004081  9795.0400013 ]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.62177945e+03  1.20371620e+03]
------
Step:5, Action:East
State  195
Old Q Values:  [   38.85388605 13650.02620119  6418.32529968  1101.59744825]
New Q values:  [   38.85388605 13650.02620119  5505.24212026  1101.59744825]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9486.59139453 2197.21948284 1542.2004081  9795.0400013 ]
------
Step:6, Action:West
State  216
Old Q Values:  [ 2186.01597142   372.98523174 -8896.20691497  2901.58309048]
New Q values:  [ 2186.01597142   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:7, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.73088298e+03 3.50896454e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.70668137e+03 3.50896454e+03 2.91043938e+03]
Reward: 9  Episode Reward:  23
xxxxx
x.. x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095 15363.09392542]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095 15363.09392542]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095 12952.00405555]
Reward: 9  Episode Reward:  32
xxxxx
x.. x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2444.35441981 -2735.46306511 22671.22161795 -2601.74710518]
------
Step:9, Action:East
State  261
Old Q Values:  [  683.68282698    26.73544252 -2339.75315766   -35.88578819]
New Q values:  [ 683.68282698   26.73544252 2949.0999536   -35.88578819]
Reward: -1  Episode Reward:  31
xxxxx
x.. x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095 12952.00405555]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095 12952.00405555]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095  6064.9316083 ]
Reward: -1  Episode Reward:  30
xxxxx
x.. x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 683.68282698   26.73544252 2949.0999536   -35.88578819]
------
Step:11, Action:North
State  257
Old Q Values:  [46047.77577467  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [40484.11156022  2256.66526474  4520.89517899  1875.31501677]
Reward: -1  Episode Reward:  29
xxxxx
x.. x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         17203.58890013 73552.00416784     0.        ]
------
Step:12, Action:East
State  179
Old Q Values:  [    0.         17203.58890013 73552.00416784     0.        ]
New Q values:  [    0.         17203.58890013 35300.23152799     0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x.. x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.62177945e+03  1.20371620e+03]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  4.62177945e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  4.78662378e+03  1.20371620e+03]
Reward: -1  Episode Reward:  27
xxxxx
x.. x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9486.59139453 2197.21948284 1542.2004081  9795.0400013 ]
------
Step:14, Action:West
State  210
Old Q Values:  [9486.59139453 2197.21948284 1542.2004081  9795.0400013 ]
New Q values:  [9486.59139453 2197.21948284 1542.2004081  9797.44586137]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.78662378e+03  1.20371620e+03]
------
Step:15, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  4.78662378e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.96000995e+04  4.85328327e+03  1.20371620e+03]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9486.59139453 2197.21948284 1542.2004081  9797.44586137]
------
Step:16, Action:West
State  208
Old Q Values:  [22378.396427    2725.23398207 -4228.04879148   349.19227838]
New Q values:  [22378.396427    2725.23398207 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.70668137e+03 3.50896454e+03 2.91043938e+03]
------
Step:17, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.70668137e+03 3.50896454e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 4.10155203e+03 3.50896454e+03 2.91043938e+03]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095  6064.9316083 ]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095  6064.9316083 ]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095 14570.60611139]
Reward: -1  Episode Reward:  22
xxxxx
x.g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[40484.11156022  2256.66526474  4520.89517899  1875.31501677]
------
Step:19, Action:North
State  261
Old Q Values:  [ 683.68282698   26.73544252 2949.0999536   -35.88578819]
New Q values:  [ 3.35948731e+04  2.67354425e+01  2.94909995e+03 -3.58857882e+01]
Reward: -1  Episode Reward:  21
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[111073.33310002  22487.97851491   5558.62652977      0.        ]
------
Step:20, Action:North
State  181
Old Q Values:  [  156.51326799   274.79279816 -4415.3543461    -30.99112081]
New Q values:  [  536.51092082   274.79279816 -4415.3543461    -30.99112081]
Reward: 9  Episode Reward:  30
xxxxx
xa. x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1561.68537874    5.4           0.        ]
------
Step:21, Action:South
State  103
Old Q Values:  [ 221.30610858 1561.68537874    5.4           0.        ]
New Q values:  [ 221.30610858 3243.38895654    5.4           0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  284.46413567 8731.04935014    0.        ]
------
Step:22, Action:East
State  181
Old Q Values:  [  536.51092082   274.79279816 -4415.3543461    -30.99112081]
New Q values:  [ 536.51092082  274.79279816 -672.57170828  -30.99112081]
Reward: -1  Episode Reward:  28
xxxxx
x . x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
------
Step:23, Action:South
State  193
Old Q Values:  [-5922.26708831  3647.23343388 -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 11899.95996117 -5545.76189056  1099.96026581]
Reward: -1  Episode Reward:  27
xxxxx
x .gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4311.00504514 34805.55529206]
------
Step:24, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114   482.20813966]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114 10270.7451741 ]
Reward: -1  Episode Reward:  26
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3.35948731e+04  2.67354425e+01  2.94909995e+03 -3.58857882e+01]
------
Step:25, Action:North
State  261
Old Q Values:  [ 3.35948731e+04  2.67354425e+01  2.94909995e+03 -3.58857882e+01]
New Q values:  [13598.30250056    26.73544252  2949.0999536    -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 536.51092082  274.79279816 -672.57170828  -30.99112081]
------
Step:26, Action:North
State  181
Old Q Values:  [ 536.51092082  274.79279816 -672.57170828  -30.99112081]
New Q values:  [ 452.1239139   274.79279816 -672.57170828  -30.99112081]
Reward: -1  Episode Reward:  24
xxxxx
xa.gx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[  0.         793.73181857   0.           0.        ]
------
Step:27, Action:South
State  103
Old Q Values:  [ 221.30610858 3243.38895654    5.4           0.        ]
New Q values:  [ 221.30610858 1432.39275678    5.4           0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x . x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 452.1239139   274.79279816 -672.57170828  -30.99112081]
------
Step:28, Action:North
State  181
Old Q Values:  [ 452.1239139   274.79279816 -672.57170828  -30.99112081]
New Q values:  [ 609.96739259  274.79279816 -672.57170828  -30.99112081]
Reward: -1  Episode Reward:  22
xxxxx
xa. x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1432.39275678    5.4           0.        ]
------
Step:29, Action:South
State  103
Old Q Values:  [ 221.30610858 1432.39275678    5.4           0.        ]
New Q values:  [ 221.30610858 3191.67190776    5.4           0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x . x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  284.46413567 8731.04935014    0.        ]
------
Step:30, Action:East
State  181
Old Q Values:  [ 609.96739259  274.79279816 -672.57170828  -30.99112081]
New Q values:  [  609.96739259   274.79279816 -5039.1630742    -30.99112081]
Reward: -10001  Episode Reward:  -9980
xxxxx
x . x
x g x
x  .x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22378.396427    2725.23398207 -4228.04879148  1851.08132227]
------
Step:1, Action:North
State  210
Old Q Values:  [9486.59139453 2197.21948284 1542.2004081  9797.44586137]
New Q values:  [5223.93681187 2197.21948284 1542.2004081  9797.44586137]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.74633418e+03 -3.22965309e-01  3.33616292e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.74633418e+03 -3.22965309e-01  3.33616292e+02]
New Q values:  [ 7.64171987e+01  4.83716743e+03 -3.22965309e-01  3.33616292e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5223.93681187 2197.21948284 1542.2004081  9797.44586137]
------
Step:3, Action:West
State  210
Old Q Values:  [5223.93681187 2197.21948284 1542.2004081  9797.44586137]
New Q values:  [5223.93681187 2197.21948284 1542.2004081  9804.4082054 ]
Reward: 9  Episode Reward:  17
xxxxx
x.. x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.85328327e+03  1.20371620e+03]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.10155203e+03 3.50896454e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 6.01720265e+03 3.50896454e+03 2.91043938e+03]
Reward: 9  Episode Reward:  26
xxxxx
x.. x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095 14570.60611139]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095 14570.60611139]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095 12635.00892994]
Reward: 9  Episode Reward:  35
xxxxx
xg. x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2444.35441981 -2735.46306511 22671.22161795 -2601.74710518]
------
Step:6, Action:East
State  260
Old Q Values:  [ 2444.35441981 -2735.46306511 22671.22161795 -2601.74710518]
New Q values:  [ 2444.35441981 -2735.46306511 12858.39132616 -2601.74710518]
Reward: -1  Episode Reward:  34
xxxxx
x.. x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095 12635.00892994]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095 12635.00892994]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095  8910.92096982]
Reward: -1  Episode Reward:  33
xxxxx
xg. x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2444.35441981 -2735.46306511 12858.39132616 -2601.74710518]
------
Step:8, Action:East
State  260
Old Q Values:  [ 2444.35441981 -2735.46306511 12858.39132616 -2601.74710518]
New Q values:  [ 2444.35441981 -2735.46306511  7816.03282141 -2601.74710518]
Reward: -1  Episode Reward:  32
xxxxx
x.. x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095  8910.92096982]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095  8910.92096982]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095  7643.2591381 ]
Reward: -1  Episode Reward:  31
xxxxx
x.. x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[13598.30250056    26.73544252  2949.0999536    -35.88578819]
------
Step:10, Action:North
State  260
Old Q Values:  [ 2444.35441981 -2735.46306511  7816.03282141 -2601.74710518]
New Q values:  [17922.82565099 -2735.46306511  7816.03282141 -2601.74710518]
Reward: -10001  Episode Reward:  -9970
xxxxx
x.. x
xg  x
x  .x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4463.89455936 -2383.80019164   278.75756388]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.83716743e+03 -3.22965309e-01  3.33616292e+02]
New Q values:  [ 7.64171987e+01  2.61652795e+03 -3.22965309e-01  3.33616292e+02]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2186.01597142   372.98523174 -8896.20691497  2254.20326636]
------
Step:2, Action:West
State  210
Old Q Values:  [5223.93681187 2197.21948284 1542.2004081  9804.4082054 ]
New Q values:  [5223.93681187 2197.21948284 1542.2004081  9807.19314301]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x.a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.96000995e+04  4.85328327e+03  1.20371620e+03]
------
Step:3, Action:South
State  194
Old Q Values:  [-6.00000000e-01  1.96000995e+04  4.85328327e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.13841756e+03  4.85328327e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9973
xxxxx
x . x
x.  x
x.g.x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4311.00504514 34805.55529206]
------
Step:1, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4311.00504514 34805.55529206]
New Q values:  [  870.35122762  -168.92307549  4311.00504514 18007.11286699]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[13598.30250056    26.73544252  2949.0999536    -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [13598.30250056    26.73544252  2949.0999536    -35.88578819]
New Q values:  [5627.711218     26.73544252 2949.0999536   -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  609.96739259   274.79279816 -5039.1630742    -30.99112081]
------
Step:3, Action:North
State  181
Old Q Values:  [  609.96739259   274.79279816 -5039.1630742    -30.99112081]
New Q values:  [ 1005.69516997   274.79279816 -5039.1630742    -30.99112081]
Reward: 9  Episode Reward:  27
xxxxx
xa. x
x .gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2521.02737644  181.21829111 -120.29354603]
------
Step:4, Action:South
State  109
Old Q Values:  [-241.10880094  594.92536577   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  539.0786973    -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1005.69516997   274.79279816 -5039.1630742    -30.99112081]
------
Step:5, Action:North
State  181
Old Q Values:  [ 1005.69516997   274.79279816 -5039.1630742    -30.99112081]
New Q values:  [  563.40167718   274.79279816 -5039.1630742    -30.99112081]
Reward: -1  Episode Reward:  25
xxxxx
xag x
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  539.0786973    -8.57207238 -180.6       ]
------
Step:6, Action:South
State  108
Old Q Values:  [-8463.16477134  1409.551343     654.62565385     0.        ]
New Q values:  [-8463.16477134  1541.84684319   654.62565385     0.        ]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5653.90440971  1811.65086161  3262.08768664 -4966.32149798]
------
Step:7, Action:East
State  181
Old Q Values:  [  563.40167718   274.79279816 -5039.1630742    -30.99112081]
New Q values:  [ 563.40167718  274.79279816 -782.40989094  -30.99112081]
Reward: 9  Episode Reward:  33
xxxxx
x g x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1114.39070335  4092.85112914   239.04887894]
------
Step:8, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.01720265e+03 3.50896454e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 6.01720265e+03 8.12250475e+03 2.91043938e+03]
Reward: 9  Episode Reward:  42
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22378.396427    2725.23398207 -4228.04879148  1851.08132227]
------
Step:9, Action:North
State  208
Old Q Values:  [22378.396427    2725.23398207 -4228.04879148  1851.08132227]
New Q values:  [28861.93746483  2725.23398207 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  41
xxxxx
x .ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254  8302.27318729  -180.00807518 66370.59631344]
------
Step:10, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.61652795e+03 -3.22965309e-01  3.33616292e+02]
New Q values:  [ 7.64171987e+01  2.61652795e+03 -3.22965309e-01  1.32899495e+03]
Reward: 9  Episode Reward:  50
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 3967.16144923 1134.18629274]
------
Step:11, Action:East
State  126
Old Q Values:  [   0.          331.64678262 3967.16144923 1134.18629274]
New Q values:  [   0.          331.64678262 2371.22296532 1134.18629274]
Reward: -1  Episode Reward:  49
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.61652795e+03 -3.22965309e-01  1.32899495e+03]
------
Step:12, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.61652795e+03 -3.22965309e-01  1.32899495e+03]
New Q values:  [ 7.64171987e+01  4.41089201e+03 -3.22965309e-01  1.32899495e+03]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  298.44678463 11216.26942023     0.           430.34233803]
------
Step:13, Action:South
State  208
Old Q Values:  [28861.93746483  2725.23398207 -4228.04879148  1851.08132227]
New Q values:  [28861.93746483 64625.04664387 -4228.04879148  1851.08132227]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.41089201e+03 -3.22965309e-01  1.32899495e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.41089201e+03 -3.22965309e-01  1.32899495e+03]
New Q values:  [ 7.64171987e+01  4.71191475e+03 -3.22965309e-01  1.32899495e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5223.93681187 2197.21948284 1542.2004081  9807.19314301]
------
Step:2, Action:West
State  210
Old Q Values:  [5223.93681187 2197.21948284 1542.2004081  9807.19314301]
New Q values:  [5223.93681187 2197.21948284 1542.2004081  5384.2622384 ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  4.13841756e+03  4.85328327e+03  1.20371620e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  4.13841756e+03  4.85328327e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.13841756e+03  3.55599198e+03  1.20371620e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5223.93681187 2197.21948284 1542.2004081  5384.2622384 ]
------
Step:4, Action:West
State  210
Old Q Values:  [5223.93681187 2197.21948284 1542.2004081  5384.2622384 ]
New Q values:  [5223.93681187 2197.21948284 1542.2004081  3394.63016213]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  4.13841756e+03  3.55599198e+03  1.20371620e+03]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.01720265e+03 8.12250475e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 4.70525880e+03 8.12250475e+03 2.91043938e+03]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3113.63241095  7643.2591381 ]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095  7643.2591381 ]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095 15733.32034293]
Reward: 9  Episode Reward:  34
xxxxx
xg. x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[42235.38895895 15557.34758455  5576.40109469   644.94785455]
------
Step:7, Action:North
State  256
Old Q Values:  [42235.38895895 15557.34758455  5576.40109469   644.94785455]
New Q values:  [33845.23946664 15557.34758455  5576.40109469   644.94785455]
Reward: -9991  Episode Reward:  -9957
xxxxx
x.. x
xg  x
x   x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5627.711218     26.73544252 2949.0999536   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [5627.711218     26.73544252 2949.0999536   -35.88578819]
New Q values:  [4875.79929224   26.73544252 2949.0999536   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  284.46413567 8731.04935014    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 877.23516594  284.46413567 8731.04935014    0.        ]
New Q values:  [ 877.23516594  284.46413567 7592.82760041    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 13650.02620119  5505.24212026  1101.59744825]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 11899.95996117 -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 10167.51784457 -5545.76189056  1099.96026581]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4311.00504514 18007.11286699]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095 15733.32034293]
New Q values:  [-2527.46239811 -8521.23367799  3113.63241095  7755.46792484]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4875.79929224   26.73544252 2949.0999536   -35.88578819]
------
Step:5, Action:North
State  260
Old Q Values:  [17922.82565099 -2735.46306511  7816.03282141 -2601.74710518]
New Q values:  [ 2147.15656639 -2735.46306511  7816.03282141 -2601.74710518]
Reward: -10001  Episode Reward:  -9975
xxxxx
x...x
xg .x
x   x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 563.40167718  274.79279816 -782.40989094  -30.99112081]
------
Step:1, Action:North
State  181
Old Q Values:  [ 563.40167718  274.79279816 -782.40989094  -30.99112081]
New Q values:  [ 987.0688838   274.79279816 -782.40989094  -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
xa. x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2521.02737644  181.21829111 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 2521.02737644  181.21829111 -120.29354603]
New Q values:  [-177.44732869 1303.93161572  181.21829111 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x . x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 987.0688838   274.79279816 -782.40989094  -30.99112081]
------
Step:3, Action:North
State  181
Old Q Values:  [ 987.0688838   274.79279816 -782.40989094  -30.99112081]
New Q values:  [ 555.95116271  274.79279816 -782.40989094  -30.99112081]
Reward: -1  Episode Reward:  7
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  539.0786973    -8.57207238 -180.6       ]
------
Step:4, Action:South
State  108
Old Q Values:  [-8463.16477134  1541.84684319   654.62565385     0.        ]
New Q values:  [-8463.16477134  1594.76504327   654.62565385     0.        ]
Reward: -1  Episode Reward:  6
xxxxx
xg. x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5653.90440971  1811.65086161  3262.08768664 -4966.32149798]
------
Step:5, Action:East
State  189
Old Q Values:  [ 64.81505849 540.18012891 786.09268966 154.04646645]
New Q values:  [  64.81505849  540.18012891 4959.54141652  154.04646645]
Reward: 9  Episode Reward:  15
xxxxx
x g x
x a.x
x...x
xxxxx
Step:6, Action:West
State  200
Old Q Values:  [   62.8218634  15465.68113551  1171.23248475   408.67479662]
New Q values:  [   62.8218634  15465.68113551  1171.23248475  1141.49622464]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5653.90440971  1811.65086161  3262.08768664 -4966.32149798]
------
Step:7, Action:East
State  180
Old Q Values:  [-5653.90440971  1811.65086161  3262.08768664 -4966.32149798]
New Q values:  [-5653.90440971  1811.65086161  5943.93941531 -4966.32149798]
Reward: -1  Episode Reward:  13
xxxxx
x . x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  15465.68113551  1171.23248475  1141.49622464]
------
Step:8, Action:South
State  194
Old Q Values:  [-6.00000000e-01  4.13841756e+03  3.55599198e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.74199057e+03  3.55599198e+03  1.20371620e+03]
Reward: 9  Episode Reward:  22
xxxxx
x . x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114 10270.7451741 ]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3113.63241095  7755.46792484]
New Q values:  [-2527.46239811 -8521.23367799  4780.40601542  7755.46792484]
Reward: 9  Episode Reward:  31
xxxxx
x . x
x  .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3638.78432487 -1609.84182322 -8192.20126966 11765.17683679]
------
Step:10, Action:North
State  288
Old Q Values:  [ 3638.78432487 -1609.84182322 -8192.20126966 11765.17683679]
New Q values:  [ 3028.09477351 -1609.84182322 -8192.20126966 11765.17683679]
Reward: 9  Episode Reward:  40
xxxxx
x . x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5223.93681187 2197.21948284 1542.2004081  3394.63016213]
------
Step:11, Action:North
State  210
Old Q Values:  [5223.93681187 2197.21948284 1542.2004081  3394.63016213]
New Q values:  [22000.15361878  2197.21948284  1542.2004081   3394.63016213]
Reward: -1  Episode Reward:  39
xxxxx
x .ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254  8302.27318729  -180.00807518 66370.59631344]
------
Step:12, Action:West
State  130
Old Q Values:  [34597.52013254  8302.27318729  -180.00807518 66370.59631344]
New Q values:  [34597.52013254  8302.27318729  -180.00807518 66375.77539999]
Reward: 9  Episode Reward:  48
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132740.45624872]
------
Step:13, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   377.90929196]
New Q values:  [ -281.736      -3455.78276043   131.01251604   375.54037776]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  749.92220323  170.11330766 -252.78192178]
------
Step:14, Action:South
State  98
Old Q Values:  [    0.         41136.94667617 69821.51860553     0.        ]
New Q values:  [    0.         42813.15027277 69821.51860553     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:15, Action:East
State  187
Old Q Values:  [ 206.23299394    0.         5874.50879038    0.        ]
New Q values:  [ 206.23299394    0.         2581.20594484    0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:16, Action:East
State  200
Old Q Values:  [   62.8218634  15465.68113551  1171.23248475  1141.49622464]
New Q values:  [   62.8218634  15465.68113551  1144.15397381  1141.49622464]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2186.01597142   372.98523174 -8896.20691497  2254.20326636]
------
Step:17, Action:North
State  216
Old Q Values:  [ 2186.01597142   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 2287.38081227   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.71191475e+03 -3.22965309e-01  1.32899495e+03]
------
Step:18, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.71191475e+03 -3.22965309e-01  1.32899495e+03]
New Q values:  [ 7.64171987e+01  4.71191475e+03 -3.22965309e-01  7.11334672e+02]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   601.12230289]
------
Step:19, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   375.54037776]
New Q values:  [ -281.736      -3455.78276043   131.01251604   374.59281207]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  749.92220323  170.11330766 -252.78192178]
------
Step:20, Action:South
State  98
Old Q Values:  [    0.         42813.15027277 69821.51860553     0.        ]
New Q values:  [    0.         43483.63171141 69821.51860553     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:21, Action:East
State  187
Old Q Values:  [ 206.23299394    0.         2581.20594484    0.        ]
New Q values:  [ 206.23299394    0.         1263.88480662    0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:22, Action:East
State  194
Old Q Values:  [-6.00000000e-01  4.74199057e+03  3.55599198e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.74199057e+03  8.02184288e+03  1.20371620e+03]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[22000.15361878  2197.21948284  1542.2004081   3394.63016213]
------
Step:23, Action:North
State  216
Old Q Values:  [ 2287.38081227   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 2327.92674861   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.71191475e+03 -3.22965309e-01  7.11334672e+02]
------
Step:24, Action:South
State  136
Old Q Values:  [ -170.77177351  4463.89455936 -2383.80019164   278.75756388]
New Q values:  [ -170.77177351  2483.33584833 -2383.80019164   278.75756388]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2327.92674861   372.98523174 -8896.20691497  2254.20326636]
------
Step:25, Action:North
State  216
Old Q Values:  [ 2327.92674861   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 2344.14512314   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.71191475e+03 -3.22965309e-01  7.11334672e+02]
------
Step:26, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.71191475e+03 -3.22965309e-01  7.11334672e+02]
New Q values:  [ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  7.11334672e+02]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2344.14512314   372.98523174 -8896.20691497  2254.20326636]
------
Step:27, Action:North
State  216
Old Q Values:  [ 2344.14512314   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 1713.28087982   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  7.11334672e+02]
------
Step:28, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  7.11334672e+02]
New Q values:  [ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  3.96311712e+02]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   374.59281207]
------
Step:29, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337  -1878.84674883]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337   -731.22607884]
Reward: -1  Episode Reward:  31
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6          69.70873563   65.14560537    0.        ]
------
Step:30, Action:South
State  104
Old Q Values:  [-8652.84        6626.09499492  1078.33520689 -8652.84      ]
New Q values:  [-8652.84        6868.26603854  1078.33520689 -8652.84      ]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[ -739.80237515     0.         14061.4268019      0.        ]
------
Step:31, Action:East
State  185
Old Q Values:  [1168.99359187    0.         3185.98075685 -178.98      ]
New Q values:  [1168.99359187    0.         5913.49664339 -178.98      ]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  15465.68113551  1144.15397381  1141.49622464]
------
Step:32, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.34197153e+03 -8.87652194e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  5.93832247e+03 -8.87652194e+03  2.00341972e+02]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4311.00504514 18007.11286699]
------
Step:33, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4780.40601542  7755.46792484]
New Q values:  [-2527.46239811 -8521.23367799  4780.40601542 75252.820638  ]
Reward: 100009  Episode Reward:  100037
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1303.93161572  181.21829111 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3191.67190776    5.4           0.        ]
New Q values:  [ 221.30610858 1448.85411192    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 555.95116271  274.79279816 -782.40989094  -30.99112081]
------
Step:2, Action:North
State  181
Old Q Values:  [ 555.95116271  274.79279816 -782.40989094  -30.99112081]
New Q values:  [ 612.9599498   274.79279816 -782.40989094  -30.99112081]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1303.93161572  181.21829111 -120.29354603]
------
Step:3, Action:South
State  110
Old Q Values:  [ -239.29051573 -7599.63948656   411.01964096  -180.6       ]
New Q values:  [ -239.29051573 -7257.27397003   411.01964096  -180.6       ]
Reward: -10001  Episode Reward:  -9993
xxxxx
x ..x
xg .x
x...x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 612.9599498   274.79279816 -782.40989094  -30.99112081]
------
Step:1, Action:North
State  181
Old Q Values:  [ 612.9599498   274.79279816 -782.40989094  -30.99112081]
New Q values:  [ 412.30758911  274.79279816 -782.40989094  -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  539.0786973    -8.57207238 -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-8463.16477134  1594.76504327   654.62565385     0.        ]
New Q values:  [-8463.16477134  2420.4878419    654.62565385     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5653.90440971  1811.65086161  5943.93941531 -4966.32149798]
------
Step:3, Action:East
State  180
Old Q Values:  [-5653.90440971  1811.65086161  5943.93941531 -4966.32149798]
New Q values:  [-5653.90440971  1811.65086161  7022.68010678 -4966.32149798]
Reward: 9  Episode Reward:  17
xxxxx
x . x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  15465.68113551  1144.15397381  1141.49622464]
------
Step:4, Action:South
State  194
Old Q Values:  [-6.00000000e-01  4.74199057e+03  8.02184288e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.98341978e+03  8.02184288e+03  1.20371620e+03]
Reward: 9  Episode Reward:  26
xxxxx
x . x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114 10270.7451741 ]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4780.40601542 75252.820638  ]
New Q values:  [-2527.46239811 -8521.23367799  5447.1154572  75252.820638  ]
Reward: 9  Episode Reward:  35
xxxxx
x . x
x  .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3028.09477351 -1609.84182322 -8192.20126966 11765.17683679]
------
Step:6, Action:North
State  288
Old Q Values:  [ 3028.09477351 -1609.84182322 -8192.20126966 11765.17683679]
New Q values:  [ 7816.68399504 -1609.84182322 -8192.20126966 11765.17683679]
Reward: 9  Episode Reward:  44
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[22000.15361878  2197.21948284  1542.2004081   3394.63016213]
------
Step:7, Action:North
State  210
Old Q Values:  [22000.15361878  2197.21948284  1542.2004081   3394.63016213]
New Q values:  [28712.19406751  2197.21948284  1542.2004081   3394.63016213]
Reward: -1  Episode Reward:  43
xxxxx
x .ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254  8302.27318729  -180.00807518 66375.77539999]
------
Step:8, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  3.96311712e+02]
New Q values:  [ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  3.44261376e+02]
Reward: 9  Episode Reward:  52
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   601.12230289]
------
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   601.12230289]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   464.82558213]
Reward: -1  Episode Reward:  51
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  749.92220323  170.11330766 -252.78192178]
------
Step:10, Action:South
State  107
Old Q Values:  [-252.35169558  749.92220323  170.11330766 -252.78192178]
New Q values:  [-252.35169558  678.53432328  170.11330766 -252.78192178]
Reward: -1  Episode Reward:  50
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 206.23299394    0.         1263.88480662    0.        ]
------
Step:11, Action:East
State  187
Old Q Values:  [ 206.23299394    0.         1263.88480662    0.        ]
New Q values:  [206.23299394   0.         736.95635134   0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:12, Action:East
State  194
Old Q Values:  [-6.00000000e-01  4.98341978e+03  8.02184288e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.98341978e+03  1.18217954e+04  1.20371620e+03]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28712.19406751  2197.21948284  1542.2004081   3394.63016213]
------
Step:13, Action:North
State  216
Old Q Values:  [ 1713.28087982   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 1460.93518249   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  47
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  3.44261376e+02]
------
Step:14, Action:South
State  130
Old Q Values:  [34597.52013254  8302.27318729  -180.00807518 66375.77539999]
New Q values:  [34597.52013254 11933.96749517  -180.00807518 66375.77539999]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28712.19406751  2197.21948284  1542.2004081   3394.63016213]
------
Step:15, Action:North
State  218
Old Q Values:  [  298.44678463 11216.26942023     0.           430.34233803]
New Q values:  [  895.00154441 11216.26942023     0.           430.34233803]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  3.44261376e+02]
------
Step:16, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.58740944e+03 -3.22965309e-01  3.44261376e+02]
New Q values:  [ 7.64171987e+01  1.71062475e+03 -3.22965309e-01  3.44261376e+02]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1460.93518249   372.98523174 -8896.20691497  2254.20326636]
------
Step:17, Action:North
State  218
Old Q Values:  [  895.00154441 11216.26942023     0.           430.34233803]
New Q values:  [  870.58804396 11216.26942023     0.           430.34233803]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.71062475e+03 -3.22965309e-01  3.44261376e+02]
------
Step:18, Action:South
State  130
Old Q Values:  [34597.52013254 11933.96749517  -180.00807518 66375.77539999]
New Q values:  [34597.52013254 13386.64521832  -180.00807518 66375.77539999]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28712.19406751  2197.21948284  1542.2004081   3394.63016213]
------
Step:19, Action:North
State  218
Old Q Values:  [  870.58804396 11216.26942023     0.           430.34233803]
New Q values:  [  860.82264378 11216.26942023     0.           430.34233803]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.71062475e+03 -3.22965309e-01  3.44261376e+02]
------
Step:20, Action:South
State  130
Old Q Values:  [34597.52013254 13386.64521832  -180.00807518 66375.77539999]
New Q values:  [34597.52013254 13967.71630758  -180.00807518 66375.77539999]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28712.19406751  2197.21948284  1542.2004081   3394.63016213]
------
Step:21, Action:North
State  216
Old Q Values:  [ 1460.93518249   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 1096.96149919   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.71062475e+03 -3.22965309e-01  3.44261376e+02]
------
Step:22, Action:South
State  130
Old Q Values:  [34597.52013254 13967.71630758  -180.00807518 66375.77539999]
New Q values:  [34597.52013254 14200.14474329  -180.00807518 66375.77539999]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28712.19406751  2197.21948284  1542.2004081   3394.63016213]
------
Step:23, Action:North
State  218
Old Q Values:  [  860.82264378 11216.26942023     0.           430.34233803]
New Q values:  [  856.91648371 11216.26942023     0.           430.34233803]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.71062475e+03 -3.22965309e-01  3.44261376e+02]
------
Step:24, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.71062475e+03 -3.22965309e-01  3.44261376e+02]
New Q values:  [ 7.64171987e+01  1.35991088e+03 -3.22965309e-01  3.44261376e+02]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1096.96149919   372.98523174 -8896.20691497  2254.20326636]
------
Step:25, Action:North
State  216
Old Q Values:  [ 1096.96149919   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 1183.18535418   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  35
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2483.33584833 -2383.80019164   278.75756388]
------
Step:26, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.35991088e+03 -3.22965309e-01  3.44261376e+02]
New Q values:  [ 7.64171987e+01  1.21962533e+03 -3.22965309e-01  3.44261376e+02]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1183.18535418   372.98523174 -8896.20691497  2254.20326636]
------
Step:27, Action:North
State  218
Old Q Values:  [  856.91648371 11216.26942023     0.           430.34233803]
New Q values:  [  708.05419324 11216.26942023     0.           430.34233803]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.21962533e+03 -3.22965309e-01  3.44261376e+02]
------
Step:28, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.21962533e+03 -3.22965309e-01  3.44261376e+02]
New Q values:  [ 7.64171987e+01  3.85213096e+03 -3.22965309e-01  3.44261376e+02]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  708.05419324 11216.26942023     0.           430.34233803]
------
Step:29, Action:North
State  216
Old Q Values:  [ 1183.18535418   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 1628.31342939   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.85213096e+03 -3.22965309e-01  3.44261376e+02]
------
Step:30, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.85213096e+03 -3.22965309e-01  3.44261376e+02]
New Q values:  [ 7.64171987e+01  3.85213096e+03 -3.22965309e-01  2.76552225e+02]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   464.82558213]
------
Step:31, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   374.59281207]
New Q values:  [ -281.736      -3455.78276043   131.01251604   352.79742181]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  678.53432328  170.11330766 -252.78192178]
------
Step:32, Action:South
State  107
Old Q Values:  [-252.35169558  678.53432328  170.11330766 -252.78192178]
New Q values:  [-252.35169558 2044.86272233  170.11330766 -252.78192178]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1168.99359187    0.         5913.49664339 -178.98      ]
------
Step:33, Action:North
State  187
Old Q Values:  [206.23299394   0.         736.95635134   0.        ]
New Q values:  [695.35201428   0.         736.95635134   0.        ]
Reward: -1  Episode Reward:  27
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2044.86272233  170.11330766 -252.78192178]
------
Step:34, Action:South
State  107
Old Q Values:  [-252.35169558 2044.86272233  170.11330766 -252.78192178]
New Q values:  [-252.35169558 1038.43199433  170.11330766 -252.78192178]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[695.35201428   0.         736.95635134   0.        ]
------
Step:35, Action:East
State  187
Old Q Values:  [695.35201428   0.         736.95635134   0.        ]
New Q values:  [695.35201428   0.         526.18496922   0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:36, Action:East
State  200
Old Q Values:  [   62.8218634  15465.68113551  1144.15397381  1141.49622464]
New Q values:  [   62.8218634  15465.68113551  1133.32256943  1141.49622464]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1628.31342939   372.98523174 -8896.20691497  2254.20326636]
------
Step:37, Action:North
State  216
Old Q Values:  [ 1628.31342939   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 1395.72612625   372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  23
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2483.33584833 -2383.80019164   278.75756388]
------
Step:38, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.85213096e+03 -3.22965309e-01  2.76552225e+02]
New Q values:  [ 7.64171987e+01  2.21651336e+03 -3.22965309e-01  2.76552225e+02]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1395.72612625   372.98523174 -8896.20691497  2254.20326636]
------
Step:39, Action:North
State  216
Old Q Values:  [ 1395.72612625   372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 1302.691205     372.98523174 -8896.20691497  2254.20326636]
Reward: -1  Episode Reward:  21
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2483.33584833 -2383.80019164   278.75756388]
------
Step:40, Action:South
State  136
Old Q Values:  [ -170.77177351  2483.33584833 -2383.80019164   278.75756388]
New Q values:  [ -170.77177351  1668.99531924 -2383.80019164   278.75756388]
Reward: -1  Episode Reward:  20
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1302.691205     372.98523174 -8896.20691497  2254.20326636]
------
Step:41, Action:West
State  216
Old Q Values:  [ 1302.691205     372.98523174 -8896.20691497  2254.20326636]
New Q values:  [ 1302.691205     372.98523174 -8896.20691497  5540.7856472 ]
Reward: -1  Episode Reward:  19
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  15465.68113551  1133.32256943  1141.49622464]
------
Step:42, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  5.93832247e+03 -8.87652194e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  7.77686285e+03 -8.87652194e+03  2.00341972e+02]
Reward: -1  Episode Reward:  18
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4311.00504514 18007.11286699]
------
Step:43, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5447.1154572  75252.820638  ]
New Q values:  [ -2527.46239811  -8521.23367799   5447.1154572  102251.76172327]
Reward: 100009  Episode Reward:  100027
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28861.93746483 64625.04664387 -4228.04879148  1851.08132227]
------
Step:1, Action:South
State  210
Old Q Values:  [28712.19406751  2197.21948284  1542.2004081   3394.63016213]
New Q values:  [28712.19406751  4413.84084417  1542.2004081   3394.63016213]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7816.68399504 -1609.84182322 -8192.20126966 11765.17683679]
------
Step:2, Action:North
State  288
Old Q Values:  [ 7816.68399504 -1609.84182322 -8192.20126966 11765.17683679]
New Q values:  [11739.73181827 -1609.84182322 -8192.20126966 11765.17683679]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28712.19406751  4413.84084417  1542.2004081   3394.63016213]
------
Step:3, Action:North
State  208
Old Q Values:  [28861.93746483 64625.04664387 -4228.04879148  1851.08132227]
New Q values:  [31462.90760593 64625.04664387 -4228.04879148  1851.08132227]
Reward: 9  Episode Reward:  17
xxxxx
x..ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254 14200.14474329  -180.00807518 66375.77539999]
------
Step:4, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.21651336e+03 -3.22965309e-01  2.76552225e+02]
New Q values:  [ 7.64171987e+01  2.21651336e+03 -3.22965309e-01  2.21860117e+02]
Reward: 9  Episode Reward:  26
xxxxx
x.a x
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   352.79742181]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   352.79742181]
New Q values:  [ -281.736      -3455.78276043   131.01251604   239.75270846]
Reward: 9  Episode Reward:  35
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   310.77913245  -180.6       ]
------
Step:6, Action:East
State  106
Old Q Values:  [ -180.6        -8952.15415062   310.77913245  -180.6       ]
New Q values:  [ -180.6        -8952.15415062   195.63746552  -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   239.75270846]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   239.75270846]
New Q values:  [ -281.736      -3455.78276043   131.01251604   153.99232304]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   195.63746552  -180.6       ]
------
Step:8, Action:East
State  106
Old Q Values:  [ -180.6        -8952.15415062   195.63746552  -180.6       ]
New Q values:  [ -180.6        -8952.15415062   123.85268312  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604   153.99232304]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604   153.99232304]
New Q values:  [ -281.736      -3455.78276043   131.01251604    98.15273415]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   123.85268312  -180.6       ]
------
Step:10, Action:East
State  107
Old Q Values:  [-252.35169558 1038.43199433  170.11330766 -252.78192178]
New Q values:  [-252.35169558 1038.43199433  106.74907788 -252.78192178]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -3455.78276043   131.01251604    98.15273415]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -3455.78276043   131.01251604    98.15273415]
New Q values:  [ -281.736      -3455.78276043   716.75901548    98.15273415]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.21651336e+03 -3.22965309e-01  2.21860117e+02]
------
Step:12, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.21651336e+03 -3.22965309e-01  2.21860117e+02]
New Q values:  [ 7.64171987e+01  4.25088617e+03 -3.22965309e-01  2.21860117e+02]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  708.05419324 11216.26942023     0.           430.34233803]
------
Step:13, Action:North
State  216
Old Q Values:  [ 1302.691205     372.98523174 -8896.20691497  5540.7856472 ]
New Q values:  [ 1795.74233344   372.98523174 -8896.20691497  5540.7856472 ]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.25088617e+03 -3.22965309e-01  2.21860117e+02]
------
Step:14, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.25088617e+03 -3.22965309e-01  2.21860117e+02]
New Q values:  [ 7.64171987e+01  4.25088617e+03 -3.22965309e-01  2.27591721e+02]
Reward: -1  Episode Reward:  26
xxxxx
x a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   464.82558213]
------
Step:15, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   464.82558213]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   496.85983115]
Reward: -1  Episode Reward:  25
xxxxx
xa  x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1038.43199433  106.74907788 -252.78192178]
------
Step:16, Action:South
State  107
Old Q Values:  [-252.35169558 1038.43199433  106.74907788 -252.78192178]
New Q values:  [-252.35169558 2194.82179075  106.74907788 -252.78192178]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1168.99359187    0.         5913.49664339 -178.98      ]
------
Step:17, Action:North
State  184
Old Q Values:  [ -739.80237515     0.         14061.4268019      0.        ]
New Q values:  [ -259.36514512     0.         14061.4268019      0.        ]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   123.85268312  -180.6       ]
------
Step:18, Action:East
State  110
Old Q Values:  [ -239.29051573 -7257.27397003   411.01964096  -180.6       ]
New Q values:  [ -239.29051573 -7257.27397003   378.83556103  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x   x
xg. x
xxxxx
Step:19, Action:South
State  122
Old Q Values:  [ -281.736      -3455.78276043   716.75901548    98.15273415]
New Q values:  [ -281.736      -1150.91067548   716.75901548    98.15273415]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:20, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.87289448e+04 1.90998438e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.87289448e+04 4.12827458e+03 0.00000000e+00]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  708.05419324 11216.26942023     0.           430.34233803]
------
Step:21, Action:North
State  218
Old Q Values:  [  708.05419324 11216.26942023     0.           430.34233803]
New Q values:  [ 1557.88752874 11216.26942023     0.           430.34233803]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.25088617e+03 -3.22965309e-01  2.27591721e+02]
------
Step:22, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.25088617e+03 -3.22965309e-01  2.27591721e+02]
New Q values:  [ 7.64171987e+01  5.06463529e+03 -3.22965309e-01  2.27591721e+02]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 1557.88752874 11216.26942023     0.           430.34233803]
------
Step:23, Action:South
State  218
Old Q Values:  [ 1557.88752874 11216.26942023     0.           430.34233803]
New Q values:  [1557.88752874 8015.46081913    0.          430.34233803]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11739.73181827 -1609.84182322 -8192.20126966 11765.17683679]
------
Step:24, Action:North
State  288
Old Q Values:  [11739.73181827 -1609.84182322 -8192.20126966 11765.17683679]
New Q values:  [ 7099.93097305 -1609.84182322 -8192.20126966 11765.17683679]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1557.88752874 8015.46081913    0.          430.34233803]
------
Step:25, Action:South
State  218
Old Q Values:  [1557.88752874 8015.46081913    0.          430.34233803]
New Q values:  [1557.88752874 6735.13737869    0.          430.34233803]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7099.93097305 -1609.84182322 -8192.20126966 11765.17683679]
------
Step:26, Action:North
State  288
Old Q Values:  [ 7099.93097305 -1609.84182322 -8192.20126966 11765.17683679]
New Q values:  [ 4859.91360282 -1609.84182322 -8192.20126966 11765.17683679]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1557.88752874 6735.13737869    0.          430.34233803]
------
Step:27, Action:North
State  216
Old Q Values:  [ 1795.74233344   372.98523174 -8896.20691497  5540.7856472 ]
New Q values:  [ 2237.08752178   372.98523174 -8896.20691497  5540.7856472 ]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.06463529e+03 -3.22965309e-01  2.27591721e+02]
------
Step:28, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  5.06463529e+03 -3.22965309e-01  2.27591721e+02]
New Q values:  [ 7.64171987e+01  5.06463529e+03 -3.22965309e-01  2.39494638e+02]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   496.85983115]
------
Step:29, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   496.85983115]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   856.59046969]
Reward: -1  Episode Reward:  21
xxxxx
xa  x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2194.82179075  106.74907788 -252.78192178]
------
Step:30, Action:South
State  107
Old Q Values:  [-252.35169558 2194.82179075  106.74907788 -252.78192178]
New Q values:  [-252.35169558 2651.37770932  106.74907788 -252.78192178]
Reward: -1  Episode Reward:  20
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1168.99359187    0.         5913.49664339 -178.98      ]
------
Step:31, Action:North
State  185
Old Q Values:  [1168.99359187    0.         5913.49664339 -178.98      ]
New Q values:  [1262.41074955    0.         5913.49664339 -178.98      ]
Reward: -1  Episode Reward:  19
xxxxx
xa  x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2651.37770932  106.74907788 -252.78192178]
------
Step:32, Action:South
State  107
Old Q Values:  [-252.35169558 2651.37770932  106.74907788 -252.78192178]
New Q values:  [-252.35169558 2834.00007675  106.74907788 -252.78192178]
Reward: -1  Episode Reward:  18
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1262.41074955    0.         5913.49664339 -178.98      ]
------
Step:33, Action:North
State  185
Old Q Values:  [1262.41074955    0.         5913.49664339 -178.98      ]
New Q values:  [1354.56432284    0.         5913.49664339 -178.98      ]
Reward: -1  Episode Reward:  17
xxxxx
xa  x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2834.00007675  106.74907788 -252.78192178]
------
Step:34, Action:South
State  105
Old Q Values:  [-180.6          69.70873563   65.14560537    0.        ]
New Q values:  [-180.6        1801.33248727   65.14560537    0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x  gx
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         5913.49664339 -178.98      ]
------
Step:35, Action:East
State  185
Old Q Values:  [1354.56432284    0.         5913.49664339 -178.98      ]
New Q values:  [1354.56432284    0.         7004.50299801 -178.98      ]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  15465.68113551  1133.32256943  1141.49622464]
------
Step:36, Action:South
State  200
Old Q Values:  [   62.8218634  15465.68113551  1133.32256943  1141.49622464]
New Q values:  [   62.8218634  36867.20097118  1133.32256943  1141.49622464]
Reward: 9  Episode Reward:  24
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799   5447.1154572  102251.76172327]
------
Step:37, Action:West
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799   5447.1154572  102251.76172327]
New Q values:  [ -2527.46239811  -8521.23367799   5447.1154572  113051.33815737]
Reward: 100009  Episode Reward:  100033
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799   5447.1154572  113051.33815737]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114 10270.7451741 ]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114  6458.50791606]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2147.15656639 -2735.46306511  7816.03282141 -2601.74710518]
------
Step:2, Action:East
State  260
Old Q Values:  [ 2147.15656639 -2735.46306511  7816.03282141 -2601.74710518]
New Q values:  [ 2147.15656639 -2735.46306511 37041.21457578 -2601.74710518]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799   5447.1154572  113051.33815737]
------
Step:3, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114  6458.50791606]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114  4045.5429541 ]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4875.79929224   26.73544252 2949.0999536   -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [4875.79929224   26.73544252 2949.0999536   -35.88578819]
New Q values:  [2079.41199363   26.73544252 2949.0999536   -35.88578819]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 412.30758911  274.79279816 -782.40989094  -30.99112081]
------
Step:5, Action:North
State  180
Old Q Values:  [-5653.90440971  1811.65086161  7022.68010678 -4966.32149798]
New Q values:  [-2148.51109557  1811.65086161  7022.68010678 -4966.32149798]
Reward: -1  Episode Reward:  15
xxxxx
xa..x
xg..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -7257.27397003   378.83556103  -180.6       ]
------
Step:6, Action:East
State  108
Old Q Values:  [-8463.16477134  2420.4878419    654.62565385     0.        ]
New Q values:  [-8463.16477134  2420.4878419    809.24484357     0.        ]
Reward: 9  Episode Reward:  24
xxxxx
xga.x
x ..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 1806.64860675  963.6944397 ]
------
Step:7, Action:East
State  126
Old Q Values:  [   0.          331.64678262 2371.22296532 1134.18629274]
New Q values:  [   0.          331.64678262 2473.27977453 1134.18629274]
Reward: 9  Episode Reward:  33
xxxxx
x  ax
xg..x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.06463529e+03 -3.22965309e-01  2.39494638e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.06463529e+03 -3.22965309e-01  2.39494638e+02]
New Q values:  [ 7.64171987e+01  1.06449123e+04 -3.22965309e-01  2.39494638e+02]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x .ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28712.19406751  4413.84084417  1542.2004081   3394.63016213]
------
Step:9, Action:North
State  210
Old Q Values:  [28712.19406751  4413.84084417  1542.2004081   3394.63016213]
New Q values:  [14677.75132844  4413.84084417  1542.2004081   3394.63016213]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x . x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.06449123e+04 -3.22965309e-01  2.39494638e+02]
------
Step:10, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.06449123e+04 -3.22965309e-01  2.39494638e+02]
New Q values:  [ 7.64171987e+01  8.66069033e+03 -3.22965309e-01  2.39494638e+02]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x .ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14677.75132844  4413.84084417  1542.2004081   3394.63016213]
------
Step:11, Action:North
State  210
Old Q Values:  [14677.75132844  4413.84084417  1542.2004081   3394.63016213]
New Q values:  [8468.70763151 4413.84084417 1542.2004081  3394.63016213]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x . x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  8.66069033e+03 -3.22965309e-01  2.39494638e+02]
------
Step:12, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  8.66069033e+03 -3.22965309e-01  2.39494638e+02]
New Q values:  [ 7.64171987e+01  6.00428842e+03 -3.22965309e-01  2.39494638e+02]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8468.70763151 4413.84084417 1542.2004081  3394.63016213]
------
Step:13, Action:North
State  208
Old Q Values:  [31462.90760593 64625.04664387 -4228.04879148  1851.08132227]
New Q values:  [14385.84956926 64625.04664387 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x .gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  6.00428842e+03 -3.22965309e-01  2.39494638e+02]
------
Step:14, Action:West
State  136
Old Q Values:  [ -170.77177351  1668.99531924 -2383.80019164   278.75756388]
New Q values:  [ -170.77177351  1668.99531924 -2383.80019164   204.21722834]
Reward: -1  Episode Reward:  36
xxxxx
x agx
x . x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:15, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337   -731.22607884]
New Q values:  [-9594.56523706 -8069.05606225  1065.1249337   -131.36682235]
Reward: -1  Episode Reward:  35
xxxxx
xag x
x . x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  539.0786973    -8.57207238 -180.6       ]
------
Step:16, Action:South
State  109
Old Q Values:  [-241.10880094  539.0786973    -8.57207238 -180.6       ]
New Q values:  [-241.10880094  338.72375565   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
xa. x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 412.30758911  274.79279816 -782.40989094  -30.99112081]
------
Step:17, Action:North
State  181
Old Q Values:  [ 412.30758911  274.79279816 -782.40989094  -30.99112081]
New Q values:  [ 265.94016234  274.79279816 -782.40989094  -30.99112081]
Reward: -1  Episode Reward:  33
xxxxx
xag x
x . x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  338.72375565   -8.57207238 -180.6       ]
------
Step:18, Action:South
State  108
Old Q Values:  [-8463.16477134  2420.4878419    809.24484357     0.        ]
New Q values:  [-8463.16477134  3074.39916879   809.24484357     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
xa. x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2148.51109557  1811.65086161  7022.68010678 -4966.32149798]
------
Step:19, Action:East
State  188
Old Q Values:  [-6523.78898263  6180.80025855  1223.43046172     0.        ]
New Q values:  [-6523.78898263  6180.80025855   801.0811495      0.        ]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.          977.00865364 1021.02988271  441.58769553]
------
Step:20, Action:East
State  204
Old Q Values:  [   0.          977.00865364 1021.02988271  441.58769553]
New Q values:  [   0.          977.00865364 2070.04764724  441.58769553]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2237.08752178   372.98523174 -8896.20691497  5540.7856472 ]
------
Step:21, Action:West
State  216
Old Q Values:  [ 2237.08752178   372.98523174 -8896.20691497  5540.7856472 ]
New Q values:  [ 2237.08752178   372.98523174 -8896.20691497  2836.72855305]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.          977.00865364 2070.04764724  441.58769553]
------
Step:22, Action:East
State  204
Old Q Values:  [   0.          977.00865364 2070.04764724  441.58769553]
New Q values:  [   0.          977.00865364 1678.43762481  441.58769553]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2237.08752178   372.98523174 -8896.20691497  2836.72855305]
------
Step:23, Action:West
State  216
Old Q Values:  [ 2237.08752178   372.98523174 -8896.20691497  2836.72855305]
New Q values:  [ 2237.08752178   372.98523174 -8896.20691497  1637.62270866]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.          977.00865364 1678.43762481  441.58769553]
------
Step:24, Action:East
State  204
Old Q Values:  [   0.          977.00865364 1678.43762481  441.58769553]
New Q values:  [   0.          977.00865364 1341.90130646  441.58769553]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2237.08752178   372.98523174 -8896.20691497  1637.62270866]
------
Step:25, Action:North
State  216
Old Q Values:  [ 2237.08752178   372.98523174 -8896.20691497  1637.62270866]
New Q values:  [ 1394.93360448   372.98523174 -8896.20691497  1637.62270866]
Reward: -1  Episode Reward:  35
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1668.99531924 -2383.80019164   204.21722834]
------
Step:26, Action:South
State  136
Old Q Values:  [ -170.77177351  1668.99531924 -2383.80019164   204.21722834]
New Q values:  [ -170.77177351  1158.28494029 -2383.80019164   204.21722834]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1394.93360448   372.98523174 -8896.20691497  1637.62270866]
------
Step:27, Action:West
State  216
Old Q Values:  [ 1394.93360448   372.98523174 -8896.20691497  1637.62270866]
New Q values:  [ 1394.93360448   372.98523174 -8896.20691497   776.32197668]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         406.24297738   0.         198.38683706]
------
Step:28, Action:South
State  204
Old Q Values:  [   0.          977.00865364 1341.90130646  441.58769553]
New Q values:  [   0.         1603.86634769 1341.90130646  441.58769553]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114  4045.5429541 ]
------
Step:29, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114  4045.5429541 ]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114  2502.34716772]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2079.41199363   26.73544252 2949.0999536   -35.88578819]
------
Step:30, Action:North
State  261
Old Q Values:  [2079.41199363   26.73544252 2949.0999536   -35.88578819]
New Q values:  [2319.02722241   26.73544252 2949.0999536   -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849  540.18012891 4959.54141652  154.04646645]
------
Step:31, Action:South
State  189
Old Q Values:  [  64.81505849  540.18012891 4959.54141652  154.04646645]
New Q values:  [  64.81505849 1100.20203764 4959.54141652  154.04646645]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2319.02722241   26.73544252 2949.0999536   -35.88578819]
------
Step:32, Action:East
State  260
Old Q Values:  [ 2147.15656639 -2735.46306511 37041.21457578 -2601.74710518]
New Q values:  [ 2147.15656639 -2735.46306511 15566.58998063 -2601.74710518]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114  2502.34716772]
------
Step:33, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114  2502.34716772]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114  5670.31586128]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2147.15656639 -2735.46306511 15566.58998063 -2601.74710518]
------
Step:34, Action:East
State  261
Old Q Values:  [2319.02722241   26.73544252 2949.0999536   -35.88578819]
New Q values:  [2319.02722241   26.73544252 2880.13473982  -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114  5670.31586128]
------
Step:35, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114  5670.31586128]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114  3131.56676646]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2319.02722241   26.73544252 2880.13473982  -35.88578819]
------
Step:36, Action:East
State  261
Old Q Values:  [2319.02722241   26.73544252 2880.13473982  -35.88578819]
New Q values:  [2319.02722241   26.73544252 1346.97172023  -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 651.72608099]
------
Step:37, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114  3131.56676646]
New Q values:  [   16.82637525 -5807.06396197  2432.04636114  1947.73487331]
Reward: -1  Episode Reward:  23
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2319.02722241   26.73544252 1346.97172023  -35.88578819]
------
Step:38, Action:North
State  261
Old Q Values:  [2319.02722241   26.73544252 1346.97172023  -35.88578819]
New Q values:  [2414.87331392   26.73544252 1346.97172023  -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1100.20203764 4959.54141652  154.04646645]
------
Step:39, Action:South
State  188
Old Q Values:  [-6523.78898263  6180.80025855   801.0811495      0.        ]
New Q values:  [-6523.78898263  7141.69709761   801.0811495      0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2147.15656639 -2735.46306511 15566.58998063 -2601.74710518]
------
Step:40, Action:East
State  261
Old Q Values:  [2414.87331392   26.73544252 1346.97172023  -35.88578819]
New Q values:  [2414.87331392   26.73544252 1267.80259643  -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114  1947.73487331]
------
Step:41, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549  4311.00504514 18007.11286699]
New Q values:  [  870.35122762  -168.92307549 65259.35506909 18007.11286699]
Reward: 100009  Episode Reward:  100029
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -7257.27397003   378.83556103  -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134  3074.39916879   809.24484357     0.        ]
New Q values:  [-8463.16477134  3074.39916879   648.63541754     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1065.1249337   -131.36682235]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1065.1249337   -131.36682235]
New Q values:  [-9594.56523706 -8069.05606225   778.93545557  -131.36682235]
Reward: 9  Episode Reward:  18
xxxxx
x gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1158.28494029 -2383.80019164   204.21722834]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  6.00428842e+03 -3.22965309e-01  2.39494638e+02]
New Q values:  [ 7.64171987e+01  2.82559545e+03 -3.22965309e-01  2.39494638e+02]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1394.93360448   372.98523174 -8896.20691497   776.32197668]
------
Step:4, Action:North
State  216
Old Q Values:  [ 1394.93360448   372.98523174 -8896.20691497   776.32197668]
New Q values:  [ 1405.05207695   372.98523174 -8896.20691497   776.32197668]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.82559545e+03 -3.22965309e-01  2.39494638e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.82559545e+03 -3.22965309e-01  2.39494638e+02]
New Q values:  [ 7.64171987e+01  1.55115380e+03 -3.22965309e-01  2.39494638e+02]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1405.05207695   372.98523174 -8896.20691497   776.32197668]
------
Step:6, Action:North
State  218
Old Q Values:  [1557.88752874 6735.13737869    0.          430.34233803]
New Q values:  [1087.90115249 6735.13737869    0.          430.34233803]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.55115380e+03 -3.22965309e-01  2.39494638e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.55115380e+03 -3.22965309e-01  2.39494638e+02]
New Q values:  [ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  2.39494638e+02]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x .ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1087.90115249 6735.13737869    0.          430.34233803]
------
Step:8, Action:North
State  216
Old Q Values:  [ 1405.05207695   372.98523174 -8896.20691497   776.32197668]
New Q values:  [ 1353.54165126   372.98523174 -8896.20691497   776.32197668]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  2.39494638e+02]
------
Step:9, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  2.39494638e+02]
New Q values:  [ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  3.52174996e+02]
Reward: -1  Episode Reward:  21
xxxxx
x a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   856.59046969]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   716.75901548    98.15273415]
New Q values:  [ -281.736      -1150.91067548   716.75901548   429.84057838]
Reward: -1  Episode Reward:  20
xxxxx
xa  x
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1303.93161572  181.21829111 -120.29354603]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 1303.93161572  181.21829111 -120.29354603]
New Q values:  [-177.44732869 2008.83507124  181.21829111 -120.29354603]
Reward: -1  Episode Reward:  19
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1100.20203764 4959.54141652  154.04646645]
------
Step:12, Action:South
State  189
Old Q Values:  [  64.81505849 1100.20203764 4959.54141652  154.04646645]
New Q values:  [  64.81505849 1169.94280923 4959.54141652  154.04646645]
Reward: 9  Episode Reward:  28
xxxxx
x g x
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2414.87331392   26.73544252 1267.80259643  -35.88578819]
------
Step:13, Action:North
State  260
Old Q Values:  [ 2147.15656639 -2735.46306511 15566.58998063 -2601.74710518]
New Q values:  [ 3000.77175584 -2735.46306511 15566.58998063 -2601.74710518]
Reward: -1  Episode Reward:  27
xxxxx
xg  x
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  7141.69709761   801.0811495      0.        ]
------
Step:14, Action:South
State  189
Old Q Values:  [  64.81505849 1169.94280923 4959.54141652  154.04646645]
New Q values:  [  64.81505849 1191.83911787 4959.54141652  154.04646645]
Reward: -1  Episode Reward:  26
xxxxx
x g x
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2414.87331392   26.73544252 1267.80259643  -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [2414.87331392   26.73544252 1267.80259643  -35.88578819]
New Q values:  [2453.21175052   26.73544252 1267.80259643  -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1191.83911787 4959.54141652  154.04646645]
------
Step:16, Action:South
State  183
Old Q Values:  [ 877.23516594  284.46413567 7592.82760041    0.        ]
New Q values:  [ 877.23516594  849.14917943 7592.82760041    0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2453.21175052   26.73544252 1267.80259643  -35.88578819]
------
Step:17, Action:North
State  260
Old Q Values:  [ 3000.77175584 -2735.46306511 15566.58998063 -2601.74710518]
New Q values:  [ 1279.74734295 -2735.46306511 15566.58998063 -2601.74710518]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xa. x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -5.70379540e+03  2.66795469e+02  0.00000000e+00]
------
Step:18, Action:East
State  188
Old Q Values:  [-6523.78898263  7141.69709761   801.0811495      0.        ]
New Q values:  [-6523.78898263  7141.69709761 11385.99275116     0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x   x
xga x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  36867.20097118  1133.32256943  1141.49622464]
------
Step:19, Action:South
State  206
Old Q Values:  [  0.         135.48456638 599.85796923   0.        ]
New Q values:  [  0.         789.20773489 599.85796923   0.        ]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114  1947.73487331]
------
Step:20, Action:East
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799   5447.1154572  113051.33815737]
New Q values:  [ -2527.46239811  -8521.23367799  65713.79923392 113051.33815737]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799  65713.79923392 113051.33815737]
------
Step:1, Action:East
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799  65713.79923392 113051.33815737]
New Q values:  [ -2527.46239811  -8521.23367799  29820.4727446  113051.33815737]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4859.91360282 -1609.84182322 -8192.20126966 11765.17683679]
------
Step:2, Action:West
State  288
Old Q Values:  [ 4859.91360282 -1609.84182322 -8192.20126966 11765.17683679]
New Q values:  [ 4859.91360282 -1609.84182322 -8192.20126966 38620.87218193]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799  29820.4727446  113051.33815737]
------
Step:3, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 65259.35506909 18007.11286699]
New Q values:  [  870.35122762  -168.92307549 65259.35506909  7938.20867195]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2453.21175052   26.73544252 1267.80259643  -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [2453.21175052   26.73544252 1267.80259643  -35.88578819]
New Q values:  [1069.12253966   26.73544252 1267.80259643  -35.88578819]
Reward: 9  Episode Reward:  16
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 265.94016234  274.79279816 -782.40989094  -30.99112081]
------
Step:5, Action:South
State  181
Old Q Values:  [ 265.94016234  274.79279816 -782.40989094  -30.99112081]
New Q values:  [ 265.94016234  489.65789819 -782.40989094  -30.99112081]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1069.12253966   26.73544252 1267.80259643  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [1069.12253966   26.73544252 1267.80259643  -35.88578819]
New Q values:  [ 1.06912254e+03  2.67354425e+01  3.44219225e+04 -3.58857882e+01]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799  29820.4727446  113051.33815737]
------
Step:7, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 65259.35506909  7938.20867195]
New Q values:  [  870.35122762  -168.92307549 65259.35506909 13501.26021452]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1.06912254e+03  2.67354425e+01  3.44219225e+04 -3.58857882e+01]
------
Step:8, Action:East
State  261
Old Q Values:  [ 1.06912254e+03  2.67354425e+01  3.44219225e+04 -3.58857882e+01]
New Q values:  [ 1.06912254e+03  2.67354425e+01  3.33459755e+04 -3.58857882e+01]
Reward: -1  Episode Reward:  12
xxxxx
x..gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 65259.35506909 13501.26021452]
------
Step:9, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549 65259.35506909 13501.26021452]
New Q values:  [  870.35122762  -168.92307549 37689.40368222 13501.26021452]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4859.91360282 -1609.84182322 -8192.20126966 38620.87218193]
------
Step:10, Action:West
State  288
Old Q Values:  [ 4859.91360282 -1609.84182322 -8192.20126966 38620.87218193]
New Q values:  [ 4859.91360282 -1609.84182322 -8192.20126966 26754.56997744]
Reward: -1  Episode Reward:  10
xxxxx
x..gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 37689.40368222 13501.26021452]
------
Step:11, Action:East
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799  29820.4727446  113051.33815737]
New Q values:  [ -2527.46239811  -8521.23367799  19953.96009107 113051.33815737]
Reward: -1  Episode Reward:  9
xxxxx
x.g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4859.91360282 -1609.84182322 -8192.20126966 26754.56997744]
------
Step:12, Action:West
State  288
Old Q Values:  [ 4859.91360282 -1609.84182322 -8192.20126966 26754.56997744]
New Q values:  [ 4859.91360282 -1609.84182322 -8192.20126966 22008.04909564]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 37689.40368222 13501.26021452]
------
Step:13, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549 37689.40368222 13501.26021452]
New Q values:  [  870.35122762  -168.92307549 21677.57620158 13501.26021452]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4859.91360282 -1609.84182322 -8192.20126966 22008.04909564]
------
Step:14, Action:West
State  288
Old Q Values:  [ 4859.91360282 -1609.84182322 -8192.20126966 22008.04909564]
New Q values:  [ 4859.91360282 -1609.84182322 -8192.20126966 15305.89249873]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 21677.57620158 13501.26021452]
------
Step:15, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 21677.57620158 13501.26021452]
New Q values:  [  870.35122762  -168.92307549 21677.57620158 15403.69674032]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1.06912254e+03  2.67354425e+01  3.33459755e+04 -3.58857882e+01]
------
Step:16, Action:East
State  261
Old Q Values:  [ 1.06912254e+03  2.67354425e+01  3.33459755e+04 -3.58857882e+01]
New Q values:  [ 1069.12253966    26.73544252 19841.06306649   -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x..gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 21677.57620158 15403.69674032]
------
Step:17, Action:East
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799  19953.96009107 113051.33815737]
New Q values:  [ -2527.46239811  -8521.23367799  12572.75178605 113051.33815737]
Reward: -1  Episode Reward:  3
xxxxx
x.g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4859.91360282 -1609.84182322 -8192.20126966 15305.89249873]
------
Step:18, Action:West
State  288
Old Q Values:  [ 4859.91360282 -1609.84182322 -8192.20126966 15305.89249873]
New Q values:  [ 4859.91360282 -1609.84182322 -8192.20126966 12625.02985997]
Reward: -1  Episode Reward:  2
xxxxx
x..gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 21677.57620158 15403.69674032]
------
Step:19, Action:East
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799  12572.75178605 113051.33815737]
New Q values:  [ -2527.46239811  -8521.23367799   8816.00967241 113051.33815737]
Reward: -1  Episode Reward:  1
xxxxx
x.g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4859.91360282 -1609.84182322 -8192.20126966 12625.02985997]
------
Step:20, Action:West
State  288
Old Q Values:  [ 4859.91360282 -1609.84182322 -8192.20126966 12625.02985997]
New Q values:  [ 4859.91360282 -1609.84182322 -8192.20126966 38964.8133912 ]
Reward: -1  Episode Reward:  0
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799   8816.00967241 113051.33815737]
------
Step:21, Action:West
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799   8816.00967241 113051.33815737]
New Q values:  [-2527.46239811 -8521.23367799  8816.00967241 51172.2541829 ]
Reward: -1  Episode Reward:  -1
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1069.12253966    26.73544252 19841.06306649   -35.88578819]
------
Step:22, Action:East
State  260
Old Q Values:  [ 1279.74734295 -2735.46306511 15566.58998063 -2601.74710518]
New Q values:  [ 1279.74734295 -2735.46306511 21577.71224712 -2601.74710518]
Reward: -1  Episode Reward:  -2
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8816.00967241 51172.2541829 ]
------
Step:23, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8816.00967241 51172.2541829 ]
New Q values:  [-2527.46239811 -8521.23367799  8816.00967241 26420.62059311]
Reward: -1  Episode Reward:  -3
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1069.12253966    26.73544252 19841.06306649   -35.88578819]
------
Step:24, Action:East
State  260
Old Q Values:  [ 1279.74734295 -2735.46306511 21577.71224712 -2601.74710518]
New Q values:  [ 1279.74734295 -2735.46306511 16556.67107678 -2601.74710518]
Reward: -1  Episode Reward:  -4
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8816.00967241 26420.62059311]
------
Step:25, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8816.00967241 26420.62059311]
New Q values:  [-2527.46239811 -8521.23367799  8816.00967241 16519.96715719]
Reward: -1  Episode Reward:  -5
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1069.12253966    26.73544252 19841.06306649   -35.88578819]
------
Step:26, Action:East
State  261
Old Q Values:  [ 1069.12253966    26.73544252 19841.06306649   -35.88578819]
New Q values:  [ 1069.12253966    26.73544252 12891.81537375   -35.88578819]
Reward: -1  Episode Reward:  -6
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8816.00967241 16519.96715719]
------
Step:27, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8816.00967241 16519.96715719]
New Q values:  [-2527.46239811 -8521.23367799  8816.00967241 11574.38818591]
Reward: -1  Episode Reward:  -7
xxxxx
x...x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1279.74734295 -2735.46306511 16556.67107678 -2601.74710518]
------
Step:28, Action:East
State  260
Old Q Values:  [ 1279.74734295 -2735.46306511 16556.67107678 -2601.74710518]
New Q values:  [ 1279.74734295 -2735.46306511 10094.38488648 -2601.74710518]
Reward: -1  Episode Reward:  -8
xxxxx
x...x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8816.00967241 11574.38818591]
------
Step:29, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8816.00967241 11574.38818591]
New Q values:  [-2527.46239811 -8521.23367799 15215.24788632 11574.38818591]
Reward: -1  Episode Reward:  -9
xxxxx
x...x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4859.91360282 -1609.84182322 -8192.20126966 38964.8133912 ]
------
Step:30, Action:North
State  288
Old Q Values:  [ 4859.91360282 -1609.84182322 -8192.20126966 38964.8133912 ]
New Q values:  [21336.87943429 -1609.84182322 -8192.20126966 38964.8133912 ]
Reward: 9  Episode Reward:  0
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14385.84956926 64625.04664387 -4228.04879148  1851.08132227]
------
Step:31, Action:South
State  208
Old Q Values:  [14385.84956926 64625.04664387 -4228.04879148  1851.08132227]
New Q values:  [14385.84956926 37538.86267491 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  -1
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21336.87943429 -1609.84182322 -8192.20126966 38964.8133912 ]
------
Step:32, Action:West
State  288
Old Q Values:  [21336.87943429 -1609.84182322 -8192.20126966 38964.8133912 ]
New Q values:  [21336.87943429 -1609.84182322 -8192.20126966 20149.89972238]
Reward: -1  Episode Reward:  -2
xxxxx
xg..x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 15215.24788632 11574.38818591]
------
Step:33, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 15215.24788632 11574.38818591]
New Q values:  [-2527.46239811 -8521.23367799 12486.56298482 11574.38818591]
Reward: -1  Episode Reward:  -3
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21336.87943429 -1609.84182322 -8192.20126966 20149.89972238]
------
Step:34, Action:North
State  288
Old Q Values:  [21336.87943429 -1609.84182322 -8192.20126966 20149.89972238]
New Q values:  [11074.76406317 -1609.84182322 -8192.20126966 20149.89972238]
Reward: -1  Episode Reward:  -4
xxxxx
x...x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8468.70763151 4413.84084417 1542.2004081  3394.63016213]
------
Step:35, Action:North
State  208
Old Q Values:  [14385.84956926 37538.86267491 -4228.04879148  1851.08132227]
New Q values:  [25672.4724477  37538.86267491 -4228.04879148  1851.08132227]
Reward: 9  Episode Reward:  5
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254 14200.14474329  -180.00807518 66375.77539999]
------
Step:36, Action:West
State  130
Old Q Values:  [34597.52013254 14200.14474329  -180.00807518 66375.77539999]
New Q values:  [34597.52013254 14200.14474329  -180.00807518 66377.84703461]
Reward: 9  Episode Reward:  14
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132740.45624872]
------
Step:37, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   856.59046969]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211   950.68670925]
Reward: 9  Episode Reward:  23
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2008.83507124  181.21829111 -120.29354603]
------
Step:38, Action:South
State  99
Old Q Values:  [    0.         37679.7176592  65700.46302897     0.        ]
New Q values:  [    0.         48393.28699369 65700.46302897     0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[111073.33310002  22487.97851491   5558.62652977      0.        ]
------
Step:39, Action:North
State  180
Old Q Values:  [-2148.51109557  1811.65086161  7022.68010678 -4966.32149798]
New Q values:  [ -746.35376992  1811.65086161  7022.68010678 -4966.32149798]
Reward: -1  Episode Reward:  21
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -7257.27397003   378.83556103  -180.6       ]
------
Step:40, Action:East
State  108
Old Q Values:  [-8463.16477134  3074.39916879   648.63541754     0.        ]
New Q values:  [-8463.16477134  3074.39916879   492.53480369     0.        ]
Reward: -1  Episode Reward:  20
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   778.93545557  -131.36682235]
------
Step:41, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   778.93545557  -131.36682235]
New Q values:  [-9594.56523706 -8069.05606225   658.45966432  -131.36682235]
Reward: -1  Episode Reward:  19
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1158.28494029 -2383.80019164   204.21722834]
------
Step:42, Action:South
State  136
Old Q Values:  [ -170.77177351  1158.28494029 -2383.80019164   204.21722834]
New Q values:  [ -170.77177351 11724.37277859 -2383.80019164   204.21722834]
Reward: -1  Episode Reward:  18
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25672.4724477  37538.86267491 -4228.04879148  1851.08132227]
------
Step:43, Action:South
State  208
Old Q Values:  [25672.4724477  37538.86267491 -4228.04879148  1851.08132227]
New Q values:  [25672.4724477  21059.91498668 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11074.76406317 -1609.84182322 -8192.20126966 20149.89972238]
------
Step:44, Action:West
State  288
Old Q Values:  [11074.76406317 -1609.84182322 -8192.20126966 20149.89972238]
New Q values:  [11074.76406317 -1609.84182322 -8192.20126966 11805.3287844 ]
Reward: -1  Episode Reward:  16
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 12486.56298482 11574.38818591]
------
Step:45, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12486.56298482 11574.38818591]
New Q values:  [-2527.46239811 -8521.23367799  8535.62382924 11574.38818591]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11074.76406317 -1609.84182322 -8192.20126966 11805.3287844 ]
------
Step:46, Action:West
State  288
Old Q Values:  [11074.76406317 -1609.84182322 -8192.20126966 11805.3287844 ]
New Q values:  [11074.76406317 -1609.84182322 -8192.20126966  8193.84796953]
Reward: -1  Episode Reward:  14
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8535.62382924 11574.38818591]
------
Step:47, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8535.62382924 11574.38818591]
New Q values:  [-2527.46239811 -8521.23367799  8535.62382924  1657.47074031]
Reward: -10001  Episode Reward:  -9987
xxxxx
x   x
x . x
xg  x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 265.94016234  489.65789819 -782.40989094  -30.99112081]
------
Step:1, Action:South
State  181
Old Q Values:  [ 265.94016234  489.65789819 -782.40989094  -30.99112081]
New Q values:  [ 265.94016234 4068.8077714  -782.40989094  -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1069.12253966    26.73544252 12891.81537375   -35.88578819]
------
Step:2, Action:East
State  260
Old Q Values:  [ 1279.74734295 -2735.46306511 10094.38488648 -2601.74710518]
New Q values:  [ 1279.74734295 -2735.46306511  4772.76786294 -2601.74710518]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2432.04636114  1947.73487331]
------
Step:3, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2432.04636114  1947.73487331]
New Q values:  [   16.82637525 -5807.06396197  4300.64776341  1947.73487331]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11074.76406317 -1609.84182322 -8192.20126966  8193.84796953]
------
Step:4, Action:North
State  288
Old Q Values:  [11074.76406317 -1609.84182322 -8192.20126966  8193.84796953]
New Q values:  [ 6975.91791472 -1609.84182322 -8192.20126966  8193.84796953]
Reward: 9  Episode Reward:  36
xxxxx
x. .x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8468.70763151 4413.84084417 1542.2004081  3394.63016213]
------
Step:5, Action:North
State  208
Old Q Values:  [25672.4724477  21059.91498668 -4228.04879148  1851.08132227]
New Q values:  [30187.74308947 21059.91498668 -4228.04879148  1851.08132227]
Reward: 9  Episode Reward:  45
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254 14200.14474329  -180.00807518 66377.84703461]
------
Step:6, Action:West
State  130
Old Q Values:  [34597.52013254 14200.14474329  -180.00807518 66377.84703461]
New Q values:  [34597.52013254 14200.14474329  -180.00807518 66372.67568846]
Reward: -1  Episode Reward:  44
xxxxx
x.a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32083.9571164  132740.45624872]
------
Step:7, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   32083.9571164  132740.45624872]
New Q values:  [ -180.6         3557.6642036  32083.9571164  72811.72140818]
Reward: 9  Episode Reward:  53
xxxxx
xa  x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[    0.         48393.28699369 65700.46302897     0.        ]
------
Step:8, Action:East
State  110
Old Q Values:  [ -239.29051573 -7257.27397003   378.83556103  -180.6       ]
New Q values:  [ -239.29051573 -7257.27397003   365.96192905  -180.6       ]
Reward: -1  Episode Reward:  52
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   716.75901548   429.84057838]
------
Step:9, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  32083.9571164  72811.72140818]
New Q values:  [ -180.6         3557.6642036  32744.7855531  72811.72140818]
Reward: -1  Episode Reward:  51
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254 14200.14474329  -180.00807518 66372.67568846]
------
Step:10, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  3.52174996e+02]
New Q values:  [ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  3.55297703e+02]
Reward: -1  Episode Reward:  50
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   716.75901548   429.84057838]
------
Step:11, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  32744.7855531  72811.72140818]
New Q values:  [ -180.6         3557.6642036  33009.11692778 72811.72140818]
Reward: -1  Episode Reward:  49
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254 14200.14474329  -180.00807518 66372.67568846]
------
Step:12, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  3.55297703e+02]
New Q values:  [ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  3.56546786e+02]
Reward: -1  Episode Reward:  48
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   716.75901548   429.84057838]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   716.75901548   429.84057838]
New Q values:  [ -281.736      -1150.91067548  1078.22442667   429.84057838]
Reward: -1  Episode Reward:  47
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  3.56546786e+02]
------
Step:14, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.64040273e+03 -3.22965309e-01  3.56546786e+02]
New Q values:  [ 7.64171987e+01  1.01118840e+04 -3.22965309e-01  3.56546786e+02]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30187.74308947 21059.91498668 -4228.04879148  1851.08132227]
------
Step:15, Action:North
State  208
Old Q Values:  [30187.74308947 21059.91498668 -4228.04879148  1851.08132227]
New Q values:  [15591.80906936 21059.91498668 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  45
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 11724.37277859 -2383.80019164   204.21722834]
------
Step:16, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.01118840e+04 -3.22965309e-01  3.56546786e+02]
New Q values:  [ 7.64171987e+01  1.03621281e+04 -3.22965309e-01  3.56546786e+02]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15591.80906936 21059.91498668 -4228.04879148  1851.08132227]
------
Step:17, Action:South
State  210
Old Q Values:  [8468.70763151 4413.84084417 1542.2004081  3394.63016213]
New Q values:  [8468.70763151 4223.09072853 1542.2004081  3394.63016213]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x . x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6975.91791472 -1609.84182322 -8192.20126966  8193.84796953]
------
Step:18, Action:West
State  288
Old Q Values:  [ 6975.91791472 -1609.84182322 -8192.20126966  8193.84796953]
New Q values:  [ 6975.91791472 -1609.84182322 -8192.20126966  5837.62633659]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8535.62382924  1657.47074031]
------
Step:19, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8535.62382924  1657.47074031]
New Q values:  [-2527.46239811 -8521.23367799  5506.42490611  1657.47074031]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6975.91791472 -1609.84182322 -8192.20126966  5837.62633659]
------
Step:20, Action:North
State  288
Old Q Values:  [ 6975.91791472 -1609.84182322 -8192.20126966  5837.62633659]
New Q values:  [ 9107.74166189 -1609.84182322 -8192.20126966  5837.62633659]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15591.80906936 21059.91498668 -4228.04879148  1851.08132227]
------
Step:21, Action:South
State  208
Old Q Values:  [15591.80906936 21059.91498668 -4228.04879148  1851.08132227]
New Q values:  [15591.80906936 11155.68849324 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9107.74166189 -1609.84182322 -8192.20126966  5837.62633659]
------
Step:22, Action:North
State  288
Old Q Values:  [ 9107.74166189 -1609.84182322 -8192.20126966  5837.62633659]
New Q values:  [ 8320.03938557 -1609.84182322 -8192.20126966  5837.62633659]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15591.80906936 11155.68849324 -4228.04879148  1851.08132227]
------
Step:23, Action:North
State  208
Old Q Values:  [15591.80906936 11155.68849324 -4228.04879148  1851.08132227]
New Q values:  [ 9753.43546132 11155.68849324 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  37
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 11724.37277859 -2383.80019164   204.21722834]
------
Step:24, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.03621281e+04 -3.22965309e-01  3.56546786e+02]
New Q values:  [ 7.64171987e+01  7.49095779e+03 -3.22965309e-01  3.56546786e+02]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9753.43546132 11155.68849324 -4228.04879148  1851.08132227]
------
Step:25, Action:South
State  208
Old Q Values:  [ 9753.43546132 11155.68849324 -4228.04879148  1851.08132227]
New Q values:  [ 9753.43546132  6957.68721296 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8320.03938557 -1609.84182322 -8192.20126966  5837.62633659]
------
Step:26, Action:North
State  288
Old Q Values:  [ 8320.03938557 -1609.84182322 -8192.20126966  5837.62633659]
New Q values:  [ 6253.44639262 -1609.84182322 -8192.20126966  5837.62633659]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9753.43546132  6957.68721296 -4228.04879148  1851.08132227]
------
Step:27, Action:North
State  208
Old Q Values:  [ 9753.43546132  6957.68721296 -4228.04879148  1851.08132227]
New Q values:  [23812.57689107  6957.68721296 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254 14200.14474329  -180.00807518 66372.67568846]
------
Step:28, Action:West
State  136
Old Q Values:  [ -170.77177351 11724.37277859 -2383.80019164   204.21722834]
New Q values:  [ -170.77177351 11724.37277859 -2383.80019164 -5721.37520937]
Reward: -10001  Episode Reward:  -9968
xxxxx
x g x
x . x
x   x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1069.12253966    26.73544252 12891.81537375   -35.88578819]
------
Step:1, Action:East
State  261
Old Q Values:  [ 1069.12253966    26.73544252 12891.81537375   -35.88578819]
New Q values:  [1069.12253966   26.73544252  814.05362134  -35.88578819]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x...x
x g x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.49095779e+03 -3.22965309e-01  3.56546786e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  7.49095779e+03 -3.22965309e-01  3.56546786e+02]
New Q values:  [ 7.64171987e+01  1.01455562e+04 -3.22965309e-01  3.56546786e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23812.57689107  6957.68721296 -4228.04879148  1851.08132227]
------
Step:2, Action:North
State  210
Old Q Values:  [8468.70763151 4223.09072853 1542.2004081  3394.63016213]
New Q values:  [6430.54990756 4223.09072853 1542.2004081  3394.63016213]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.01455562e+04 -3.22965309e-01  3.56546786e+02]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.01455562e+04 -3.22965309e-01  3.56546786e+02]
New Q values:  [ 7.64171987e+01  5.98678745e+03 -3.22965309e-01  3.56546786e+02]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x..ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6430.54990756 4223.09072853 1542.2004081  3394.63016213]
------
Step:4, Action:North
State  208
Old Q Values:  [23812.57689107  6957.68721296 -4228.04879148  1851.08132227]
New Q values:  [11320.46699009  6957.68721296 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.98678745e+03 -3.22965309e-01  3.56546786e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.98678745e+03 -3.22965309e-01  3.56546786e+02]
New Q values:  [ 7.64171987e+01  4.32327995e+03 -3.22965309e-01  3.56546786e+02]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x..ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6430.54990756 4223.09072853 1542.2004081  3394.63016213]
------
Step:6, Action:North
State  208
Old Q Values:  [11320.46699009  6957.68721296 -4228.04879148  1851.08132227]
New Q values:  [ 5824.57078118  6957.68721296 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  4
xxxxx
x..ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.32327995e+03 -3.22965309e-01  3.56546786e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.32327995e+03 -3.22965309e-01  3.56546786e+02]
New Q values:  [ 7.64171987e+01  3.81601814e+03 -3.22965309e-01  3.56546786e+02]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5824.57078118  6957.68721296 -4228.04879148  1851.08132227]
------
Step:8, Action:South
State  208
Old Q Values:  [ 5824.57078118  6957.68721296 -4228.04879148  1851.08132227]
New Q values:  [ 5824.57078118  4664.50880297 -4228.04879148  1851.08132227]
Reward: 9  Episode Reward:  12
xxxxx
x.. x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6253.44639262 -1609.84182322 -8192.20126966  5837.62633659]
------
Step:9, Action:West
State  288
Old Q Values:  [ 6253.44639262 -1609.84182322 -8192.20126966  5837.62633659]
New Q values:  [ 6253.44639262 -1609.84182322 -8192.20126966  3986.37800647]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5506.42490611  1657.47074031]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5506.42490611  1657.47074031]
New Q values:  [-2527.46239811 -8521.23367799  4078.00388023  1657.47074031]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6253.44639262 -1609.84182322 -8192.20126966  3986.37800647]
------
Step:11, Action:North
State  288
Old Q Values:  [ 6253.44639262 -1609.84182322 -8192.20126966  3986.37800647]
New Q values:  [ 4248.1497914  -1609.84182322 -8192.20126966  3986.37800647]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5824.57078118  4664.50880297 -4228.04879148  1851.08132227]
------
Step:12, Action:North
State  210
Old Q Values:  [6430.54990756 4223.09072853 1542.2004081  3394.63016213]
New Q values:  [3716.42540625 4223.09072853 1542.2004081  3394.63016213]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.81601814e+03 -3.22965309e-01  3.56546786e+02]
------
Step:13, Action:South
State  130
Old Q Values:  [34597.52013254 14200.14474329  -180.00807518 66372.67568846]
New Q values:  [34597.52013254  6946.38511587  -180.00807518 66372.67568846]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3716.42540625 4223.09072853 1542.2004081  3394.63016213]
------
Step:14, Action:South
State  210
Old Q Values:  [3716.42540625 4223.09072853 1542.2004081  3394.63016213]
New Q values:  [3716.42540625 2963.08122883 1542.2004081  3394.63016213]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4248.1497914  -1609.84182322 -8192.20126966  3986.37800647]
------
Step:15, Action:North
State  288
Old Q Values:  [ 4248.1497914  -1609.84182322 -8192.20126966  3986.37800647]
New Q values:  [ 2813.58753844 -1609.84182322 -8192.20126966  3986.37800647]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3716.42540625 2963.08122883 1542.2004081  3394.63016213]
------
Step:16, Action:North
State  208
Old Q Values:  [ 5824.57078118  4664.50880297 -4228.04879148  1851.08132227]
New Q values:  [22241.03101901  4664.50880297 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  4
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[34597.52013254  6946.38511587  -180.00807518 66372.67568846]
------
Step:17, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.81601814e+03 -3.22965309e-01  3.56546786e+02]
New Q values:  [ 7.64171987e+01  3.81601814e+03 -3.22965309e-01  4.71486042e+02]
Reward: 9  Episode Reward:  13
xxxxx
x.a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1078.22442667   429.84057838]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1078.22442667   429.84057838]
New Q values:  [ -281.736      -1150.91067548  1575.49521389   429.84057838]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.81601814e+03 -3.22965309e-01  4.71486042e+02]
------
Step:19, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.81601814e+03 -3.22965309e-01  4.71486042e+02]
New Q values:  [ 7.64171987e+01  2.64073488e+03 -3.22965309e-01  4.71486042e+02]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3716.42540625 2963.08122883 1542.2004081  3394.63016213]
------
Step:20, Action:North
State  210
Old Q Values:  [3716.42540625 2963.08122883 1542.2004081  3394.63016213]
New Q values:  [2278.19062635 2963.08122883 1542.2004081  3394.63016213]
Reward: -1  Episode Reward:  10
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.64073488e+03 -3.22965309e-01  4.71486042e+02]
------
Step:21, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.64073488e+03 -3.22965309e-01  4.71486042e+02]
New Q values:  [ 7.64171987e+01  2.07408300e+03 -3.22965309e-01  4.71486042e+02]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2278.19062635 2963.08122883 1542.2004081  3394.63016213]
------
Step:22, Action:West
State  210
Old Q Values:  [2278.19062635 2963.08122883 1542.2004081  3394.63016213]
New Q values:  [2278.19062635 2963.08122883 1542.2004081  4909.79067623]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  4.98341978e+03  1.18217954e+04  1.20371620e+03]
------
Step:23, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.70525880e+03 8.12250475e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 4.70525880e+03 9.92071120e+03 2.91043938e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22241.03101901  4664.50880297 -4228.04879148  1851.08132227]
------
Step:24, Action:North
State  210
Old Q Values:  [2278.19062635 2963.08122883 1542.2004081  4909.79067623]
New Q values:  [1532.90115067 2963.08122883 1542.2004081  4909.79067623]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.07408300e+03 -3.22965309e-01  4.71486042e+02]
------
Step:25, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.07408300e+03 -3.22965309e-01  4.71486042e+02]
New Q values:  [ 7.64171987e+01  7.50134251e+03 -3.22965309e-01  4.71486042e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22241.03101901  4664.50880297 -4228.04879148  1851.08132227]
------
Step:26, Action:North
State  208
Old Q Values:  [22241.03101901  4664.50880297 -4228.04879148  1851.08132227]
New Q values:  [11146.21515937  4664.50880297 -4228.04879148  1851.08132227]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.50134251e+03 -3.22965309e-01  4.71486042e+02]
------
Step:27, Action:West
State  136
Old Q Values:  [ -170.77177351 11724.37277859 -2383.80019164 -5721.37520937]
New Q values:  [ -170.77177351 11724.37277859 -2383.80019164 -2195.83588096]
Reward: -1  Episode Reward:  13
xxxxx
x.agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:28, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   658.45966432  -131.36682235]
New Q values:  [-9594.56523706 -8069.05606225   658.45966432   493.25301724]
Reward: 9  Episode Reward:  22
xxxxx
xag x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1801.33248727   65.14560537    0.        ]
------
Step:29, Action:South
State  107
Old Q Values:  [-252.35169558 2834.00007675  106.74907788 -252.78192178]
New Q values:  [-252.35169558 3240.3509301   106.74907788 -252.78192178]
Reward: 9  Episode Reward:  31
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         7004.50299801 -178.98      ]
------
Step:30, Action:North
State  187
Old Q Values:  [695.35201428   0.         526.18496922   0.        ]
New Q values:  [1249.64608474    0.          526.18496922    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3240.3509301   106.74907788 -252.78192178]
------
Step:31, Action:South
State  107
Old Q Values:  [-252.35169558 3240.3509301   106.74907788 -252.78192178]
New Q values:  [-252.35169558 3396.89127144  106.74907788 -252.78192178]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         7004.50299801 -178.98      ]
------
Step:32, Action:North
State  184
Old Q Values:  [ -259.36514512     0.         14061.4268019      0.        ]
New Q values:  [  -67.19025311     0.         14061.4268019      0.        ]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   123.85268312  -180.6       ]
------
Step:33, Action:East
State  104
Old Q Values:  [-8652.84        6868.26603854  1078.33520689 -8652.84      ]
New Q values:  [-8652.84        6868.26603854   628.27198205 -8652.84      ]
Reward: -1  Episode Reward:  27
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   658.45966432   493.25301724]
------
Step:34, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   658.45966432   493.25301724]
New Q values:  [-9594.56523706 -8069.05606225  3780.0956993    493.25301724]
Reward: -1  Episode Reward:  26
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 11724.37277859 -2383.80019164 -2195.83588096]
------
Step:35, Action:South
State  136
Old Q Values:  [ -170.77177351 11724.37277859 -2383.80019164 -2195.83588096]
New Q values:  [ -170.77177351  5095.21160681 -2383.80019164 -2195.83588096]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1353.54165126   372.98523174 -8896.20691497   776.32197668]
------
Step:36, Action:North
State  216
Old Q Values:  [ 1353.54165126   372.98523174 -8896.20691497   776.32197668]
New Q values:  [ 2791.21941227   372.98523174 -8896.20691497   776.32197668]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.50134251e+03 -3.22965309e-01  4.71486042e+02]
------
Step:37, Action:South
State  130
Old Q Values:  [34597.52013254  6946.38511587  -180.00807518 66372.67568846]
New Q values:  [34597.52013254  4250.89124922  -180.00807518 66372.67568846]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1532.90115067 2963.08122883 1542.2004081  4909.79067623]
------
Step:38, Action:West
State  218
Old Q Values:  [1087.90115249 6735.13737869    0.          430.34233803]
New Q values:  [1087.90115249 6735.13737869    0.          403.5393639 ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:39, Action:East
State  194
Old Q Values:  [-6.00000000e-01  4.98341978e+03  1.18217954e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.98341978e+03  6.20105535e+03  1.20371620e+03]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1532.90115067 2963.08122883 1542.2004081  4909.79067623]
------
Step:40, Action:West
State  216
Old Q Values:  [ 2791.21941227   372.98523174 -8896.20691497   776.32197668]
New Q values:  [ 2791.21941227   372.98523174 -8896.20691497 11370.08908203]
Reward: -1  Episode Reward:  20
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  36867.20097118  1133.32256943  1141.49622464]
------
Step:41, Action:South
State  200
Old Q Values:  [   62.8218634  36867.20097118  1133.32256943  1141.49622464]
New Q values:  [   62.8218634  15969.68155254  1133.32256943  1141.49622464]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4078.00388023  1657.47074031]
------
Step:42, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4078.00388023  1657.47074031]
New Q values:  [-2527.46239811 -8521.23367799  2826.51495403  1657.47074031]
Reward: -1  Episode Reward:  18
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2813.58753844 -1609.84182322 -8192.20126966  3986.37800647]
------
Step:43, Action:West
State  288
Old Q Values:  [ 2813.58753844 -1609.84182322 -8192.20126966  3986.37800647]
New Q values:  [ 2813.58753844 -1609.84182322 -8192.20126966  2441.9056888 ]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2826.51495403  1657.47074031]
------
Step:44, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2826.51495403  1657.47074031]
New Q values:  [-2527.46239811 -8521.23367799  1974.08224314  1657.47074031]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2813.58753844 -1609.84182322 -8192.20126966  2441.9056888 ]
------
Step:45, Action:North
State  288
Old Q Values:  [ 2813.58753844 -1609.84182322 -8192.20126966  2441.9056888 ]
New Q values:  [ 3145.37622898 -1609.84182322 -8192.20126966  2441.9056888 ]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1087.90115249 6735.13737869    0.          403.5393639 ]
------
Step:46, Action:North
State  218
Old Q Values:  [1087.90115249 6735.13737869    0.          403.5393639 ]
New Q values:  [2684.96321276 6735.13737869    0.          403.5393639 ]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.50134251e+03 -3.22965309e-01  4.71486042e+02]
------
Step:47, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  7.50134251e+03 -3.22965309e-01  4.71486042e+02]
New Q values:  [ 7.64171987e+01  5.02047822e+03 -3.22965309e-01  4.71486042e+02]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2684.96321276 6735.13737869    0.          403.5393639 ]
------
Step:48, Action:North
State  216
Old Q Values:  [ 2791.21941227   372.98523174 -8896.20691497 11370.08908203]
New Q values:  [ 2622.03122969   372.98523174 -8896.20691497 11370.08908203]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.02047822e+03 -3.22965309e-01  4.71486042e+02]
------
Step:49, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  5.02047822e+03 -3.22965309e-01  4.71486042e+02]
New Q values:  [ 7.64171987e+01  5.02047822e+03 -3.22965309e-01  4.73200430e+02]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211   950.68670925]
------
Step:50, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211   950.68670925]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211  1398.74206513]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3396.89127144  106.74907788 -252.78192178]
------
Step:51, Action:South
State  107
Old Q Values:  [-252.35169558 3396.89127144  106.74907788 -252.78192178]
New Q values:  [-252.35169558 3459.50740798  106.74907788 -252.78192178]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         7004.50299801 -178.98      ]
------
Step:52, Action:North
State  187
Old Q Values:  [1249.64608474    0.          526.18496922    0.        ]
New Q values:  [1537.11065629    0.          526.18496922    0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3459.50740798  106.74907788 -252.78192178]
------
Step:53, Action:South
State  107
Old Q Values:  [-252.35169558 3459.50740798  106.74907788 -252.78192178]
New Q values:  [-252.35169558 3484.5538626   106.74907788 -252.78192178]
Reward: -1  Episode Reward:  7
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         7004.50299801 -178.98      ]
------
Step:54, Action:North
State  184
Old Q Values:  [  -67.19025311     0.         14061.4268019      0.        ]
New Q values:  [9.67970369e+00 0.00000000e+00 1.40614268e+04 0.00000000e+00]
Reward: -1  Episode Reward:  6
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   123.85268312  -180.6       ]
------
Step:55, Action:East
State  107
Old Q Values:  [-252.35169558 3484.5538626   106.74907788 -252.78192178]
New Q values:  [-252.35169558 3484.5538626   514.74819532 -252.78192178]
Reward: -1  Episode Reward:  5
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1575.49521389   429.84057838]
------
Step:56, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1575.49521389   429.84057838]
New Q values:  [ -281.736      -1150.91067548  2135.74155034   429.84057838]
Reward: -1  Episode Reward:  4
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.02047822e+03 -3.22965309e-01  4.73200430e+02]
------
Step:57, Action:South
State  130
Old Q Values:  [34597.52013254  4250.89124922  -180.00807518 66372.67568846]
New Q values:  [34597.52013254  3172.69370256  -180.00807518 66372.67568846]
Reward: -1  Episode Reward:  3
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1532.90115067 2963.08122883 1542.2004081  4909.79067623]
------
Step:58, Action:West
State  218
Old Q Values:  [2684.96321276 6735.13737869    0.          403.5393639 ]
New Q values:  [2684.96321276 6735.13737869    0.          392.81817425]
Reward: -1  Episode Reward:  2
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:59, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.87289448e+04 4.12827458e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.87289448e+04 3.67125105e+03 0.00000000e+00]
Reward: -1  Episode Reward:  1
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2684.96321276 6735.13737869    0.          392.81817425]
------
Step:60, Action:North
State  218
Old Q Values:  [2684.96321276 6735.13737869    0.          392.81817425]
New Q values:  [2579.52874989 6735.13737869    0.          392.81817425]
Reward: -1  Episode Reward:  0
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.02047822e+03 -3.22965309e-01  4.73200430e+02]
------
Step:61, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.02047822e+03 -3.22965309e-01  4.73200430e+02]
New Q values:  [ 7.64171987e+01  5.41861801e+03 -3.22965309e-01  4.73200430e+02]
Reward: -1  Episode Reward:  -1
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2622.03122969   372.98523174 -8896.20691497 11370.08908203]
------
Step:62, Action:North
State  218
Old Q Values:  [2579.52874989 6735.13737869    0.          392.81817425]
New Q values:  [2656.79690325 6735.13737869    0.          392.81817425]
Reward: -1  Episode Reward:  -2
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.41861801e+03 -3.22965309e-01  4.73200430e+02]
------
Step:63, Action:South
State  130
Old Q Values:  [34597.52013254  3172.69370256  -180.00807518 66372.67568846]
New Q values:  [34597.52013254  2741.41468389  -180.00807518 66372.67568846]
Reward: -1  Episode Reward:  -3
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1532.90115067 2963.08122883 1542.2004081  4909.79067623]
------
Step:64, Action:West
State  218
Old Q Values:  [2656.79690325 6735.13737869    0.          392.81817425]
New Q values:  [2656.79690325 6735.13737869    0.          388.52969839]
Reward: -1  Episode Reward:  -4
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:65, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.87289448e+04 3.67125105e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.87289448e+04 3.48844163e+03 0.00000000e+00]
Reward: -1  Episode Reward:  -5
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2656.79690325 6735.13737869    0.          388.52969839]
------
Step:66, Action:North
State  218
Old Q Values:  [2656.79690325 6735.13737869    0.          388.52969839]
New Q values:  [2687.7041646  6735.13737869    0.          388.52969839]
Reward: -1  Episode Reward:  -6
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.41861801e+03 -3.22965309e-01  4.73200430e+02]
------
Step:67, Action:South
State  130
Old Q Values:  [34597.52013254  2741.41468389  -180.00807518 66372.67568846]
New Q values:  [34597.52013254  2568.90307643  -180.00807518 66372.67568846]
Reward: -1  Episode Reward:  -7
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1532.90115067 2963.08122883 1542.2004081  4909.79067623]
------
Step:68, Action:West
State  218
Old Q Values:  [2687.7041646  6735.13737869    0.          388.52969839]
New Q values:  [2687.7041646  6735.13737869    0.          386.81430804]
Reward: -1  Episode Reward:  -8
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:69, Action:East
State  194
Old Q Values:  [-6.00000000e-01  4.98341978e+03  6.20105535e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  4.98341978e+03  3.95275934e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -9
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1532.90115067 2963.08122883 1542.2004081  4909.79067623]
------
Step:70, Action:West
State  218
Old Q Values:  [2687.7041646  6735.13737869    0.          386.81430804]
New Q values:  [2687.7041646  6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  -10
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577   773.34142896     0.        ]
------
Step:71, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.87289448e+04 3.48844163e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.87289448e+04 3.41531787e+03 0.00000000e+00]
Reward: -1  Episode Reward:  -11
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2687.7041646  6735.13737869    0.          386.1281519 ]
------
Step:72, Action:North
State  216
Old Q Values:  [ 2622.03122969   372.98523174 -8896.20691497 11370.08908203]
New Q values:  [ 2673.79789518   372.98523174 -8896.20691497 11370.08908203]
Reward: -1  Episode Reward:  -12
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.41861801e+03 -3.22965309e-01  4.73200430e+02]
------
Step:73, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  5.41861801e+03 -3.22965309e-01  4.73200430e+02]
New Q values:  [ 7.64171987e+01  5.41861801e+03 -3.22965309e-01  6.08302791e+02]
Reward: -1  Episode Reward:  -13
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211  1398.74206513]
------
Step:74, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211  1398.74206513]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211  1604.26298483]
Reward: -1  Episode Reward:  -14
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3484.5538626   514.74819532 -252.78192178]
------
Step:75, Action:South
State  105
Old Q Values:  [-180.6        1801.33248727   65.14560537    0.        ]
New Q values:  [-180.6        2821.28389431   65.14560537    0.        ]
Reward: -1  Episode Reward:  -15
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         7004.50299801 -178.98      ]
------
Step:76, Action:East
State  185
Old Q Values:  [1354.56432284    0.         7004.50299801 -178.98      ]
New Q values:  [1354.56432284    0.         5134.26005376 -178.98      ]
Reward: -1  Episode Reward:  -16
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  7.77686285e+03 -8.87652194e+03  2.00341972e+02]
------
Step:77, Action:South
State  203
Old Q Values:  [3.60604218e+00 1.87289448e+04 3.41531787e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.39942508e+04 3.41531787e+03 0.00000000e+00]
Reward: -1  Episode Reward:  -17
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 21677.57620158 15403.69674032]
------
Step:78, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 21677.57620158 15403.69674032]
New Q values:  [  870.35122762  -168.92307549 21677.57620158 78312.11216419]
Reward: 100009  Episode Reward:  99992
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11146.21515937  4664.50880297 -4228.04879148  1851.08132227]
------
Step:1, Action:North
State  210
Old Q Values:  [1532.90115067 2963.08122883 1542.2004081  4909.79067623]
New Q values:  [2244.14586357 2963.08122883 1542.2004081  4909.79067623]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.41861801e+03 -3.22965309e-01  6.08302791e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.41861801e+03 -3.22965309e-01  6.08302791e+02]
New Q values:  [ 7.64171987e+01  3.63978441e+03 -3.22965309e-01  6.08302791e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  4909.79067623]
------
Step:3, Action:West
State  210
Old Q Values:  [2244.14586357 2963.08122883 1542.2004081  4909.79067623]
New Q values:  [2244.14586357 2963.08122883 1542.2004081  3458.34220511]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x.a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  4.98341978e+03  3.95275934e+03  1.20371620e+03]
------
Step:4, Action:East
State  195
Old Q Values:  [   38.85388605 13650.02620119  5505.24212026  1101.59744825]
New Q values:  [   38.85388605 13650.02620119  3238.99950964  1101.59744825]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  3458.34220511]
------
Step:5, Action:West
State  208
Old Q Values:  [11146.21515937  4664.50880297 -4228.04879148  1851.08132227]
New Q values:  [11146.21515937  4664.50880297 -4228.04879148  3790.08788228]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 10167.51784457 -5545.76189056  1099.96026581]
------
Step:6, Action:South
State  195
Old Q Values:  [   38.85388605 13650.02620119  3238.99950964  1101.59744825]
New Q values:  [   38.85388605 28959.04412973  3238.99950964  1101.59744825]
Reward: 9  Episode Reward:  14
xxxxx
x.. x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 21677.57620158 78312.11216419]
------
Step:7, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 21677.57620158 78312.11216419]
New Q values:  [  870.35122762  -168.92307549 21677.57620158 43475.47833374]
Reward: 9  Episode Reward:  23
xxxxx
x.. x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[40484.11156022  2256.66526474  4520.89517899  1875.31501677]
------
Step:8, Action:North
State  261
Old Q Values:  [1069.12253966   26.73544252  814.05362134  -35.88578819]
New Q values:  [ 3.37550489e+04  2.67354425e+01  8.14053621e+02 -3.58857882e+01]
Reward: 9  Episode Reward:  32
xxxxx
x.. x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[111073.33310002  22487.97851491   5558.62652977      0.        ]
------
Step:9, Action:North
State  181
Old Q Values:  [ 265.94016234 4068.8077714  -782.40989094  -30.99112081]
New Q values:  [ 546.43229851 4068.8077714  -782.40989094  -30.99112081]
Reward: 9  Episode Reward:  41
xxxxx
xa. x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1448.85411192    5.4           0.        ]
------
Step:10, Action:South
State  103
Old Q Values:  [ 221.30610858 1448.85411192    5.4           0.        ]
New Q values:  [ 221.30610858 2856.78992489    5.4           0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  849.14917943 7592.82760041    0.        ]
------
Step:11, Action:East
State  181
Old Q Values:  [ 546.43229851 4068.8077714  -782.40989094  -30.99112081]
New Q values:  [ 546.43229851 4068.8077714  2736.69139699  -30.99112081]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 10167.51784457 -5545.76189056  1099.96026581]
------
Step:12, Action:South
State  195
Old Q Values:  [   38.85388605 28959.04412973  3238.99950964  1101.59744825]
New Q values:  [   38.85388605 24625.66115202  3238.99950964  1101.59744825]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 21677.57620158 43475.47833374]
------
Step:13, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 21677.57620158 43475.47833374]
New Q values:  [  870.35122762  -168.92307549 21677.57620158 27516.10601726]
Reward: -1  Episode Reward:  37
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3.37550489e+04  2.67354425e+01  8.14053621e+02 -3.58857882e+01]
------
Step:14, Action:North
State  261
Old Q Values:  [ 3.37550489e+04  2.67354425e+01  8.14053621e+02 -3.58857882e+01]
New Q values:  [14722.06190977    26.73544252   814.05362134   -35.88578819]
Reward: -1  Episode Reward:  36
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 546.43229851 4068.8077714  2736.69139699  -30.99112081]
------
Step:15, Action:South
State  181
Old Q Values:  [ 546.43229851 4068.8077714  2736.69139699  -30.99112081]
New Q values:  [ 546.43229851 6043.54168149 2736.69139699  -30.99112081]
Reward: -1  Episode Reward:  35
xxxxx
x . x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[14722.06190977    26.73544252   814.05362134   -35.88578819]
------
Step:16, Action:North
State  261
Old Q Values:  [14722.06190977    26.73544252   814.05362134   -35.88578819]
New Q values:  [8166.07304403   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  34
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  849.14917943 7592.82760041    0.        ]
------
Step:17, Action:East
State  181
Old Q Values:  [ 546.43229851 6043.54168149 2736.69139699  -30.99112081]
New Q values:  [ 546.43229851 6043.54168149 4144.33191217  -30.99112081]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 10167.51784457 -5545.76189056  1099.96026581]
------
Step:18, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.70525880e+03 9.92071120e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.47372819e+03 9.92071120e+03 2.91043938e+03]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1974.08224314  1657.47074031]
------
Step:19, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1974.08224314  1657.47074031]
New Q values:  [-2527.46239811 -8521.23367799  1738.64576595  1657.47074031]
Reward: 9  Episode Reward:  41
xxxxx
x . x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3145.37622898 -1609.84182322 -8192.20126966  2441.9056888 ]
------
Step:20, Action:North
State  288
Old Q Values:  [ 3145.37622898 -1609.84182322 -8192.20126966  2441.9056888 ]
New Q values:  [ 2295.05315313 -1609.84182322 -8192.20126966  2441.9056888 ]
Reward: -1  Episode Reward:  40
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  3458.34220511]
------
Step:21, Action:West
State  208
Old Q Values:  [11146.21515937  4664.50880297 -4228.04879148  3790.08788228]
New Q values:  [11146.21515937  4664.50880297 -4228.04879148  4491.64851403]
Reward: -1  Episode Reward:  39
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.47372819e+03 9.92071120e+03 2.91043938e+03]
------
Step:22, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.47372819e+03 9.92071120e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.47372819e+03 7.31154903e+03 2.91043938e+03]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11146.21515937  4664.50880297 -4228.04879148  4491.64851403]
------
Step:23, Action:North
State  208
Old Q Values:  [11146.21515937  4664.50880297 -4228.04879148  4491.64851403]
New Q values:  [12503.13959739  4664.50880297 -4228.04879148  4491.64851403]
Reward: -1  Episode Reward:  37
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 8775.70846068  6806.00740836 -8652.84       26817.51177882]
------
Step:24, Action:North
State  128
Old Q Values:  [ 8775.70846068  6806.00740836 -8652.84       26817.51177882]
New Q values:  [11374.93691792  6806.00740836 -8652.84       26817.51177882]
Reward: -301  Episode Reward:  -264
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792  6806.00740836 -8652.84       26817.51177882]
------
Step:25, Action:West
State  130
Old Q Values:  [34597.52013254  2568.90307643  -180.00807518 66372.67568846]
New Q values:  [ 34597.52013254   2568.90307643   -180.00807518 108397.98669784]
Reward: 100009  Episode Reward:  99745
xxxxx
x a x
xg  x
x   x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.63978441e+03 -3.22965309e-01  6.08302791e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.63978441e+03 -3.22965309e-01  6.08302791e+02]
New Q values:  [ 7.64171987e+01  2.49881642e+03 -3.22965309e-01  6.08302791e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  3458.34220511]
------
Step:2, Action:West
State  210
Old Q Values:  [2244.14586357 2963.08122883 1542.2004081  3458.34220511]
New Q values:  [2244.14586357 2963.08122883 1542.2004081  8776.43522765]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 24625.66115202  3238.99950964  1101.59744825]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 10167.51784457 -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 12327.238943   -5545.76189056  1099.96026581]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 21677.57620158 27516.10601726]
------
Step:4, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 21677.57620158 27516.10601726]
New Q values:  [  870.35122762  -168.92307549 21677.57620158 23157.07587497]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[40484.11156022  2256.66526474  4520.89517899  1875.31501677]
------
Step:5, Action:North
State  257
Old Q Values:  [40484.11156022  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [49521.04455409  2256.66526474  4520.89517899  1875.31501677]
Reward: 9  Episode Reward:  45
xxxxx
x.. x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[111073.33310002  22487.97851491   5558.62652977      0.        ]
------
Step:6, Action:North
State  181
Old Q Values:  [ 546.43229851 6043.54168149 4144.33191217  -30.99112081]
New Q values:  [1081.00989687 6043.54168149 4144.33191217  -30.99112081]
Reward: 9  Episode Reward:  54
xxxxx
xa. x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2856.78992489    5.4           0.        ]
------
Step:7, Action:South
State  103
Old Q Values:  [ 221.30610858 2856.78992489    5.4           0.        ]
New Q values:  [ 221.30610858 3419.96425008    5.4           0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x . x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  849.14917943 7592.82760041    0.        ]
------
Step:8, Action:East
State  183
Old Q Values:  [ 877.23516594  849.14917943 7592.82760041    0.        ]
New Q values:  [  877.23516594   849.14917943 10424.22938577     0.        ]
Reward: -1  Episode Reward:  52
xxxxx
x . x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 24625.66115202  3238.99950964  1101.59744825]
------
Step:9, Action:South
State  193
Old Q Values:  [-5922.26708831 12327.238943   -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 11877.41833969 -5545.76189056  1099.96026581]
Reward: -1  Episode Reward:  51
xxxxx
x . x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 21677.57620158 23157.07587497]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1738.64576595  1657.47074031]
New Q values:  [-2527.46239811 -8521.23367799  1738.64576595  3112.21020933]
Reward: -1  Episode Reward:  50
xxxxx
x . x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[8166.07304403   26.73544252  814.05362134  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [8166.07304403   26.73544252  814.05362134  -35.88578819]
New Q values:  [6393.09803334   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  49
xxxxx
x . x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  877.23516594   849.14917943 10424.22938577     0.        ]
------
Step:12, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  4969.94460373     0.        ]
New Q values:  [    0.         -5969.29177534  3482.40377611     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x . x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  4.98341978e+03  3.95275934e+03  1.20371620e+03]
------
Step:13, Action:South
State  194
Old Q Values:  [-6.00000000e-01  4.98341978e+03  3.95275934e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  3.95275934e+03  1.20371620e+03]
Reward: -1  Episode Reward:  47
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1738.64576595  3112.21020933]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1738.64576595  3112.21020933]
New Q values:  [-2527.46239811 -8521.23367799  1427.43001302  3112.21020933]
Reward: -1  Episode Reward:  46
xxxxx
x . x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2295.05315313 -1609.84182322 -8192.20126966  2441.9056888 ]
------
Step:15, Action:West
State  288
Old Q Values:  [ 2295.05315313 -1609.84182322 -8192.20126966  2441.9056888 ]
New Q values:  [ 2295.05315313 -1609.84182322 -8192.20126966 -4090.17466168]
Reward: -10001  Episode Reward:  -9955
xxxxx
x . x
x   x
x g x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -7257.27397003   365.96192905  -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869 2008.83507124  181.21829111 -120.29354603]
New Q values:  [-177.44732869 2008.83507124  718.60978155 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2135.74155034   429.84057838]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2135.74155034   429.84057838]
New Q values:  [ -281.736      -1150.91067548  1609.34154747   429.84057838]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.49881642e+03 -3.22965309e-01  6.08302791e+02]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.49881642e+03 -3.22965309e-01  6.08302791e+02]
New Q values:  [ 7.64171987e+01  4.41595329e+03 -3.22965309e-01  6.08302791e+02]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2673.79789518   372.98523174 -8896.20691497 11370.08908203]
------
Step:4, Action:North
State  218
Old Q Values:  [2687.7041646  6735.13737869    0.          386.1281519 ]
New Q values:  [2399.26765416 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.41595329e+03 -3.22965309e-01  6.08302791e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.41595329e+03 -3.22965309e-01  6.08302791e+02]
New Q values:  [ 7.64171987e+01  5.17680804e+03 -3.22965309e-01  6.08302791e+02]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2673.79789518   372.98523174 -8896.20691497 11370.08908203]
------
Step:6, Action:North
State  216
Old Q Values:  [ 2673.79789518   372.98523174 -8896.20691497 11370.08908203]
New Q values:  [ 2621.96157078   372.98523174 -8896.20691497 11370.08908203]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.17680804e+03 -3.22965309e-01  6.08302791e+02]
------
Step:7, Action:South
State  136
Old Q Values:  [ -170.77177351  5095.21160681 -2383.80019164 -2195.83588096]
New Q values:  [ -170.77177351  5448.51136733 -2383.80019164 -2195.83588096]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2621.96157078   372.98523174 -8896.20691497 11370.08908203]
------
Step:8, Action:West
State  216
Old Q Values:  [ 2621.96157078   372.98523174 -8896.20691497 11370.08908203]
New Q values:  [ 2621.96157078   372.98523174 -8896.20691497  9344.34009857]
Reward: 9  Episode Reward:  32
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  15969.68155254  1133.32256943  1141.49622464]
------
Step:9, Action:South
State  200
Old Q Values:  [   62.8218634  15969.68155254  1133.32256943  1141.49622464]
New Q values:  [  62.8218634  7326.93568382 1133.32256943 1141.49622464]
Reward: 9  Episode Reward:  41
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1427.43001302  3112.21020933]
------
Step:10, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  4300.64776341  1947.73487331]
New Q values:  [   16.82637525 -5807.06396197  4300.64776341  2702.42335933]
Reward: 9  Episode Reward:  50
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6393.09803334   26.73544252  814.05362134  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [6393.09803334   26.73544252  814.05362134  -35.88578819]
New Q values:  [4044.50163829   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1191.83911787 4959.54141652  154.04646645]
------
Step:12, Action:East
State  189
Old Q Values:  [  64.81505849 1191.83911787 4959.54141652  154.04646645]
New Q values:  [  64.81505849 1191.83911787 2464.37647091  154.04646645]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         1603.86634769 1341.90130646  441.58769553]
------
Step:13, Action:South
State  204
Old Q Values:  [   0.         1603.86634769 1341.90130646  441.58769553]
New Q values:  [   0.         1931.1408681  1341.90130646  441.58769553]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  4300.64776341  2702.42335933]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1427.43001302  3112.21020933]
New Q values:  [-2527.46239811 -8521.23367799 61264.88795115  3112.21020933]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  3.95275934e+03  1.20371620e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  3.95275934e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  4.21943431e+03  1.20371620e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  8776.43522765]
------
Step:2, Action:West
State  208
Old Q Values:  [12503.13959739  4664.50880297 -4228.04879148  4491.64851403]
New Q values:  [12503.13959739  4664.50880297 -4228.04879148 -2010.47588559]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x.g x
x ..x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1609.34154747   429.84057838]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3780.0956993    493.25301724]
New Q values:  [-9594.56523706 -8069.05606225  3151.99168992   493.25301724]
Reward: 9  Episode Reward:  9
xxxxx
x.gax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5448.51136733 -2383.80019164 -2195.83588096]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.17680804e+03 -3.22965309e-01  6.08302791e+02]
New Q values:  [ 7.64171987e+01  4.87942525e+03 -3.22965309e-01  6.08302791e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2621.96157078   372.98523174 -8896.20691497  9344.34009857]
------
Step:3, Action:North
State  216
Old Q Values:  [ 2621.96157078   372.98523174 -8896.20691497  9344.34009857]
New Q values:  [ 2512.01220227   372.98523174 -8896.20691497  9344.34009857]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.87942525e+03 -3.22965309e-01  6.08302791e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.87942525e+03 -3.22965309e-01  6.08302791e+02]
New Q values:  [ 7.64171987e+01  4.58410067e+03 -3.22965309e-01  6.08302791e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  8776.43522765]
------
Step:5, Action:West
State  216
Old Q Values:  [ 2512.01220227   372.98523174 -8896.20691497  9344.34009857]
New Q values:  [ 2512.01220227   372.98523174 -8896.20691497  5935.21674457]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  7326.93568382 1133.32256943 1141.49622464]
------
Step:6, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.47372819e+03 7.31154903e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.93743577e+04 7.31154903e+03 2.91043938e+03]
Reward: 9  Episode Reward:  24
xxxxx
x.  x
x.g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 61264.88795115  3112.21020933]
------
Step:7, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549 21677.57620158 23157.07587497]
New Q values:  [  870.35122762  -168.92307549  9364.94642657 23157.07587497]
Reward: 9  Episode Reward:  33
xxxxx
x.  x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2295.05315313 -1609.84182322 -8192.20126966 -4090.17466168]
------
Step:8, Action:South
State  288
Old Q Values:  [ 2295.05315313 -1609.84182322 -8192.20126966 -4090.17466168]
New Q values:  [ 2295.05315313  -136.02078335 -8192.20126966 -4090.17466168]
Reward: -301  Episode Reward:  -268
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2295.05315313  -136.02078335 -8192.20126966 -4090.17466168]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2295.05315313  -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [ 4668.36314047  -136.02078335 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  -269
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12503.13959739  4664.50880297 -4228.04879148 -2010.47588559]
------
Step:10, Action:North
State  208
Old Q Values:  [12503.13959739  4664.50880297 -4228.04879148 -2010.47588559]
New Q values:  [ 6635.20924916  4664.50880297 -4228.04879148 -2010.47588559]
Reward: -1  Episode Reward:  -270
xxxxx
xg ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5448.51136733 -2383.80019164 -2195.83588096]
------
Step:11, Action:South
State  130
Old Q Values:  [ 34597.52013254   2568.90307643   -180.00807518 108397.98669784]
New Q values:  [ 34597.52013254   3017.52400532   -180.00807518 108397.98669784]
Reward: -1  Episode Reward:  -271
xxxxx
x.  x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6635.20924916  4664.50880297 -4228.04879148 -2010.47588559]
------
Step:12, Action:North
State  208
Old Q Values:  [ 6635.20924916  4664.50880297 -4228.04879148 -2010.47588559]
New Q values:  [ 4028.71389973  4664.50880297 -4228.04879148 -2010.47588559]
Reward: -1  Episode Reward:  -272
xxxxx
x. ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.58410067e+03 -3.22965309e-01  6.08302791e+02]
------
Step:13, Action:South
State  136
Old Q Values:  [ -170.77177351  5448.51136733 -2383.80019164 -2195.83588096]
New Q values:  [ -170.77177351  3578.15718783 -2383.80019164 -2195.83588096]
Reward: -1  Episode Reward:  -273
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4028.71389973  4664.50880297 -4228.04879148 -2010.47588559]
------
Step:14, Action:South
State  208
Old Q Values:  [ 4028.71389973  4664.50880297 -4228.04879148 -2010.47588559]
New Q values:  [ 4028.71389973  3265.71246333 -4228.04879148 -2010.47588559]
Reward: -1  Episode Reward:  -274
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4668.36314047  -136.02078335 -8192.20126966 -4090.17466168]
------
Step:15, Action:North
State  288
Old Q Values:  [ 4668.36314047  -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [ 4499.67582448  -136.02078335 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  -275
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  8776.43522765]
------
Step:16, Action:West
State  210
Old Q Values:  [2244.14586357 2963.08122883 1542.2004081  8776.43522765]
New Q values:  [2244.14586357 2963.08122883 1542.2004081  4775.80438276]
Reward: -1  Episode Reward:  -276
xxxxx
x.  x
x.a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  4.21943431e+03  1.20371620e+03]
------
Step:17, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  4.21943431e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  3.11991504e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -277
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  4775.80438276]
------
Step:18, Action:West
State  208
Old Q Values:  [ 4028.71389973  3265.71246333 -4228.04879148 -2010.47588559]
New Q values:  [ 4028.71389973  3265.71246333 -4228.04879148  -992.4830555 ]
Reward: -10001  Episode Reward:  -10278
xxxxx
x.  x
x.g x
x.  x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   311.04734264]
------
Step:1, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3151.99168992   493.25301724]
New Q values:  [-9594.56523706 -8069.05606225  3151.99168992   304.31833359]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  338.72375565   -8.57207238 -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-8463.16477134  3074.39916879   492.53480369     0.        ]
New Q values:  [-8463.16477134  3341.96369955   492.53480369     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -746.35376992  1811.65086161  7022.68010678 -4966.32149798]
------
Step:3, Action:East
State  188
Old Q Values:  [-6523.78898263  7141.69709761 11385.99275116     0.        ]
New Q values:  [-6523.78898263  7141.69709761  6757.87780561     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  7326.93568382 1133.32256943 1141.49622464]
------
Step:4, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -7.91881263e+03  1.26155064e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -1.87193072e+03  1.26155064e+04  0.00000000e+00]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  4300.64776341  2702.42335933]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 61264.88795115  3112.21020933]
New Q values:  [-2527.46239811 -8521.23367799 25861.2579278   3112.21020933]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4499.67582448  -136.02078335 -8192.20126966 -4090.17466168]
------
Step:6, Action:North
State  288
Old Q Values:  [ 4499.67582448  -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [ 3825.8115434   -136.02078335 -8192.20126966 -4090.17466168]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2399.26765416 6735.13737869    0.          386.1281519 ]
------
Step:7, Action:North
State  216
Old Q Values:  [ 2512.01220227   372.98523174 -8896.20691497  5935.21674457]
New Q values:  [ 2379.43508098   372.98523174 -8896.20691497  5935.21674457]
Reward: -1  Episode Reward:  53
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.58410067e+03 -3.22965309e-01  6.08302791e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.58410067e+03 -3.22965309e-01  6.08302791e+02]
New Q values:  [ 7.64171987e+01  4.58410067e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  52
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1609.34154747   429.84057838]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1609.34154747   429.84057838]
New Q values:  [ -281.736      -1150.91067548  2018.36681906   429.84057838]
Reward: -1  Episode Reward:  51
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.58410067e+03 -3.22965309e-01  7.25523581e+02]
------
Step:10, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.58410067e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  3.61360529e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2379.43508098   372.98523174 -8896.20691497  5935.21674457]
------
Step:11, Action:North
State  216
Old Q Values:  [ 2379.43508098   372.98523174 -8896.20691497  5935.21674457]
New Q values:  [ 2024.62118874   372.98523174 -8896.20691497  5935.21674457]
Reward: -1  Episode Reward:  49
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3578.15718783 -2383.80019164 -2195.83588096]
------
Step:12, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.61360529e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  3.22540714e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2024.62118874   372.98523174 -8896.20691497  5935.21674457]
------
Step:13, Action:North
State  216
Old Q Values:  [ 2024.62118874   372.98523174 -8896.20691497  5935.21674457]
New Q values:  [ 1882.69563184   372.98523174 -8896.20691497  5935.21674457]
Reward: -1  Episode Reward:  47
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3578.15718783 -2383.80019164 -2195.83588096]
------
Step:14, Action:South
State  136
Old Q Values:  [ -170.77177351  3578.15718783 -2383.80019164 -2195.83588096]
New Q values:  [ -170.77177351  3211.2278985  -2383.80019164 -2195.83588096]
Reward: -1  Episode Reward:  46
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1882.69563184   372.98523174 -8896.20691497  5935.21674457]
------
Step:15, Action:West
State  216
Old Q Values:  [ 1882.69563184   372.98523174 -8896.20691497  5935.21674457]
New Q values:  [ 1882.69563184   372.98523174 -8896.20691497  4571.56740298]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  7326.93568382 1133.32256943 1141.49622464]
------
Step:16, Action:South
State  200
Old Q Values:  [  62.8218634  7326.93568382 1133.32256943 1141.49622464]
New Q values:  [   62.8218634  10688.55165187  1133.32256943  1141.49622464]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 25861.2579278   3112.21020933]
------
Step:17, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 25861.2579278   3112.21020933]
New Q values:  [-2527.46239811 -8521.23367799 11491.64663414  3112.21020933]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3825.8115434   -136.02078335 -8192.20126966 -4090.17466168]
------
Step:18, Action:North
State  288
Old Q Values:  [ 3825.8115434   -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [ 2901.19483825  -136.02078335 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1882.69563184   372.98523174 -8896.20691497  4571.56740298]
------
Step:19, Action:North
State  218
Old Q Values:  [2399.26765416 6735.13737869    0.          386.1281519 ]
New Q values:  [1926.72920349 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.22540714e+03 -3.22965309e-01  7.25523581e+02]
------
Step:20, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.22540714e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  2.66103308e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1882.69563184   372.98523174 -8896.20691497  4571.56740298]
------
Step:21, Action:North
State  218
Old Q Values:  [1926.72920349 6735.13737869    0.          386.1281519 ]
New Q values:  [1568.40160439 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.66103308e+03 -3.22965309e-01  7.25523581e+02]
------
Step:22, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.66103308e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  2.43528345e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1882.69563184   372.98523174 -8896.20691497  4571.56740298]
------
Step:23, Action:North
State  216
Old Q Values:  [ 1882.69563184   372.98523174 -8896.20691497  4571.56740298]
New Q values:  [ 1483.0632882    372.98523174 -8896.20691497  4571.56740298]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.43528345e+03 -3.22965309e-01  7.25523581e+02]
------
Step:24, Action:South
State  136
Old Q Values:  [ -170.77177351  3211.2278985  -2383.80019164 -2195.83588096]
New Q values:  [ -170.77177351  2655.36138029 -2383.80019164 -2195.83588096]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1483.0632882    372.98523174 -8896.20691497  4571.56740298]
------
Step:25, Action:West
State  216
Old Q Values:  [ 1483.0632882    372.98523174 -8896.20691497  4571.56740298]
New Q values:  [ 1483.0632882    372.98523174 -8896.20691497  5034.59245675]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  10688.55165187  1133.32256943  1141.49622464]
------
Step:26, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  7.77686285e+03 -8.87652194e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.00572679e+04 -8.87652194e+03  2.00341972e+02]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  9364.94642657 23157.07587497]
------
Step:27, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  9364.94642657 23157.07587497]
New Q values:  [  870.35122762  -168.92307549  9364.94642657 84124.54371622]
Reward: 100009  Episode Reward:  100043
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211  1604.26298483]
------
Step:1, Action:West
State  121
Old Q Values:  [    0.             0.         -8868.36952603   311.04734264]
New Q values:  [    0.             0.         -8868.36952603   231.43606375]
Reward: 9  Episode Reward:  9
xxxxx
xa gx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  338.72375565   -8.57207238 -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 2008.83507124  718.60978155 -120.29354603]
New Q values:  [-177.44732869 2621.99653294  718.60978155 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1081.00989687 6043.54168149 4144.33191217  -30.99112081]
------
Step:3, Action:South
State  189
Old Q Values:  [  64.81505849 1191.83911787 2464.37647091  154.04646645]
New Q values:  [  64.81505849 1695.48613864 2464.37647091  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4044.50163829   26.73544252  814.05362134  -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [4044.50163829   26.73544252  814.05362134  -35.88578819]
New Q values:  [3430.26315976   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1081.00989687 6043.54168149 4144.33191217  -30.99112081]
------
Step:5, Action:South
State  189
Old Q Values:  [  64.81505849 1695.48613864 2464.37647091  154.04646645]
New Q values:  [  64.81505849 1706.67340338 2464.37647091  154.04646645]
Reward: -1  Episode Reward:  25
xxxxx
x  gx
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3430.26315976   26.73544252  814.05362134  -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [3430.26315976   26.73544252  814.05362134  -35.88578819]
New Q values:  [3184.56776835   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x  .x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1081.00989687 6043.54168149 4144.33191217  -30.99112081]
------
Step:7, Action:South
State  189
Old Q Values:  [  64.81505849 1706.67340338 2464.37647091  154.04646645]
New Q values:  [  64.81505849 1637.43969186 2464.37647091  154.04646645]
Reward: -1  Episode Reward:  23
xxxxx
x  .x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3184.56776835   26.73544252  814.05362134  -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [3184.56776835   26.73544252  814.05362134  -35.88578819]
New Q values:  [4400.49592307   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x  .x
xa. x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  877.23516594   849.14917943 10424.22938577     0.        ]
------
Step:9, Action:East
State  190
Old Q Values:  [ 1.04129094e+00 -5.70379540e+03  2.66795469e+02  0.00000000e+00]
New Q values:  [ 1.04129094e+00 -5.70379540e+03  3.89677010e+03  0.00000000e+00]
Reward: 9  Episode Reward:  31
xxxxx
x  .x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -1.87193072e+03  1.26155064e+04  0.00000000e+00]
------
Step:10, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -1.87193072e+03  1.26155064e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -1.87193072e+03  6.47834386e+03  0.00000000e+00]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2244.14586357 2963.08122883 1542.2004081  4775.80438276]
------
Step:11, Action:West
State  216
Old Q Values:  [ 1483.0632882    372.98523174 -8896.20691497  5034.59245675]
New Q values:  [ 1483.0632882    372.98523174 -8896.20691497 -2758.90767856]
Reward: -10001  Episode Reward:  -9971
xxxxx
x  .x
x g x
x ..x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4028.71389973  3265.71246333 -4228.04879148  -992.4830555 ]
------
Step:1, Action:North
State  216
Old Q Values:  [ 1483.0632882    372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [ 1329.21035075   372.98523174 -8896.20691497 -2758.90767856]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.43528345e+03 -3.22965309e-01  7.25523581e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.43528345e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  2.18212755e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4028.71389973  3265.71246333 -4228.04879148  -992.4830555 ]
------
Step:3, Action:North
State  210
Old Q Values:  [2244.14586357 2963.08122883 1542.2004081  4775.80438276]
New Q values:  [1551.69661059 2963.08122883 1542.2004081  4775.80438276]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.18212755e+03 -3.22965309e-01  7.25523581e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.18212755e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  2.30499234e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1551.69661059 2963.08122883 1542.2004081  4775.80438276]
------
Step:5, Action:West
State  210
Old Q Values:  [1551.69661059 2963.08122883 1542.2004081  4775.80438276]
New Q values:  [1551.69661059 2963.08122883 1542.2004081  2845.69626423]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x.a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  3.11991504e+03  1.20371620e+03]
------
Step:6, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  3.11991504e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  2.13629038e+03  1.20371620e+03]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1551.69661059 2963.08122883 1542.2004081  2845.69626423]
------
Step:7, Action:South
State  208
Old Q Values:  [ 4028.71389973  3265.71246333 -4228.04879148  -992.4830555 ]
New Q values:  [ 4028.71389973  2182.04343681 -4228.04879148  -992.4830555 ]
Reward: 9  Episode Reward:  13
xxxxx
x.. x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2901.19483825  -136.02078335 -8192.20126966 -4090.17466168]
------
Step:8, Action:North
State  288
Old Q Values:  [ 2901.19483825  -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [ 2048.80230395  -136.02078335 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1551.69661059 2963.08122883 1542.2004081  2845.69626423]
------
Step:9, Action:South
State  210
Old Q Values:  [1551.69661059 2963.08122883 1542.2004081  2845.69626423]
New Q values:  [1551.69661059 1799.27318272 1542.2004081  2845.69626423]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
x.  x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2048.80230395  -136.02078335 -8192.20126966 -4090.17466168]
------
Step:10, Action:North
State  288
Old Q Values:  [ 2048.80230395  -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [ 1672.62980085  -136.02078335 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1551.69661059 1799.27318272 1542.2004081  2845.69626423]
------
Step:11, Action:West
State  208
Old Q Values:  [ 4028.71389973  2182.04343681 -4228.04879148  -992.4830555 ]
New Q values:  [ 4028.71389973  2182.04343681 -4228.04879148  5414.71407654]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.93743577e+04 7.31154903e+03 2.91043938e+03]
------
Step:12, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.93743577e+04 7.31154903e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.12026371e+04 7.31154903e+03 2.91043938e+03]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 11491.64663414  3112.21020933]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11491.64663414  3112.21020933]
New Q values:  [-2527.46239811 -8521.23367799  5097.84759391  3112.21020933]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1672.62980085  -136.02078335 -8192.20126966 -4090.17466168]
------
Step:14, Action:North
State  288
Old Q Values:  [ 1672.62980085  -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [ 2292.8661433   -136.02078335 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  16
xxxxx
xg. x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4028.71389973  2182.04343681 -4228.04879148  5414.71407654]
------
Step:15, Action:West
State  208
Old Q Values:  [ 4028.71389973  2182.04343681 -4228.04879148  5414.71407654]
New Q values:  [ 4028.71389973  2182.04343681 -4228.04879148  5526.07674718]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.12026371e+04 7.31154903e+03 2.91043938e+03]
------
Step:16, Action:South
State  193
Old Q Values:  [-5922.26708831 11877.41833969 -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 29987.73045074 -5545.76189056  1099.96026581]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  9364.94642657 84124.54371622]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5097.84759391  3112.21020933]
New Q values:  [-2527.46239811 -8521.23367799  5097.84759391 16106.59744996]
Reward: 9  Episode Reward:  23
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[49521.04455409  2256.66526474  4520.89517899  1875.31501677]
------
Step:18, Action:North
State  257
Old Q Values:  [49521.04455409  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [53135.81775164  2256.66526474  4520.89517899  1875.31501677]
Reward: 9  Episode Reward:  32
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[111073.33310002  22487.97851491   5558.62652977      0.        ]
------
Step:19, Action:North
State  177
Old Q Values:  [111073.33310002  22487.97851491   5558.62652977      0.        ]
New Q values:  [55441.03820116 22487.97851491  5558.62652977     0.        ]
Reward: 9  Episode Reward:  41
xxxxx
xag x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:NE
[    0.         36687.68320385     0.             0.        ]
------
Step:20, Action:South
State  103
Old Q Values:  [ 221.30610858 3419.96425008    5.4           0.        ]
New Q values:  [ 221.30610858 3180.44820448    5.4           0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1081.00989687 6043.54168149 4144.33191217  -30.99112081]
------
Step:21, Action:South
State  183
Old Q Values:  [  877.23516594   849.14917943 10424.22938577     0.        ]
New Q values:  [  877.23516594  1659.20844869 10424.22938577     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4400.49592307   26.73544252  814.05362134  -35.88578819]
------
Step:22, Action:North
State  261
Old Q Values:  [4400.49592307   26.73544252  814.05362134  -35.88578819]
New Q values:  [4886.86718496   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  38
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  877.23516594  1659.20844869 10424.22938577     0.        ]
------
Step:23, Action:East
State  181
Old Q Values:  [1081.00989687 6043.54168149 4144.33191217  -30.99112081]
New Q values:  [ 1081.00989687  6043.54168149 10653.45190009   -30.99112081]
Reward: -1  Episode Reward:  37
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 29987.73045074 -5545.76189056  1099.96026581]
------
Step:24, Action:South
State  193
Old Q Values:  [-5922.26708831 29987.73045074 -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 37231.85529516 -5545.76189056  1099.96026581]
Reward: -1  Episode Reward:  36
xxxxx
x . x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  9364.94642657 84124.54371622]
------
Step:25, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  9364.94642657 84124.54371622]
New Q values:  [  870.35122762  -168.92307549  9364.94642657 35115.27764197]
Reward: -1  Episode Reward:  35
xxxxx
x .gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4886.86718496   26.73544252  814.05362134  -35.88578819]
------
Step:26, Action:North
State  261
Old Q Values:  [4886.86718496   26.73544252  814.05362134  -35.88578819]
New Q values:  [5150.18244401   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  34
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1081.00989687  6043.54168149 10653.45190009   -30.99112081]
------
Step:27, Action:East
State  181
Old Q Values:  [ 1081.00989687  6043.54168149 10653.45190009   -30.99112081]
New Q values:  [ 1081.00989687  6043.54168149 15430.33734858   -30.99112081]
Reward: -1  Episode Reward:  33
xxxxx
x .gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 37231.85529516 -5545.76189056  1099.96026581]
------
Step:28, Action:South
State  193
Old Q Values:  [-5922.26708831 37231.85529516 -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 25426.72541066 -5545.76189056  1099.96026581]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  9364.94642657 35115.27764197]
------
Step:29, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5097.84759391 16106.59744996]
New Q values:  [-2527.46239811 -8521.23367799  5097.84759391  7987.09371319]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5150.18244401   26.73544252  814.05362134  -35.88578819]
------
Step:30, Action:North
State  261
Old Q Values:  [5150.18244401   26.73544252  814.05362134  -35.88578819]
New Q values:  [6688.57418218   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1081.00989687  6043.54168149 15430.33734858   -30.99112081]
------
Step:31, Action:East
State  181
Old Q Values:  [ 1081.00989687  6043.54168149 15430.33734858   -30.99112081]
New Q values:  [1081.00989687 6043.54168149 3532.326056    -30.99112081]
Reward: -10001  Episode Reward:  -9971
xxxxx
x . x
x g x
x   x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2292.8661433   -136.02078335 -8192.20126966 -4090.17466168]
------
Step:1, Action:North
State  288
Old Q Values:  [ 2292.8661433   -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [ 1776.25533659  -136.02078335 -8192.20126966 -4090.17466168]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1551.69661059 1799.27318272 1542.2004081  2845.69626423]
------
Step:2, Action:West
State  210
Old Q Values:  [1551.69661059 1799.27318272 1542.2004081  2845.69626423]
New Q values:  [1551.69661059 1799.27318272 1542.2004081  8531.3768513 ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.a x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 24625.66115202  3238.99950964  1101.59744825]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 25426.72541066 -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 20710.67345686 -5545.76189056  1099.96026581]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x. gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  9364.94642657 35115.27764197]
------
Step:4, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  9364.94642657 35115.27764197]
New Q values:  [  870.35122762  -168.92307549  9364.94642657 29986.25638228]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x.  x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[53135.81775164  2256.66526474  4520.89517899  1875.31501677]
------
Step:5, Action:North
State  261
Old Q Values:  [6688.57418218   26.73544252  814.05362134  -35.88578819]
New Q values:  [4493.89217732   26.73544252  814.05362134  -35.88578819]
Reward: 9  Episode Reward:  35
xxxxx
x...x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1081.00989687 6043.54168149 3532.326056    -30.99112081]
------
Step:6, Action:South
State  177
Old Q Values:  [55441.03820116 22487.97851491  5558.62652977     0.        ]
New Q values:  [55441.03820116 24935.33673146  5558.62652977     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x..gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[53135.81775164  2256.66526474  4520.89517899  1875.31501677]
------
Step:7, Action:North
State  261
Old Q Values:  [4493.89217732   26.73544252  814.05362134  -35.88578819]
New Q values:  [3610.01937538   26.73544252  814.05362134  -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x...x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1081.00989687 6043.54168149 3532.326056    -30.99112081]
------
Step:8, Action:South
State  181
Old Q Values:  [1081.00989687 6043.54168149 3532.326056    -30.99112081]
New Q values:  [1081.00989687 3499.82248521 3532.326056    -30.99112081]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3610.01937538   26.73544252  814.05362134  -35.88578819]
------
Step:9, Action:North
State  257
Old Q Values:  [53135.81775164  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [37886.03856101  2256.66526474  4520.89517899  1875.31501677]
Reward: -1  Episode Reward:  31
xxxxx
x.g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[55441.03820116 24935.33673146  5558.62652977     0.        ]
------
Step:10, Action:North
State  181
Old Q Values:  [1081.00989687 3499.82248521 3532.326056    -30.99112081]
New Q values:  [ 675.92350432 3499.82248521 3532.326056    -30.99112081]
Reward: 9  Episode Reward:  40
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[  0.         793.73181857   0.           0.        ]
------
Step:11, Action:South
State  101
Old Q Values:  [  0.         793.73181857   0.           0.        ]
New Q values:  [   0.         1376.59054423    0.            0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 3499.82248521 3532.326056    -30.99112081]
------
Step:12, Action:East
State  180
Old Q Values:  [ -746.35376992  1811.65086161  7022.68010678 -4966.32149798]
New Q values:  [ -746.35376992  1811.65086161  6169.26315928 -4966.32149798]
Reward: -1  Episode Reward:  38
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.12026371e+04 7.31154903e+03 2.91043938e+03]
------
Step:13, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.12026371e+04 7.31154903e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 6.87658294e+03 7.31154903e+03 2.91043938e+03]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5097.84759391  7987.09371319]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5097.84759391  7987.09371319]
New Q values:  [-2527.46239811 -8521.23367799  5097.84759391  4626.06784416]
Reward: -1  Episode Reward:  36
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1279.74734295 -2735.46306511  4772.76786294 -2601.74710518]
------
Step:15, Action:East
State  261
Old Q Values:  [3610.01937538   26.73544252  814.05362134  -35.88578819]
New Q values:  [3610.01937538   26.73544252 1615.21577756  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  4300.64776341  2702.42335933]
------
Step:16, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549  9364.94642657 29986.25638228]
New Q values:  [  870.35122762  -168.92307549  4278.2551716  29986.25638228]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1776.25533659  -136.02078335 -8192.20126966 -4090.17466168]
------
Step:17, Action:North
State  288
Old Q Values:  [ 1776.25533659  -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [-3632.27484121  -136.02078335 -8192.20126966 -4090.17466168]
Reward: -10001  Episode Reward:  -9967
xxxxx
x ..x
x  gx
x   x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3632.27484121  -136.02078335 -8192.20126966 -4090.17466168]
------
Step:1, Action:South
State  288
Old Q Values:  [-3632.27484121  -136.02078335 -8192.20126966 -4090.17466168]
New Q values:  [-3632.27484121  -275.81454835 -8192.20126966 -4090.17466168]
Reward: -301  Episode Reward:  -301
xxxxx
x...x
x...x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3632.27484121  -275.81454835 -8192.20126966 -4090.17466168]
------
Step:2, Action:South
State  288
Old Q Values:  [-3632.27484121  -275.81454835 -8192.20126966 -4090.17466168]
New Q values:  [-3632.27484121  -373.67018384 -8192.20126966 -4090.17466168]
Reward: -301  Episode Reward:  -602
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3632.27484121  -373.67018384 -8192.20126966 -4090.17466168]
------
Step:3, Action:South
State  288
Old Q Values:  [-3632.27484121  -373.67018384 -8192.20126966 -4090.17466168]
New Q values:  [-3632.27484121 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -10301  Episode Reward:  -10903
xxxxx
x...x
x...x
x. gx
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 3499.82248521 3532.326056    -30.99112081]
------
Step:1, Action:East
State  189
Old Q Values:  [  64.81505849 1637.43969186 2464.37647091  154.04646645]
New Q values:  [  64.81505849 1637.43969186 4008.33095893  154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.00572679e+04 -8.87652194e+03  2.00341972e+02]
------
Step:2, Action:South
State  196
Old Q Values:  [-2469.90645144  1114.39070335  4092.85112914   239.04887894]
New Q values:  [-2469.90645144  1741.35061036  4092.85112914   239.04887894]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  4300.64776341  2702.42335933]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5097.84759391  4626.06784416]
New Q values:  [-2527.46239811 -8521.23367799   954.8565852   4626.06784416]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3632.27484121 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:4, Action:North
State  288
Old Q Values:  [-3632.27484121 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 1111.90311891 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: 9  Episode Reward:  36
xxxxx
x. .x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1551.69661059 1799.27318272 1542.2004081  8531.3768513 ]
------
Step:5, Action:West
State  210
Old Q Values:  [1551.69661059 1799.27318272 1542.2004081  8531.3768513 ]
New Q values:  [1551.69661059 1799.27318272 1542.2004081  4289.88003321]
Reward: -1  Episode Reward:  35
xxxxx
x. .x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  2.13629038e+03  1.20371620e+03]
------
Step:6, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.87658294e+03 7.31154903e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 4.13785353e+03 7.31154903e+03 2.91043938e+03]
Reward: -1  Episode Reward:  34
xxxxx
x. .x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   954.8565852   4626.06784416]
------
Step:7, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  4300.64776341  2702.42335933]
New Q values:  [   16.82637525 -5807.06396197  4300.64776341  2518.19970261]
Reward: 9  Episode Reward:  43
xxxxx
xg .x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1279.74734295 -2735.46306511  4772.76786294 -2601.74710518]
------
Step:8, Action:East
State  257
Old Q Values:  [37886.03856101  2256.66526474  4520.89517899  1875.31501677]
New Q values:  [37886.03856101  2256.66526474  3195.57842484  1875.31501677]
Reward: -1  Episode Reward:  42
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   954.8565852   4626.06784416]
------
Step:9, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4278.2551716  29986.25638228]
New Q values:  [  870.35122762  -168.92307549  4278.2551716  23359.71412122]
Reward: -1  Episode Reward:  41
xxxxx
x. gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37886.03856101  2256.66526474  3195.57842484  1875.31501677]
------
Step:10, Action:North
State  257
Old Q Values:  [37886.03856101  2256.66526474  3195.57842484  1875.31501677]
New Q values:  [31786.12688475  2256.66526474  3195.57842484  1875.31501677]
Reward: -1  Episode Reward:  40
xxxxx
x. .x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[55441.03820116 24935.33673146  5558.62652977     0.        ]
------
Step:11, Action:North
State  177
Old Q Values:  [55441.03820116 24935.33673146  5558.62652977     0.        ]
New Q values:  [33188.12024162 24935.33673146  5558.62652977     0.        ]
Reward: 9  Episode Reward:  49
xxxxx
xa gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:NE
[    0.         36687.68320385     0.             0.        ]
------
Step:12, Action:South
State  103
Old Q Values:  [ 221.30610858 3180.44820448    5.4           0.        ]
New Q values:  [ 221.30610858 2331.27709859    5.4           0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x  .x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 3499.82248521 3532.326056    -30.99112081]
------
Step:13, Action:East
State  177
Old Q Values:  [33188.12024162 24935.33673146  5558.62652977     0.        ]
New Q values:  [33188.12024162 24935.33673146  8436.05264896     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 20710.67345686 -5545.76189056  1099.96026581]
------
Step:14, Action:South
State  196
Old Q Values:  [-2469.90645144  1741.35061036  4092.85112914   239.04887894]
New Q values:  [-2469.90645144  1986.13457317  4092.85112914   239.04887894]
Reward: -1  Episode Reward:  46
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  4300.64776341  2518.19970261]
------
Step:15, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  4300.64776341  2518.19970261]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  2518.19970261]
Reward: -1  Episode Reward:  45
xxxxx
xg .x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1111.90311891 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:16, Action:North
State  288
Old Q Values:  [ 1111.90311891 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 2101.98427172 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4028.71389973  2182.04343681 -4228.04879148  5526.07674718]
------
Step:17, Action:West
State  208
Old Q Values:  [ 4028.71389973  2182.04343681 -4228.04879148  5526.07674718]
New Q values:  [ 4028.71389973  2182.04343681 -4228.04879148  3437.68603761]
Reward: -1  Episode Reward:  43
xxxxx
xg .x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1986.13457317  4092.85112914   239.04887894]
------
Step:18, Action:East
State  196
Old Q Values:  [-2469.90645144  1986.13457317  4092.85112914   239.04887894]
New Q values:  [-2469.90645144  1986.13457317  2845.15462157   239.04887894]
Reward: -1  Episode Reward:  42
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4028.71389973  2182.04343681 -4228.04879148  3437.68603761]
------
Step:19, Action:North
State  208
Old Q Values:  [ 4028.71389973  2182.04343681 -4228.04879148  3437.68603761]
New Q values:  [63662.13909354  2182.04343681 -4228.04879148  3437.68603761]
Reward: 90009  Episode Reward:  90051
xxxxx
x  gx
x   x
x   x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2621.99653294  718.60978155 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 2331.27709859    5.4           0.        ]
New Q values:  [ 221.30610858 1997.60865624    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 3499.82248521 3532.326056    -30.99112081]
------
Step:2, Action:East
State  181
Old Q Values:  [ 675.92350432 3499.82248521 3532.326056    -30.99112081]
New Q values:  [ 675.92350432 3499.82248521 4435.51079297  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x a.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.00572679e+04 -8.87652194e+03  2.00341972e+02]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  1986.13457317  2845.15462157   239.04887894]
New Q values:  [-2469.90645144  1555.31374005  2845.15462157   239.04887894]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  2518.19970261]
------
Step:4, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4278.2551716  23359.71412122]
New Q values:  [  870.35122762  -168.92307549  4278.2551716  10432.2914611 ]
Reward: 9  Episode Reward:  36
xxxxx
x .gx
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3610.01937538   26.73544252 1615.21577756  -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [3610.01937538   26.73544252 1615.21577756  -35.88578819]
New Q values:  [2774.06098804   26.73544252 1615.21577756  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x .gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 3499.82248521 4435.51079297  -30.99112081]
------
Step:6, Action:East
State  181
Old Q Values:  [ 675.92350432 3499.82248521 4435.51079297  -30.99112081]
New Q values:  [ 675.92350432 3499.82248521 7986.80635424  -30.99112081]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 20710.67345686 -5545.76189056  1099.96026581]
------
Step:7, Action:South
State  196
Old Q Values:  [-2469.90645144  1555.31374005  2845.15462157   239.04887894]
New Q values:  [-2469.90645144  1376.9854068   2845.15462157   239.04887894]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  2518.19970261]
------
Step:8, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  2518.19970261]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  1838.89817746]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2774.06098804   26.73544252 1615.21577756  -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [2774.06098804   26.73544252 1615.21577756  -35.88578819]
New Q values:  [3505.06630149   26.73544252 1615.21577756  -35.88578819]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 3499.82248521 7986.80635424  -30.99112081]
------
Step:10, Action:South
State  181
Old Q Values:  [ 675.92350432 3499.82248521 7986.80635424  -30.99112081]
New Q values:  [ 675.92350432 2450.84888453 7986.80635424  -30.99112081]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3505.06630149   26.73544252 1615.21577756  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [3505.06630149   26.73544252 1615.21577756  -35.88578819]
New Q values:  [3797.46842687   26.73544252 1615.21577756  -35.88578819]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 2450.84888453 7986.80635424  -30.99112081]
------
Step:12, Action:South
State  181
Old Q Values:  [ 675.92350432 2450.84888453 7986.80635424  -30.99112081]
New Q values:  [ 675.92350432 2118.98008187 7986.80635424  -30.99112081]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3797.46842687   26.73544252 1615.21577756  -35.88578819]
------
Step:13, Action:North
State  260
Old Q Values:  [ 1279.74734295 -2735.46306511  4772.76786294 -2601.74710518]
New Q values:  [ 2362.07788497 -2735.46306511  4772.76786294 -2601.74710518]
Reward: -1  Episode Reward:  27
xxxxx
xg..x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -746.35376992  1811.65086161  6169.26315928 -4966.32149798]
------
Step:14, Action:East
State  180
Old Q Values:  [ -746.35376992  1811.65086161  6169.26315928 -4966.32149798]
New Q values:  [ -746.35376992  1811.65086161  3320.65165018 -4966.32149798]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1376.9854068   2845.15462157   239.04887894]
------
Step:15, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.13785353e+03 7.31154903e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 4.13785353e+03 2.20286613e+04 2.91043938e+03]
Reward: 9  Episode Reward:  35
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[63662.13909354  2182.04343681 -4228.04879148  3437.68603761]
------
Step:16, Action:North
State  208
Old Q Values:  [63662.13909354  2182.04343681 -4228.04879148  3437.68603761]
New Q values:  [33515.50917106  2182.04343681 -4228.04879148  3437.68603761]
Reward: 9  Episode Reward:  44
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792  6806.00740836 -8652.84       26817.51177882]
------
Step:17, Action:North
State  130
Old Q Values:  [ 34597.52013254   3017.52400532   -180.00807518 108397.98669784]
New Q values:  [ 46177.80406237   3017.52400532   -180.00807518 108397.98669784]
Reward: -301  Episode Reward:  -257
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237   3017.52400532   -180.00807518 108397.98669784]
------
Step:18, Action:West
State  128
Old Q Values:  [11374.93691792  6806.00740836 -8652.84       26817.51177882]
New Q values:  [11374.93691792  6806.00740836 -8652.84       98017.3336304 ]
Reward: 90009  Episode Reward:  89752
xxxxx
x g x
x   x
x   x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3151.99168992   304.31833359]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3151.99168992   304.31833359]
New Q values:  [-9594.56523706 -8069.05606225  2062.80509006   304.31833359]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2655.36138029 -2383.80019164 -2195.83588096]
------
Step:2, Action:South
State  136
Old Q Values:  [ -170.77177351  2655.36138029 -2383.80019164 -2195.83588096]
New Q values:  [ -170.77177351  1466.30765734 -2383.80019164 -2195.83588096]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1329.21035075   372.98523174 -8896.20691497 -2758.90767856]
------
Step:3, Action:North
State  216
Old Q Values:  [ 1329.21035075   372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [ 1222.58184081   372.98523174 -8896.20691497 -2758.90767856]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.30499234e+03 -3.22965309e-01  7.25523581e+02]
------
Step:4, Action:South
State  136
Old Q Values:  [ -170.77177351  1466.30765734 -2383.80019164 -2195.83588096]
New Q values:  [ -170.77177351   952.69761518 -2383.80019164 -2195.83588096]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1222.58184081   372.98523174 -8896.20691497 -2758.90767856]
------
Step:5, Action:North
State  216
Old Q Values:  [ 1222.58184081   372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [ 1179.93043684   372.98523174 -8896.20691497 -2758.90767856]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.30499234e+03 -3.22965309e-01  7.25523581e+02]
------
Step:6, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.30499234e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  1.27537607e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1179.93043684   372.98523174 -8896.20691497 -2758.90767856]
------
Step:7, Action:North
State  216
Old Q Values:  [ 1179.93043684   372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [  853.98499426   372.98523174 -8896.20691497 -2758.90767856]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.27537607e+03 -3.22965309e-01  7.25523581e+02]
------
Step:8, Action:West
State  136
Old Q Values:  [ -170.77177351   952.69761518 -2383.80019164 -2195.83588096]
New Q values:  [ -170.77177351   952.69761518 -2383.80019164  -809.50353326]
Reward: -1  Episode Reward:  12
xxxxx
x agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   231.43606375]
------
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211  1604.26298483]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211  1427.70415382]
Reward: -1  Episode Reward:  11
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2621.99653294  718.60978155 -120.29354603]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869 2621.99653294  718.60978155 -120.29354603]
New Q values:  [-177.44732869 4181.46742891  718.60978155 -120.29354603]
Reward: 9  Episode Reward:  20
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  877.23516594  1659.20844869 10424.22938577     0.        ]
------
Step:11, Action:East
State  189
Old Q Values:  [  64.81505849 1637.43969186 4008.33095893  154.04646645]
New Q values:  [  64.81505849 1637.43969186 4625.91275414  154.04646645]
Reward: 9  Episode Reward:  29
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.00572679e+04 -8.87652194e+03  2.00341972e+02]
------
Step:12, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.00572679e+04 -8.87652194e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  7.15799460e+03 -8.87652194e+03  2.00341972e+02]
Reward: 9  Episode Reward:  38
xxxxx
x  gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4278.2551716  10432.2914611 ]
------
Step:13, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 651.72608099]
New Q values:  [   1.64433       0.          -29.77444073 1405.33096046]
Reward: 9  Episode Reward:  47
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3797.46842687   26.73544252 1615.21577756  -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [3797.46842687   26.73544252 1615.21577756  -35.88578819]
New Q values:  [2906.16119699   26.73544252 1615.21577756  -35.88578819]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1637.43969186 4625.91275414  154.04646645]
------
Step:15, Action:South
State  191
Old Q Values:  [  3.06655861 206.84750324 650.32545056   0.        ]
New Q values:  [  3.06655861 953.9873604  650.32545056   0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2906.16119699   26.73544252 1615.21577756  -35.88578819]
------
Step:16, Action:North
State  261
Old Q Values:  [2906.16119699   26.73544252 1615.21577756  -35.88578819]
New Q values:  [2549.63830504   26.73544252 1615.21577756  -35.88578819]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1637.43969186 4625.91275414  154.04646645]
------
Step:17, Action:South
State  189
Old Q Values:  [  64.81505849 1637.43969186 4625.91275414  154.04646645]
New Q values:  [  64.81505849 1419.26736826 4625.91275414  154.04646645]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2549.63830504   26.73544252 1615.21577756  -35.88578819]
------
Step:18, Action:North
State  260
Old Q Values:  [ 2362.07788497 -2735.46306511  4772.76786294 -2601.74710518]
New Q values:  [ 3086.74028327 -2735.46306511  4772.76786294 -2601.74710518]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  7141.69709761  6757.87780561     0.        ]
------
Step:19, Action:South
State  189
Old Q Values:  [  64.81505849 1419.26736826 4625.91275414  154.04646645]
New Q values:  [  64.81505849 1331.99843881 4625.91275414  154.04646645]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2549.63830504   26.73544252 1615.21577756  -35.88578819]
------
Step:20, Action:North
State  260
Old Q Values:  [ 3086.74028327 -2735.46306511  4772.76786294 -2601.74710518]
New Q values:  [ 3376.60524259 -2735.46306511  4772.76786294 -2601.74710518]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  7141.69709761  6757.87780561     0.        ]
------
Step:21, Action:South
State  189
Old Q Values:  [  64.81505849 1331.99843881 4625.91275414  154.04646645]
New Q values:  [  64.81505849 1297.09086704 4625.91275414  154.04646645]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2549.63830504   26.73544252 1615.21577756  -35.88578819]
------
Step:22, Action:North
State  261
Old Q Values:  [2549.63830504   26.73544252 1615.21577756  -35.88578819]
New Q values:  [2407.02914826   26.73544252 1615.21577756  -35.88578819]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1297.09086704 4625.91275414  154.04646645]
------
Step:23, Action:East
State  189
Old Q Values:  [  64.81505849 1297.09086704 4625.91275414  154.04646645]
New Q values:  [  64.81505849 1297.09086704 1971.63799487  154.04646645]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         406.24297738   0.         198.38683706]
------
Step:24, Action:South
State  195
Old Q Values:  [   38.85388605 24625.66115202  3238.99950964  1101.59744825]
New Q values:  [   38.85388605 12979.35189914  3238.99950964  1101.59744825]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4278.2551716  10432.2914611 ]
------
Step:25, Action:West
State  277
Old Q Values:  [   1.64433       0.          -29.77444073 1405.33096046]
New Q values:  [   1.64433       0.          -29.77444073 1283.64112866]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2407.02914826   26.73544252 1615.21577756  -35.88578819]
------
Step:26, Action:North
State  261
Old Q Values:  [2407.02914826   26.73544252 1615.21577756  -35.88578819]
New Q values:  [1553.70305777   26.73544252 1615.21577756  -35.88578819]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1297.09086704 1971.63799487  154.04646645]
------
Step:27, Action:South
State  189
Old Q Values:  [  64.81505849 1297.09086704 1971.63799487  154.04646645]
New Q values:  [  64.81505849 1002.80108008 1971.63799487  154.04646645]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1553.70305777   26.73544252 1615.21577756  -35.88578819]
------
Step:28, Action:East
State  261
Old Q Values:  [1553.70305777   26.73544252 1615.21577756  -35.88578819]
New Q values:  [1553.70305777   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  32
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          -29.77444073 1283.64112866]
------
Step:29, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  1838.89817746]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1553.70305777   26.73544252 1030.57864962  -35.88578819]
------
Step:30, Action:North
State  261
Old Q Values:  [1553.70305777   26.73544252 1030.57864962  -35.88578819]
New Q values:  [1212.37262157   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1002.80108008 1971.63799487  154.04646645]
------
Step:31, Action:South
State  188
Old Q Values:  [-6523.78898263  7141.69709761  6757.87780561     0.        ]
New Q values:  [-6523.78898263  4287.90919792  6757.87780561     0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3376.60524259 -2735.46306511  4772.76786294 -2601.74710518]
------
Step:32, Action:East
State  260
Old Q Values:  [ 3376.60524259 -2735.46306511  4772.76786294 -2601.74710518]
New Q values:  [ 3376.60524259 -2735.46306511  2524.47615748 -2601.74710518]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
------
Step:33, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   954.8565852   4626.06784416]
New Q values:  [-2527.46239811 -8521.23367799 61017.9379156   4626.06784416]
Reward: 100009  Episode Reward:  100037
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1212.37262157   26.73544252 1030.57864962  -35.88578819]
------
Step:1, Action:North
State  260
Old Q Values:  [ 3376.60524259 -2735.46306511  2524.47615748 -2601.74710518]
New Q values:  [ 2400.76322987 -2735.46306511  2524.47615748 -2601.74710518]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:2, Action:East
State  180
Old Q Values:  [ -746.35376992  1811.65086161  3320.65165018 -4966.32149798]
New Q values:  [ -746.35376992  1811.65086161  7942.25906201 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.13785353e+03 2.20286613e+04 2.91043938e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.13785353e+03 2.20286613e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.13785353e+03 1.88715173e+04 2.91043938e+03]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[33515.50917106  2182.04343681 -4228.04879148  3437.68603761]
------
Step:4, Action:North
State  208
Old Q Values:  [33515.50917106  2182.04343681 -4228.04879148  3437.68603761]
New Q values:  [45930.99967778  2182.04343681 -4228.04879148  3437.68603761]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
xg  x
x  .x
xxxxx
Step:5, Action:South
State  128
Old Q Values:  [11374.93691792  6806.00740836 -8652.84       98017.3336304 ]
New Q values:  [11374.93691792 16501.10286668 -8652.84       98017.3336304 ]
Reward: -1  Episode Reward:  35
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[45930.99967778  2182.04343681 -4228.04879148  3437.68603761]
------
Step:6, Action:North
State  208
Old Q Values:  [45930.99967778  2182.04343681 -4228.04879148  3437.68603761]
New Q values:  [47776.99996023  2182.04343681 -4228.04879148  3437.68603761]
Reward: -1  Episode Reward:  34
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 16501.10286668 -8652.84       98017.3336304 ]
------
Step:7, Action:South
State  128
Old Q Values:  [11374.93691792 16501.10286668 -8652.84       98017.3336304 ]
New Q values:  [11374.93691792 20932.94113474 -8652.84       98017.3336304 ]
Reward: -1  Episode Reward:  33
xxxxx
x..gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[47776.99996023  2182.04343681 -4228.04879148  3437.68603761]
------
Step:8, Action:West
State  208
Old Q Values:  [47776.99996023  2182.04343681 -4228.04879148  3437.68603761]
New Q values:  [47776.99996023  2182.04343681 -4228.04879148  7035.92960122]
Reward: -1  Episode Reward:  32
xxxxx
x.g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.13785353e+03 1.88715173e+04 2.91043938e+03]
------
Step:9, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.13785353e+03 1.88715173e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.13785353e+03 2.18811069e+04 2.91043938e+03]
Reward: -1  Episode Reward:  31
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[47776.99996023  2182.04343681 -4228.04879148  7035.92960122]
------
Step:10, Action:North
State  208
Old Q Values:  [47776.99996023  2182.04343681 -4228.04879148  7035.92960122]
New Q values:  [48515.40007321  2182.04343681 -4228.04879148  7035.92960122]
Reward: -1  Episode Reward:  30
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 20932.94113474 -8652.84       98017.3336304 ]
------
Step:11, Action:South
State  128
Old Q Values:  [11374.93691792 20932.94113474 -8652.84       98017.3336304 ]
New Q values:  [11374.93691792 22927.19647586 -8652.84       98017.3336304 ]
Reward: -1  Episode Reward:  29
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[48515.40007321  2182.04343681 -4228.04879148  7035.92960122]
------
Step:12, Action:North
State  208
Old Q Values:  [48515.40007321  2182.04343681 -4228.04879148  7035.92960122]
New Q values:  [48810.76011841  2182.04343681 -4228.04879148  7035.92960122]
Reward: -1  Episode Reward:  28
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 22927.19647586 -8652.84       98017.3336304 ]
------
Step:13, Action:South
State  130
Old Q Values:  [ 46177.80406237   3017.52400532   -180.00807518 108397.98669784]
New Q values:  [ 46177.80406237  15849.63763765   -180.00807518 108397.98669784]
Reward: -1  Episode Reward:  27
xxxxx
x.. x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[48810.76011841  2182.04343681 -4228.04879148  7035.92960122]
------
Step:14, Action:North
State  208
Old Q Values:  [48810.76011841  2182.04343681 -4228.04879148  7035.92960122]
New Q values:  [52043.10005671  2182.04343681 -4228.04879148  7035.92960122]
Reward: -1  Episode Reward:  26
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237  15849.63763765   -180.00807518 108397.98669784]
------
Step:15, Action:West
State  136
Old Q Values:  [ -170.77177351   952.69761518 -2383.80019164  -809.50353326]
New Q values:  [ -170.77177351   952.69761518 -2383.80019164   223.59316872]
Reward: 9  Episode Reward:  35
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 1806.64860675  963.6944397 ]
------
Step:16, Action:East
State  112
Old Q Values:  [     0.           3629.92591876   6789.02994987 110949.76306292]
New Q values:  [     0.           3629.92591876  32120.21206907 110949.76306292]
Reward: -1  Episode Reward:  34
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 22927.19647586 -8652.84       98017.3336304 ]
------
Step:17, Action:South
State  136
Old Q Values:  [ -170.77177351   952.69761518 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351   636.67454435 -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  853.98499426   372.98523174 -8896.20691497 -2758.90767856]
------
Step:18, Action:North
State  208
Old Q Values:  [52043.10005671  2182.04343681 -4228.04879148  7035.92960122]
New Q values:  [50221.84011181  2182.04343681 -4228.04879148  7035.92960122]
Reward: -1  Episode Reward:  32
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 22927.19647586 -8652.84       98017.3336304 ]
------
Step:19, Action:South
State  136
Old Q Values:  [ -170.77177351   636.67454435 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351   510.26531602 -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  31
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  853.98499426   372.98523174 -8896.20691497 -2758.90767856]
------
Step:20, Action:North
State  208
Old Q Values:  [50221.84011181  2182.04343681 -4228.04879148  7035.92960122]
New Q values:  [49493.33613384  2182.04343681 -4228.04879148  7035.92960122]
Reward: -1  Episode Reward:  30
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 22927.19647586 -8652.84       98017.3336304 ]
------
Step:21, Action:South
State  136
Old Q Values:  [ -170.77177351   510.26531602 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351   459.70162468 -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  853.98499426   372.98523174 -8896.20691497 -2758.90767856]
------
Step:22, Action:North
State  208
Old Q Values:  [49493.33613384  2182.04343681 -4228.04879148  7035.92960122]
New Q values:  [52316.13046289  2182.04343681 -4228.04879148  7035.92960122]
Reward: -1  Episode Reward:  28
xxxxx
x. ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237  15849.63763765   -180.00807518 108397.98669784]
------
Step:23, Action:West
State  130
Old Q Values:  [ 46177.80406237  15849.63763765   -180.00807518 108397.98669784]
New Q values:  [46177.80406237 15849.63763765  -180.00807518 65202.11110159]
Reward: -1  Episode Reward:  27
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  33009.11692778 72811.72140818]
------
Step:24, Action:West
State  126
Old Q Values:  [   0.          331.64678262 2473.27977453 1134.18629274]
New Q values:  [   0.          331.64678262 2473.27977453 1713.51474577]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4181.46742891  718.60978155 -120.29354603]
------
Step:25, Action:South
State  110
Old Q Values:  [ -239.29051573 -7257.27397003   365.96192905  -180.6       ]
New Q values:  [ -239.29051573 -1734.4785591    365.96192905  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -5.70379540e+03  3.89677010e+03  0.00000000e+00]
------
Step:26, Action:East
State  190
Old Q Values:  [ 1.04129094e+00 -5.70379540e+03  3.89677010e+03  0.00000000e+00]
New Q values:  [ 1.04129094e+00 -5.70379540e+03  1.79487036e+03  0.00000000e+00]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[  0.         789.20773489 599.85796923   0.        ]
------
Step:27, Action:South
State  206
Old Q Values:  [  0.         789.20773489 599.85796923   0.        ]
New Q values:  [    0.         -5068.94789373   599.85796923     0.        ]
Reward: -10001  Episode Reward:  -9967
xxxxx
x   x
x   x
x g.x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 61017.9379156   4626.06784416]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 61017.9379156   4626.06784416]
New Q values:  [-2527.46239811 -8521.23367799 25043.17044775  4626.06784416]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2101.98427172 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:2, Action:North
State  288
Old Q Values:  [ 2101.98427172 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 2133.15771865 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1551.69661059 1799.27318272 1542.2004081  4289.88003321]
------
Step:3, Action:West
State  210
Old Q Values:  [1551.69661059 1799.27318272 1542.2004081  4289.88003321]
New Q values:  [1551.69661059 1799.27318272 1542.2004081  2599.28130597]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  2.13629038e+03  1.20371620e+03]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.13785353e+03 2.18811069e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.13785353e+03 2.44466819e+04 2.91043938e+03]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[52316.13046289  2182.04343681 -4228.04879148  7035.92960122]
------
Step:5, Action:North
State  208
Old Q Values:  [52316.13046289  2182.04343681 -4228.04879148  7035.92960122]
New Q values:  [21069.76267256  2182.04343681 -4228.04879148  7035.92960122]
Reward: 9  Episode Reward:  35
xxxxx
x gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   459.70162468 -2383.80019164   223.59316872]
------
Step:6, Action:South
State  136
Old Q Values:  [ -170.77177351   459.70162468 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351  6504.20945164 -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21069.76267256  2182.04343681 -4228.04879148  7035.92960122]
------
Step:7, Action:West
State  208
Old Q Values:  [21069.76267256  2182.04343681 -4228.04879148  7035.92960122]
New Q values:  [21069.76267256  2182.04343681 -4228.04879148 10147.7764105 ]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.13785353e+03 2.44466819e+04 2.91043938e+03]
------
Step:8, Action:East
State  193
Old Q Values:  [-5922.26708831 20710.67345686 -5545.76189056  1099.96026581]
New Q values:  [-5922.26708831 20710.67345686  4102.02404555  1099.96026581]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21069.76267256  2182.04343681 -4228.04879148 10147.7764105 ]
------
Step:9, Action:West
State  208
Old Q Values:  [21069.76267256  2182.04343681 -4228.04879148 10147.7764105 ]
New Q values:  [21069.76267256  2182.04343681 -4228.04879148 10271.71260126]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 20710.67345686  4102.02404555  1099.96026581]
------
Step:10, Action:South
State  193
Old Q Values:  [-5922.26708831 20710.67345686  4102.02404555  1099.96026581]
New Q values:  [-5922.26708831 11413.35682107  4102.02404555  1099.96026581]
Reward: -1  Episode Reward:  30
xxxxx
x .gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4278.2551716  10432.2914611 ]
------
Step:11, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4278.2551716  10432.2914611 ]
New Q values:  [  870.35122762  -168.92307549  4278.2551716  13714.15464987]
Reward: 9  Episode Reward:  39
xxxxx
x . x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31786.12688475  2256.66526474  3195.57842484  1875.31501677]
------
Step:12, Action:North
State  261
Old Q Values:  [1212.37262157   26.73544252 1030.57864962  -35.88578819]
New Q values:  [2886.3909549    26.73544252 1030.57864962  -35.88578819]
Reward: 9  Episode Reward:  48
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 2118.98008187 7986.80635424  -30.99112081]
------
Step:13, Action:South
State  183
Old Q Values:  [  877.23516594  1659.20844869 10424.22938577     0.        ]
New Q values:  [  877.23516594  1529.00066595 10424.22938577     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x . x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2886.3909549    26.73544252 1030.57864962  -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [2886.3909549    26.73544252 1030.57864962  -35.88578819]
New Q values:  [4281.22519769   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  46
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  877.23516594  1529.00066595 10424.22938577     0.        ]
------
Step:15, Action:East
State  181
Old Q Values:  [ 675.92350432 2118.98008187 7986.80635424  -30.99112081]
New Q values:  [ 675.92350432 2118.98008187 6618.12958802  -30.99112081]
Reward: -1  Episode Reward:  45
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11413.35682107  4102.02404555  1099.96026581]
------
Step:16, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.13785353e+03 2.44466819e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.16749255e+03 2.44466819e+04 2.91043938e+03]
Reward: -1  Episode Reward:  44
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 25043.17044775  4626.06784416]
------
Step:17, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 25043.17044775  4626.06784416]
New Q values:  [-2527.46239811 -8521.23367799 10656.6154947   4626.06784416]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2133.15771865 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:18, Action:North
State  288
Old Q Values:  [ 2133.15771865 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 7173.59188923 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  42
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21069.76267256  2182.04343681 -4228.04879148 10271.71260126]
------
Step:19, Action:West
State  208
Old Q Values:  [21069.76267256  2182.04343681 -4228.04879148 10271.71260126]
New Q values:  [21069.76267256  2182.04343681 -4228.04879148 11442.08961052]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.16749255e+03 2.44466819e+04 2.91043938e+03]
------
Step:20, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.16749255e+03 2.44466819e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.16749255e+03 1.60990016e+04 2.91043938e+03]
Reward: -1  Episode Reward:  40
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21069.76267256  2182.04343681 -4228.04879148 11442.08961052]
------
Step:21, Action:North
State  208
Old Q Values:  [21069.76267256  2182.04343681 -4228.04879148 11442.08961052]
New Q values:  [27987.9383995   2182.04343681 -4228.04879148 11442.08961052]
Reward: -1  Episode Reward:  39
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 15849.63763765  -180.00807518 65202.11110159]
------
Step:22, Action:West
State  128
Old Q Values:  [11374.93691792 22927.19647586 -8652.84       98017.3336304 ]
New Q values:  [ 11374.93691792  22927.19647586  -8652.84       132497.26237104]
Reward: 100009  Episode Reward:  100048
xxxxx
xga x
x   x
x   x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4278.2551716  13714.15464987]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10656.6154947   4626.06784416]
New Q values:  [-2527.46239811 -8521.23367799 10656.6154947   3140.19469697]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4281.22519769   26.73544252 1030.57864962  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [4281.22519769   26.73544252 1030.57864962  -35.88578819]
New Q values:  [3703.32895548   26.73544252 1030.57864962  -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 2118.98008187 6618.12958802  -30.99112081]
------
Step:3, Action:East
State  177
Old Q Values:  [33188.12024162 24935.33673146  8436.05264896     0.        ]
New Q values:  [33188.12024162 24935.33673146  6803.82810591     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11413.35682107  4102.02404555  1099.96026581]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.16749255e+03 1.60990016e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 6.86338167e+03 1.60990016e+04 2.91043938e+03]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10656.6154947   3140.19469697]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10656.6154947   3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  6420.12376465  3140.19469697]
Reward: 9  Episode Reward:  35
xxxxx
x.g x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7173.59188923 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:6, Action:North
State  288
Old Q Values:  [ 7173.59188923 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [11271.21827554 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: 9  Episode Reward:  44
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27987.9383995   2182.04343681 -4228.04879148 11442.08961052]
------
Step:7, Action:North
State  210
Old Q Values:  [1551.69661059 1799.27318272 1542.2004081  2599.28130597]
New Q values:  [20180.71197471  1799.27318272  1542.2004081   2599.28130597]
Reward: -1  Episode Reward:  43
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 15849.63763765  -180.00807518 65202.11110159]
------
Step:8, Action:West
State  130
Old Q Values:  [46177.80406237 15849.63763765  -180.00807518 65202.11110159]
New Q values:  [46177.80406237 15849.63763765  -180.00807518 47929.76086309]
Reward: 9  Episode Reward:  52
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  33009.11692778 72811.72140818]
------
Step:9, Action:West
State  114
Old Q Values:  [ -180.6         3557.6642036  33009.11692778 72811.72140818]
New Q values:  [  -180.6          3557.6642036   33009.11692778 108840.22747196]
Reward: 100009  Episode Reward:  100061
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3703.32895548   26.73544252 1030.57864962  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [3703.32895548   26.73544252 1030.57864962  -35.88578819]
New Q values:  [3472.1704586    26.73544252 1030.57864962  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 2118.98008187 6618.12958802  -30.99112081]
------
Step:2, Action:East
State  181
Old Q Values:  [ 675.92350432 2118.98008187 6618.12958802  -30.99112081]
New Q values:  [ 675.92350432 2118.98008187 6076.65888153  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11413.35682107  4102.02404555  1099.96026581]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.86338167e+03 1.60990016e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.36672168e+03 1.60990016e+04 2.91043938e+03]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
------
Step:4, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549  4278.2551716  13714.15464987]
New Q values:  [  870.35122762  -168.92307549  5098.0675513  13714.15464987]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11271.21827554 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:5, Action:North
State  288
Old Q Values:  [11271.21827554 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 6910.26883007 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -9991  Episode Reward:  -9955
xxxxx
x.. x
x  gx
x   x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4181.46742891  718.60978155 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 1997.60865624    5.4           0.        ]
New Q values:  [ 221.30610858 2627.44112695    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 2118.98008187 6076.65888153  -30.99112081]
------
Step:2, Action:South
State  181
Old Q Values:  [ 675.92350432 2118.98008187 6076.65888153  -30.99112081]
New Q values:  [ 675.92350432 1894.64317033 6076.65888153  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3472.1704586    26.73544252 1030.57864962  -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [3472.1704586    26.73544252 1030.57864962  -35.88578819]
New Q values:  [3211.2658479    26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 1894.64317033 6076.65888153  -30.99112081]
------
Step:4, Action:South
State  183
Old Q Values:  [  877.23516594  1529.00066595 10424.22938577     0.        ]
New Q values:  [  877.23516594  1574.38002075 10424.22938577     0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3211.2658479    26.73544252 1030.57864962  -35.88578819]
------
Step:5, Action:North
State  260
Old Q Values:  [ 2400.76322987 -2735.46306511  2524.47615748 -2601.74710518]
New Q values:  [ 2004.42642478 -2735.46306511  2524.47615748 -2601.74710518]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xa. x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:6, Action:East
State  183
Old Q Values:  [  877.23516594  1574.38002075 10424.22938577     0.        ]
New Q values:  [ 877.23516594 1574.38002075 5053.021047      0.        ]
Reward: 9  Episode Reward:  24
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  2.13629038e+03  1.20371620e+03]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  2.13629038e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  6.90812975e+03  1.20371620e+03]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[20180.71197471  1799.27318272  1542.2004081   2599.28130597]
------
Step:8, Action:North
State  216
Old Q Values:  [  853.98499426   372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [  729.60681722   372.98523174 -8896.20691497 -2758.90767856]
Reward: 9  Episode Reward:  32
xxxxx
x .ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.27537607e+03 -3.22965309e-01  7.25523581e+02]
------
Step:9, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.27537607e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  2.53009164e+03 -3.22965309e-01  7.25523581e+02]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1568.40160439 6735.13737869    0.          386.1281519 ]
------
Step:10, Action:South
State  208
Old Q Values:  [27987.9383995   2182.04343681 -4228.04879148 11442.08961052]
New Q values:  [27987.9383995   2951.29802374 -4228.04879148 11442.08961052]
Reward: 9  Episode Reward:  40
xxxxx
x . x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6910.26883007 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:11, Action:North
State  288
Old Q Values:  [ 6910.26883007 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [11159.88905188 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  39
xxxxx
xg. x
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27987.9383995   2951.29802374 -4228.04879148 11442.08961052]
------
Step:12, Action:North
State  208
Old Q Values:  [27987.9383995   2951.29802374 -4228.04879148 11442.08961052]
New Q values:  [25573.50361873  2951.29802374 -4228.04879148 11442.08961052]
Reward: -1  Episode Reward:  38
xxxxx
x .ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 15849.63763765  -180.00807518 47929.76086309]
------
Step:13, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.53009164e+03 -3.22965309e-01  7.25523581e+02]
New Q values:  [ 7.64171987e+01  2.53009164e+03 -3.22965309e-01  9.01119478e+02]
Reward: 9  Episode Reward:  47
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2018.36681906   429.84057838]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2018.36681906   429.84057838]
New Q values:  [ -281.736      -1150.91067548  1565.77421951   429.84057838]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.53009164e+03 -3.22965309e-01  9.01119478e+02]
------
Step:15, Action:South
State  136
Old Q Values:  [ -170.77177351  6504.20945164 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351  2819.96582582 -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  729.60681722   372.98523174 -8896.20691497 -2758.90767856]
------
Step:16, Action:North
State  216
Old Q Values:  [  729.60681722   372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [ 1137.23247464   372.98523174 -8896.20691497 -2758.90767856]
Reward: -1  Episode Reward:  44
xxxxx
x gax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2819.96582582 -2383.80019164   223.59316872]
------
Step:17, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.53009164e+03 -3.22965309e-01  9.01119478e+02]
New Q values:  [ 7.64171987e+01  1.35260640e+03 -3.22965309e-01  9.01119478e+02]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1137.23247464   372.98523174 -8896.20691497 -2758.90767856]
------
Step:18, Action:North
State  216
Old Q Values:  [ 1137.23247464   372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [  860.07490933   372.98523174 -8896.20691497 -2758.90767856]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.35260640e+03 -3.22965309e-01  9.01119478e+02]
------
Step:19, Action:South
State  136
Old Q Values:  [ -170.77177351  2819.96582582 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351  1385.40880313 -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  860.07490933   372.98523174 -8896.20691497 -2758.90767856]
------
Step:20, Action:North
State  216
Old Q Values:  [  860.07490933   372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [  749.2118832    372.98523174 -8896.20691497 -2758.90767856]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.35260640e+03 -3.22965309e-01  9.01119478e+02]
------
Step:21, Action:South
State  136
Old Q Values:  [ -170.77177351  1385.40880313 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351   778.32708621 -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  749.2118832    372.98523174 -8896.20691497 -2758.90767856]
------
Step:22, Action:North
State  216
Old Q Values:  [  749.2118832    372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [  704.86667276   372.98523174 -8896.20691497 -2758.90767856]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.35260640e+03 -3.22965309e-01  9.01119478e+02]
------
Step:23, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.35260640e+03 -3.22965309e-01  9.01119478e+02]
New Q values:  [ 7.64171987e+01  2.56098377e+03 -3.22965309e-01  9.01119478e+02]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1568.40160439 6735.13737869    0.          386.1281519 ]
------
Step:24, Action:South
State  210
Old Q Values:  [20180.71197471  1799.27318272  1542.2004081   2599.28130597]
New Q values:  [20180.71197471  4067.07598865  1542.2004081   2599.28130597]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11159.88905188 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:25, Action:North
State  288
Old Q Values:  [11159.88905188 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 6483.89683436 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1568.40160439 6735.13737869    0.          386.1281519 ]
------
Step:26, Action:North
State  210
Old Q Values:  [20180.71197471  4067.07598865  1542.2004081   2599.28130597]
New Q values:  [22450.61304881  4067.07598865  1542.2004081   2599.28130597]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 15849.63763765  -180.00807518 47929.76086309]
------
Step:27, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.56098377e+03 -3.22965309e-01  9.01119478e+02]
New Q values:  [ 7.64171987e+01  2.56098377e+03 -3.22965309e-01  7.88159037e+02]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   330.15825211  1427.70415382]
------
Step:28, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211  1427.70415382]
New Q values:  [ -253.44886264 -1902.20915811   330.15825211  1824.9218902 ]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4181.46742891  718.60978155 -120.29354603]
------
Step:29, Action:South
State  111
Old Q Values:  [-177.44732869 4181.46742891  718.60978155 -120.29354603]
New Q values:  [-177.44732869 1958.18317968  718.60978155 -120.29354603]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 953.9873604  650.32545056   0.        ]
------
Step:30, Action:South
State  179
Old Q Values:  [    0.         17203.58890013 35300.23152799     0.        ]
New Q values:  [    0.         16416.67362548 35300.23152799     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31786.12688475  2256.66526474  3195.57842484  1875.31501677]
------
Step:31, Action:North
State  261
Old Q Values:  [3211.2658479    26.73544252 1030.57864962  -35.88578819]
New Q values:  [1570.10254728   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 953.9873604  650.32545056   0.        ]
------
Step:32, Action:South
State  179
Old Q Values:  [    0.         16416.67362548 35300.23152799     0.        ]
New Q values:  [    0.         16101.90751562 35300.23152799     0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31786.12688475  2256.66526474  3195.57842484  1875.31501677]
------
Step:33, Action:North
State  260
Old Q Values:  [ 2004.42642478 -2735.46306511  2524.47615748 -2601.74710518]
New Q values:  [ 1339.63167762 -2735.46306511  2524.47615748 -2601.74710518]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xa  x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -5.70379540e+03  1.79487036e+03  0.00000000e+00]
------
Step:34, Action:East
State  188
Old Q Values:  [-6523.78898263  4287.90919792  6757.87780561     0.        ]
New Q values:  [-6523.78898263  4287.90919792  5909.1166178      0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xga x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  10688.55165187  1133.32256943  1141.49622464]
------
Step:35, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.36672168e+03 1.60990016e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 6.32781258e+04 1.60990016e+04 2.91043938e+03]
Reward: 100009  Episode Reward:  100035
xxxxx
xg  x
x   x
x a x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6483.89683436 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:1, Action:North
State  288
Old Q Values:  [ 6483.89683436 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [10271.00981936 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25573.50361873  2951.29802374 -4228.04879148 11442.08961052]
------
Step:2, Action:North
State  208
Old Q Values:  [25573.50361873  2951.29802374 -4228.04879148 11442.08961052]
New Q values:  [10468.29957335  2951.29802374 -4228.04879148 11442.08961052]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   778.32708621 -2383.80019164   223.59316872]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351   778.32708621 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351  3743.35771764 -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10468.29957335  2951.29802374 -4228.04879148 11442.08961052]
------
Step:4, Action:West
State  216
Old Q Values:  [  704.86667276   372.98523174 -8896.20691497 -2758.90767856]
New Q values:  [  704.86667276   372.98523174 -8896.20691497  2108.40242414]
Reward: 9  Episode Reward:  26
xxxxx
xg. x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  10688.55165187  1133.32256943  1141.49622464]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.32781258e+04 1.60990016e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.72426874e+04 1.60990016e+04 2.91043938e+03]
Reward: 9  Episode Reward:  35
xxxxx
x.. x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6420.12376465  3140.19469697]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6420.12376465  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  5648.75245167  3140.19469697]
Reward: -1  Episode Reward:  34
xxxxx
xg. x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10271.00981936 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:7, Action:North
State  288
Old Q Values:  [10271.00981936 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 7540.4308109  -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  33
xxxxx
x.g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10468.29957335  2951.29802374 -4228.04879148 11442.08961052]
------
Step:8, Action:West
State  208
Old Q Values:  [10468.29957335  2951.29802374 -4228.04879148 11442.08961052]
New Q values:  [10468.29957335  2951.29802374 -4228.04879148  6749.04207914]
Reward: -10001  Episode Reward:  -9968
xxxxx
x.. x
x g x
x.  x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 1894.64317033 6076.65888153  -30.99112081]
------
Step:1, Action:East
State  183
Old Q Values:  [ 877.23516594 1574.38002075 5053.021047      0.        ]
New Q values:  [ 877.23516594 1574.38002075 5920.41398854    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 12979.35189914  3238.99950964  1101.59744825]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 11413.35682107  4102.02404555  1099.96026581]
New Q values:  [-5922.26708831  8684.98912339  4102.02404555  1099.96026581]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  5098.0675513  13714.15464987]
------
Step:3, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  5098.0675513  13714.15464987]
New Q values:  [ 870.35122762 -168.92307549 5098.0675513  5962.09262413]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1570.10254728   26.73544252 1030.57864962  -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [1570.10254728   26.73544252 1030.57864962  -35.88578819]
New Q values:  [2403.56521547   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x...x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1574.38002075 5920.41398854    0.        ]
------
Step:5, Action:East
State  181
Old Q Values:  [ 675.92350432 1894.64317033 6076.65888153  -30.99112081]
New Q values:  [ 675.92350432 1894.64317033 4602.86978755  -30.99112081]
Reward: -10001  Episode Reward:  -9975
xxxxx
x...x
x g x
x  .x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1958.18317968  718.60978155 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 2627.44112695    5.4           0.        ]
New Q values:  [ 221.30610858 2437.23738705    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 1894.64317033 4602.86978755  -30.99112081]
------
Step:2, Action:South
State  183
Old Q Values:  [ 877.23516594 1574.38002075 5920.41398854    0.        ]
New Q values:  [ 877.23516594 1356.22157294 5920.41398854    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2403.56521547   26.73544252 1030.57864962  -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [2403.56521547   26.73544252 1030.57864962  -35.88578819]
New Q values:  [2341.68702245   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 1894.64317033 4602.86978755  -30.99112081]
------
Step:4, Action:South
State  183
Old Q Values:  [ 877.23516594 1356.22157294 5920.41398854    0.        ]
New Q values:  [ 877.23516594 1244.39473591 5920.41398854    0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2341.68702245   26.73544252 1030.57864962  -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [2341.68702245   26.73544252 1030.57864962  -35.88578819]
New Q values:  [2712.19900554   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 5920.41398854    0.        ]
------
Step:6, Action:East
State  181
Old Q Values:  [ 675.92350432 1894.64317033 4602.86978755  -30.99112081]
New Q values:  [ 675.92350432 1894.64317033 4452.04465203  -30.99112081]
Reward: 9  Episode Reward:  24
xxxxx
x ..x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8684.98912339  4102.02404555  1099.96026581]
------
Step:7, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.72426874e+04 1.60990016e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.15184440e+04 1.60990016e+04 2.91043938e+03]
Reward: 9  Episode Reward:  33
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5648.75245167  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  4527.03022394  3140.19469697]
Reward: 9  Episode Reward:  42
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7540.4308109  -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:9, Action:North
State  288
Old Q Values:  [ 7540.4308109  -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 6156.06219637 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10468.29957335  2951.29802374 -4228.04879148  6749.04207914]
------
Step:10, Action:North
State  208
Old Q Values:  [10468.29957335  2951.29802374 -4228.04879148  6749.04207914]
New Q values:  [43941.89854065  2951.29802374 -4228.04879148  6749.04207914]
Reward: 9  Episode Reward:  50
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 11374.93691792  22927.19647586  -8652.84       132497.26237104]
------
Step:11, Action:South
State  128
Old Q Values:  [ 11374.93691792  22927.19647586  -8652.84       132497.26237104]
New Q values:  [ 11374.93691792  22352.84815254  -8652.84       132497.26237104]
Reward: -1  Episode Reward:  49
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[43941.89854065  2951.29802374 -4228.04879148  6749.04207914]
------
Step:12, Action:West
State  208
Old Q Values:  [43941.89854065  2951.29802374 -4228.04879148  6749.04207914]
New Q values:  [43941.89854065  2951.29802374 -4228.04879148  7528.71730019]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.15184440e+04 1.60990016e+04 2.91043938e+03]
------
Step:13, Action:East
State  193
Old Q Values:  [-5922.26708831  8684.98912339  4102.02404555  1099.96026581]
New Q values:  [-5922.26708831  8684.98912339 14822.77918041  1099.96026581]
Reward: -1  Episode Reward:  47
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[43941.89854065  2951.29802374 -4228.04879148  7528.71730019]
------
Step:14, Action:West
State  208
Old Q Values:  [43941.89854065  2951.29802374 -4228.04879148  7528.71730019]
New Q values:  [43941.89854065  2951.29802374 -4228.04879148  7840.58738861]
Reward: -1  Episode Reward:  46
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.15184440e+04 1.60990016e+04 2.91043938e+03]
------
Step:15, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.15184440e+04 1.60990016e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.15184440e+04 1.96215702e+04 2.91043938e+03]
Reward: -1  Episode Reward:  45
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[43941.89854065  2951.29802374 -4228.04879148  7840.58738861]
------
Step:16, Action:North
State  208
Old Q Values:  [43941.89854065  2951.29802374 -4228.04879148  7840.58738861]
New Q values:  [31955.08767519  2951.29802374 -4228.04879148  7840.58738861]
Reward: -1  Episode Reward:  44
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 15849.63763765  -180.00807518 47929.76086309]
------
Step:17, Action:West
State  130
Old Q Values:  [46177.80406237 15849.63763765  -180.00807518 47929.76086309]
New Q values:  [ 46177.80406237  15849.63763765   -180.00807518 111829.37258683]
Reward: 100009  Episode Reward:  100053
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1565.77421951   429.84057838]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   330.15825211  1824.9218902 ]
New Q values:  [ -253.44886264 -1902.20915811   905.75843272  1824.9218902 ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.56098377e+03 -3.22965309e-01  7.88159037e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.56098377e+03 -3.22965309e-01  7.88159037e+02]
New Q values:  [ 7.64171987e+01  2.56098377e+03 -3.22965309e-01  8.62140182e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   905.75843272  1824.9218902 ]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   905.75843272  1824.9218902 ]
New Q values:  [ -253.44886264 -1902.20915811   905.75843272  1780.73491486]
Reward: 9  Episode Reward:  17
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3484.5538626   514.74819532 -252.78192178]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 1958.18317968  718.60978155 -120.29354603]
New Q values:  [-177.44732869 2564.79746843  718.60978155 -120.29354603]
Reward: 9  Episode Reward:  26
xxxxx
x   x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 5920.41398854    0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 5920.41398854    0.        ]
New Q values:  [ 877.23516594 1244.39473591 4311.06875342    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -1.87193072e+03  6.47834386e+03  0.00000000e+00]
------
Step:6, Action:East
State  200
Old Q Values:  [   62.8218634  10688.55165187  1133.32256943  1141.49622464]
New Q values:  [   62.8218634  10688.55165187  1091.24975501  1141.49622464]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  704.86667276   372.98523174 -8896.20691497  2108.40242414]
------
Step:7, Action:North
State  216
Old Q Values:  [  704.86667276   372.98523174 -8896.20691497  2108.40242414]
New Q values:  [ 1049.64180097   372.98523174 -8896.20691497  2108.40242414]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.56098377e+03 -3.22965309e-01  8.62140182e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.56098377e+03 -3.22965309e-01  8.62140182e+02]
New Q values:  [ 7.64171987e+01  1.65631424e+03 -3.22965309e-01  8.62140182e+02]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1049.64180097   372.98523174 -8896.20691497  2108.40242414]
------
Step:9, Action:North
State  216
Old Q Values:  [ 1049.64180097   372.98523174 -8896.20691497  2108.40242414]
New Q values:  [ 1542.26403568   372.98523174 -8896.20691497  2108.40242414]
Reward: -1  Episode Reward:  31
xxxxx
x gax
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3743.35771764 -2383.80019164   223.59316872]
------
Step:10, Action:South
State  136
Old Q Values:  [ -170.77177351  3743.35771764 -2383.80019164   223.59316872]
New Q values:  [ -170.77177351  2129.2638143  -2383.80019164   223.59316872]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1542.26403568   372.98523174 -8896.20691497  2108.40242414]
------
Step:11, Action:West
State  216
Old Q Values:  [ 1542.26403568   372.98523174 -8896.20691497  2108.40242414]
New Q values:  [ 1542.26403568   372.98523174 -8896.20691497  2990.15934938]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  7.15799460e+03 -8.87652194e+03  2.00341972e+02]
------
Step:12, Action:South
State  200
Old Q Values:  [   62.8218634  10688.55165187  1091.24975501  1141.49622464]
New Q values:  [  62.8218634  5638.92972793 1091.24975501 1141.49622464]
Reward: 9  Episode Reward:  38
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4527.03022394  3140.19469697]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4527.03022394  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  3663.03074848  3140.19469697]
Reward: 9  Episode Reward:  47
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6156.06219637 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:14, Action:North
State  288
Old Q Values:  [ 6156.06219637 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 3358.87268336 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1542.26403568   372.98523174 -8896.20691497  2990.15934938]
------
Step:15, Action:North
State  216
Old Q Values:  [ 1542.26403568   372.98523174 -8896.20691497  2990.15934938]
New Q values:  [ 1113.19988519   372.98523174 -8896.20691497  2990.15934938]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.65631424e+03 -3.22965309e-01  8.62140182e+02]
------
Step:16, Action:West
State  136
Old Q Values:  [ -170.77177351  2129.2638143  -2383.80019164   223.59316872]
New Q values:  [ -170.77177351  2129.2638143  -2383.80019164   158.26808661]
Reward: -1  Episode Reward:  44
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   231.43606375]
------
Step:17, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2062.80509006   304.31833359]
New Q values:  [-9594.56523706 -8069.05606225  2062.80509006   967.51250173]
Reward: -1  Episode Reward:  43
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        2821.28389431   65.14560537    0.        ]
------
Step:18, Action:South
State  105
Old Q Values:  [-180.6        2821.28389431   65.14560537    0.        ]
New Q values:  [-180.6        2668.19157385   65.14560537    0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         5134.26005376 -178.98      ]
------
Step:19, Action:East
State  185
Old Q Values:  [1354.56432284    0.         5134.26005376 -178.98      ]
New Q values:  [1354.56432284    0.         3744.78293988 -178.98      ]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  5638.92972793 1091.24975501 1141.49622464]
------
Step:20, Action:South
State  200
Old Q Values:  [  62.8218634  5638.92972793 1091.24975501 1141.49622464]
New Q values:  [  62.8218634  3353.88111572 1091.24975501 1141.49622464]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3663.03074848  3140.19469697]
------
Step:21, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3663.03074848  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  2472.2741044   3140.19469697]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3358.87268336 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:22, Action:North
State  288
Old Q Values:  [ 3358.87268336 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [ 2239.99687816 -6442.16912869 -8192.20126966 -4090.17466168]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1113.19988519   372.98523174 -8896.20691497  2990.15934938]
------
Step:23, Action:West
State  216
Old Q Values:  [ 1113.19988519   372.98523174 -8896.20691497  2990.15934938]
New Q values:  [ 1113.19988519   372.98523174 -8896.20691497  2201.62807447]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3353.88111572 1091.24975501 1141.49622464]
------
Step:24, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  7.15799460e+03 -8.87652194e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  4.65122563e+03 -8.87652194e+03  2.00341972e+02]
Reward: -1  Episode Reward:  36
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 5098.0675513  5962.09262413]
------
Step:25, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 5098.0675513  5962.09262413]
New Q values:  [  870.35122762  -168.92307549  5098.0675513  71926.07511508]
Reward: 100009  Episode Reward:  100045
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2712.19900554   26.73544252 1030.57864962  -35.88578819]
------
Step:1, Action:North
State  260
Old Q Values:  [ 1339.63167762 -2735.46306511  2524.47615748 -2601.74710518]
New Q values:  [ 1585.97380388 -2735.46306511  2524.47615748 -2601.74710518]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 4311.06875342    0.        ]
New Q values:  [ 877.23516594 1244.39473591 3802.26642511    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  6.90812975e+03  1.20371620e+03]
------
Step:3, Action:East
State  195
Old Q Values:  [   38.85388605 12979.35189914  3238.99950964  1101.59744825]
New Q values:  [   38.85388605 12979.35189914  8036.1837185   1101.59744825]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[22450.61304881  4067.07598865  1542.2004081   2599.28130597]
------
Step:4, Action:North
State  208
Old Q Values:  [31955.08767519  2951.29802374 -4228.04879148  7840.58738861]
New Q values:  [46336.24684612  2951.29802374 -4228.04879148  7840.58738861]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237  15849.63763765   -180.00807518 111829.37258683]
------
Step:5, Action:West
State  130
Old Q Values:  [ 46177.80406237  15849.63763765   -180.00807518 111829.37258683]
New Q values:  [46177.80406237 15849.63763765  -180.00807518 84530.45184715]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32644343e+05]
------
Step:6, Action:West
State  127
Old Q Values:  [   0.            1.67014986  384.73306724 1262.47834256]
New Q values:  [   0.            1.67014986  384.73306724 1279.83057755]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2564.79746843  718.60978155 -120.29354603]
------
Step:7, Action:South
State  109
Old Q Values:  [-241.10880094  338.72375565   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  726.38090072   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  53
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1002.80108008 1971.63799487  154.04646645]
------
Step:8, Action:East
State  189
Old Q Values:  [  64.81505849 1002.80108008 1971.63799487  154.04646645]
New Q values:  [  64.81505849 1002.80108008  909.92809116  154.04646645]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         406.24297738   0.         198.38683706]
------
Step:9, Action:South
State  204
Old Q Values:  [   0.         1931.1408681  1341.90130646  441.58769553]
New Q values:  [   0.         1387.82535955 1341.90130646  441.58769553]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2472.2741044   3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 61666.30870521  3140.19469697]
Reward: 100009  Episode Reward:  100060
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2239.99687816 -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:1, Action:North
State  288
Old Q Values:  [ 2239.99687816 -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [14802.2728051  -6442.16912869 -8192.20126966 -4090.17466168]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[46336.24684612  2951.29802374 -4228.04879148  7840.58738861]
------
Step:2, Action:North
State  208
Old Q Values:  [46336.24684612  2951.29802374 -4228.04879148  7840.58738861]
New Q values:  [13178.67788274  2951.29802374 -4228.04879148  7840.58738861]
Reward: -9991  Episode Reward:  -9982
xxxxx
x .gx
x.. x
x.. x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13178.67788274  2951.29802374 -4228.04879148  7840.58738861]
------
Step:1, Action:West
State  216
Old Q Values:  [ 1113.19988519   372.98523174 -8896.20691497  2201.62807447]
New Q values:  [ 1113.19988519   372.98523174 -8896.20691497  6772.52228586]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.15184440e+04 1.96215702e+04 2.91043938e+03]
------
Step:2, Action:East
State  193
Old Q Values:  [-5922.26708831  8684.98912339 14822.77918041  1099.96026581]
New Q values:  [-5922.26708831  8684.98912339  9882.11503699  1099.96026581]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13178.67788274  2951.29802374 -4228.04879148  7840.58738861]
------
Step:3, Action:West
State  208
Old Q Values:  [13178.67788274  2951.29802374 -4228.04879148  7840.58738861]
New Q values:  [13178.67788274  2951.29802374 -4228.04879148  6100.26946654]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8684.98912339  9882.11503699  1099.96026581]
------
Step:4, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.15184440e+04 1.96215702e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.31126702e+04 1.96215702e+04 2.91043938e+03]
Reward: 9  Episode Reward:  16
xxxxx
x.. x
x.g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 61666.30870521  3140.19469697]
------
Step:5, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549  5098.0675513  71926.07511508]
New Q values:  [  870.35122762  -168.92307549  6485.30886205 71926.07511508]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14802.2728051  -6442.16912869 -8192.20126966 -4090.17466168]
------
Step:6, Action:West
State  288
Old Q Values:  [14802.2728051  -6442.16912869 -8192.20126966 -4090.17466168]
New Q values:  [14802.2728051  -6442.16912869 -8192.20126966 16863.22274689]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 61666.30870521  3140.19469697]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 61666.30870521  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 29724.89030615  3140.19469697]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14802.2728051  -6442.16912869 -8192.20126966 16863.22274689]
------
Step:8, Action:West
State  288
Old Q Values:  [14802.2728051  -6442.16912869 -8192.20126966 16863.22274689]
New Q values:  [14802.2728051  -6442.16912869 -8192.20126966 15662.1561906 ]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 29724.89030615  3140.19469697]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 29724.89030615  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 16588.00297964  3140.19469697]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14802.2728051  -6442.16912869 -8192.20126966 15662.1561906 ]
------
Step:10, Action:West
State  288
Old Q Values:  [14802.2728051  -6442.16912869 -8192.20126966 15662.1561906 ]
New Q values:  [14802.2728051  -6442.16912869 -8192.20126966 11240.66337013]
Reward: -1  Episode Reward:  20
xxxxx
xg. x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 16588.00297964  3140.19469697]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 16588.00297964  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 11075.28303339  3140.19469697]
Reward: -1  Episode Reward:  19
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14802.2728051  -6442.16912869 -8192.20126966 11240.66337013]
------
Step:12, Action:North
State  288
Old Q Values:  [14802.2728051  -6442.16912869 -8192.20126966 11240.66337013]
New Q values:  [ 9873.91248686 -6442.16912869 -8192.20126966 11240.66337013]
Reward: -1  Episode Reward:  18
xxxxx
xg. x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13178.67788274  2951.29802374 -4228.04879148  6100.26946654]
------
Step:13, Action:North
State  208
Old Q Values:  [13178.67788274  2951.29802374 -4228.04879148  6100.26946654]
New Q values:  [ 5909.65029738  2951.29802374 -4228.04879148  6100.26946654]
Reward: -1  Episode Reward:  17
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2129.2638143  -2383.80019164   158.26808661]
------
Step:14, Action:South
State  130
Old Q Values:  [46177.80406237 15849.63763765  -180.00807518 84530.45184715]
New Q values:  [46177.80406237  8169.33589502  -180.00807518 84530.45184715]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5909.65029738  2951.29802374 -4228.04879148  6100.26946654]
------
Step:15, Action:North
State  208
Old Q Values:  [ 5909.65029738  2951.29802374 -4228.04879148  6100.26946654]
New Q values:  [27722.3956731   2951.29802374 -4228.04879148  6100.26946654]
Reward: -1  Episode Reward:  15
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237  8169.33589502  -180.00807518 84530.45184715]
------
Step:16, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  1.65631424e+03 -3.22965309e-01  8.62140182e+02]
New Q values:  [ 7.64171987e+01  1.65631424e+03 -3.22965309e-01  8.19988339e+02]
Reward: 9  Episode Reward:  24
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1565.77421951   429.84057838]
------
Step:17, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1565.77421951   429.84057838]
New Q values:  [ -281.736      -1150.91067548  1122.60395873   429.84057838]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.65631424e+03 -3.22965309e-01  8.19988339e+02]
------
Step:18, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.65631424e+03 -3.22965309e-01  8.19988339e+02]
New Q values:  [ 7.64171987e+01  8.97864440e+03 -3.22965309e-01  8.19988339e+02]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27722.3956731   2951.29802374 -4228.04879148  6100.26946654]
------
Step:19, Action:North
State  208
Old Q Values:  [27722.3956731   2951.29802374 -4228.04879148  6100.26946654]
New Q values:  [36447.49382339  2951.29802374 -4228.04879148  6100.26946654]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237  8169.33589502  -180.00807518 84530.45184715]
------
Step:20, Action:West
State  130
Old Q Values:  [46177.80406237  8169.33589502  -180.00807518 84530.45184715]
New Q values:  [46177.80406237  8169.33589502  -180.00807518 66463.64898045]
Reward: -1  Episode Reward:  20
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   33009.11692778 108840.22747196]
------
Step:21, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1122.60395873   429.84057838]
New Q values:  [ -281.736      -1150.91067548  1122.60395873   214.49203629]
Reward: 9  Episode Reward:  29
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   123.85268312  -180.6       ]
------
Step:22, Action:East
State  106
Old Q Values:  [ -180.6        -8952.15415062   123.85268312  -180.6       ]
New Q values:  [ -180.6        -8952.15415062   385.72226087  -180.6       ]
Reward: -1  Episode Reward:  28
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1122.60395873   214.49203629]
------
Step:23, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1122.60395873   214.49203629]
New Q values:  [ -281.736      -1150.91067548  3142.03490244   214.49203629]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  8.97864440e+03 -3.22965309e-01  8.19988339e+02]
------
Step:24, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  8.97864440e+03 -3.22965309e-01  8.19988339e+02]
New Q values:  [ 7.64171987e+01  1.03260417e+04 -3.22965309e-01  8.19988339e+02]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[22450.61304881  4067.07598865  1542.2004081   2599.28130597]
------
Step:25, Action:North
State  210
Old Q Values:  [22450.61304881  4067.07598865  1542.2004081   2599.28130597]
New Q values:  [12077.4577215   4067.07598865  1542.2004081   2599.28130597]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.03260417e+04 -3.22965309e-01  8.19988339e+02]
------
Step:26, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.03260417e+04 -3.22965309e-01  8.19988339e+02]
New Q values:  [ 7.64171987e+01  7.75305399e+03 -3.22965309e-01  8.19988339e+02]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12077.4577215   4067.07598865  1542.2004081   2599.28130597]
------
Step:27, Action:North
State  208
Old Q Values:  [36447.49382339  2951.29802374 -4228.04879148  6100.26946654]
New Q values:  [16904.31372508  2951.29802374 -4228.04879148  6100.26946654]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.75305399e+03 -3.22965309e-01  8.19988339e+02]
------
Step:28, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  7.75305399e+03 -3.22965309e-01  8.19988339e+02]
New Q values:  [ 7.64171987e+01  7.75305399e+03 -3.22965309e-01  1.27000581e+03]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3142.03490244   214.49203629]
------
Step:29, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2062.80509006   967.51250173]
New Q values:  [-9594.56523706 -8069.05606225  1463.30118031   967.51250173]
Reward: -1  Episode Reward:  21
xxxxx
x gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2129.2638143  -2383.80019164   158.26808661]
------
Step:30, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  7.75305399e+03 -3.22965309e-01  1.27000581e+03]
New Q values:  [ 7.64171987e+01  8.17191571e+03 -3.22965309e-01  1.27000581e+03]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16904.31372508  2951.29802374 -4228.04879148  6100.26946654]
------
Step:31, Action:North
State  210
Old Q Values:  [12077.4577215   4067.07598865  1542.2004081   2599.28130597]
New Q values:  [7281.95780215 4067.07598865 1542.2004081  2599.28130597]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  8.17191571e+03 -3.22965309e-01  1.27000581e+03]
------
Step:32, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  8.17191571e+03 -3.22965309e-01  1.27000581e+03]
New Q values:  [ 7.64171987e+01  5.45275363e+03 -3.22965309e-01  1.27000581e+03]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7281.95780215 4067.07598865 1542.2004081  2599.28130597]
------
Step:33, Action:North
State  216
Old Q Values:  [ 1113.19988519   372.98523174 -8896.20691497  6772.52228586]
New Q values:  [ 2080.50604169   372.98523174 -8896.20691497  6772.52228586]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.45275363e+03 -3.22965309e-01  1.27000581e+03]
------
Step:34, Action:South
State  136
Old Q Values:  [ -170.77177351  2129.2638143  -2383.80019164   158.26808661]
New Q values:  [ -170.77177351  5922.39964324 -2383.80019164   158.26808661]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16904.31372508  2951.29802374 -4228.04879148  6100.26946654]
------
Step:35, Action:North
State  216
Old Q Values:  [ 2080.50604169   372.98523174 -8896.20691497  6772.52228586]
New Q values:  [ 2467.42850429   372.98523174 -8896.20691497  6772.52228586]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.45275363e+03 -3.22965309e-01  1.27000581e+03]
------
Step:36, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.45275363e+03 -3.22965309e-01  1.27000581e+03]
New Q values:  [ 7.64171987e+01  4.36508879e+03 -3.22965309e-01  1.27000581e+03]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x. ax
xg  x
xxxxx
Step:37, Action:South
State  210
Old Q Values:  [7281.95780215 4067.07598865 1542.2004081  2599.28130597]
New Q values:  [7281.95780215 4998.4294065  1542.2004081  2599.28130597]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9873.91248686 -6442.16912869 -8192.20126966 11240.66337013]
------
Step:38, Action:North
State  288
Old Q Values:  [ 9873.91248686 -6442.16912869 -8192.20126966 11240.66337013]
New Q values:  [ 6133.55233539 -6442.16912869 -8192.20126966 11240.66337013]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7281.95780215 4998.4294065  1542.2004081  2599.28130597]
------
Step:39, Action:North
State  210
Old Q Values:  [7281.95780215 4998.4294065  1542.2004081  2599.28130597]
New Q values:  [4221.7097581  4998.4294065  1542.2004081  2599.28130597]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.36508879e+03 -3.22965309e-01  1.27000581e+03]
------
Step:40, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.36508879e+03 -3.22965309e-01  1.27000581e+03]
New Q values:  [ 7.64171987e+01  3.24496434e+03 -3.22965309e-01  1.27000581e+03]
Reward: -1  Episode Reward:  10
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4221.7097581  4998.4294065  1542.2004081  2599.28130597]
------
Step:41, Action:North
State  210
Old Q Values:  [4221.7097581  4998.4294065  1542.2004081  2599.28130597]
New Q values:  [2661.57320472 4998.4294065  1542.2004081  2599.28130597]
Reward: -1  Episode Reward:  9
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.24496434e+03 -3.22965309e-01  1.27000581e+03]
------
Step:42, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.24496434e+03 -3.22965309e-01  1.27000581e+03]
New Q values:  [ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  1.27000581e+03]
Reward: -1  Episode Reward:  8
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16904.31372508  2951.29802374 -4228.04879148  6100.26946654]
------
Step:43, Action:North
State  208
Old Q Values:  [16904.31372508  2951.29802374 -4228.04879148  6100.26946654]
New Q values:  [ 8671.72944588  2951.29802374 -4228.04879148  6100.26946654]
Reward: -1  Episode Reward:  7
xxxxx
x  ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  1.27000581e+03]
------
Step:44, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  1.27000581e+03]
New Q values:  [ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  1.45001279e+03]
Reward: -1  Episode Reward:  6
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3142.03490244   214.49203629]
------
Step:45, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1463.30118031   967.51250173]
New Q values:  [-9594.56523706 -8069.05606225  2361.4403651    967.51250173]
Reward: -1  Episode Reward:  5
xxxxx
x gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5922.39964324 -2383.80019164   158.26808661]
------
Step:46, Action:South
State  136
Old Q Values:  [ -170.77177351  5922.39964324 -2383.80019164   158.26808661]
New Q values:  [ -170.77177351  4969.87869106 -2383.80019164   158.26808661]
Reward: -1  Episode Reward:  4
xxxxx
x  gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8671.72944588  2951.29802374 -4228.04879148  6100.26946654]
------
Step:47, Action:West
State  208
Old Q Values:  [ 8671.72944588  2951.29802374 -4228.04879148  6100.26946654]
New Q values:  [ 8671.72944588  2951.29802374 -4228.04879148  9373.30884915]
Reward: -1  Episode Reward:  3
xxxxx
x g x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.31126702e+04 1.96215702e+04 2.91043938e+03]
------
Step:48, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.31126702e+04 1.96215702e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.25670530e+04 1.96215702e+04 2.91043938e+03]
Reward: -1  Episode Reward:  2
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 11075.28303339  3140.19469697]
------
Step:49, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 11075.28303339  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  7801.71222439  3140.19469697]
Reward: -1  Episode Reward:  1
xxxxx
x g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6133.55233539 -6442.16912869 -8192.20126966 11240.66337013]
------
Step:50, Action:West
State  288
Old Q Values:  [ 6133.55233539 -6442.16912869 -8192.20126966 11240.66337013]
New Q values:  [ 6133.55233539 -6442.16912869 -8192.20126966  6836.17901537]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7801.71222439  3140.19469697]
------
Step:51, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549  6485.30886205 71926.07511508]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 71926.07511508]
Reward: -1  Episode Reward:  -1
xxxxx
x   x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6133.55233539 -6442.16912869 -8192.20126966  6836.17901537]
------
Step:52, Action:West
State  288
Old Q Values:  [ 6133.55233539 -6442.16912869 -8192.20126966  6836.17901537]
New Q values:  [ 6133.55233539 -6442.16912869 -8192.20126966 24311.69414067]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 71926.07511508]
------
Step:53, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 71926.07511508]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 38311.66811146]
Reward: 9  Episode Reward:  7
xxxxx
x   x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31786.12688475  2256.66526474  3195.57842484  1875.31501677]
------
Step:54, Action:North
State  257
Old Q Values:  [31786.12688475  2256.66526474  3195.57842484  1875.31501677]
New Q values:  [82676.28682639  2256.66526474  3195.57842484  1875.31501677]
Reward: 100009  Episode Reward:  100016
xxxxx
x   x
xag x
x   x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -746.35376992  1811.65086161  7942.25906201 -4966.32149798]
------
Step:1, Action:East
State  181
Old Q Values:  [ 675.92350432 1894.64317033 4452.04465203  -30.99112081]
New Q values:  [ 675.92350432 1894.64317033 2639.76424729  -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1376.9854068   2845.15462157   239.04887894]
------
Step:2, Action:East
State  200
Old Q Values:  [  62.8218634  3353.88111572 1091.24975501 1141.49622464]
New Q values:  [  62.8218634  3353.88111572 2473.65658776 1141.49622464]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2467.42850429   372.98523174 -8896.20691497  6772.52228586]
------
Step:3, Action:North
State  216
Old Q Values:  [ 2467.42850429   372.98523174 -8896.20691497  6772.52228586]
New Q values:  [ 2902.97535756   372.98523174 -8896.20691497  6772.52228586]
Reward: 9  Episode Reward:  27
xxxxx
x .ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  1.45001279e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  1.45001279e+03]
New Q values:  [ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  1.11962559e+03]
Reward: 9  Episode Reward:  36
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   905.75843272  1780.73491486]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   905.75843272  1780.73491486]
New Q values:  [ -253.44886264 -1902.20915811   905.75843272  1481.13320647]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2564.79746843  718.60978155 -120.29354603]
------
Step:6, Action:South
State  109
Old Q Values:  [-241.10880094  726.38090072   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  590.79268431   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  64.81505849 1002.80108008  909.92809116  154.04646645]
------
Step:7, Action:South
State  189
Old Q Values:  [  64.81505849 1002.80108008  909.92809116  154.04646645]
New Q values:  [  64.81505849 1220.1801337   909.92809116  154.04646645]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2712.19900554   26.73544252 1030.57864962  -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [2712.19900554   26.73544252 1030.57864962  -35.88578819]
New Q values:  [1370.47581034   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 953.9873604  650.32545056   0.        ]
------
Step:9, Action:South
State  191
Old Q Values:  [  3.06655861 953.9873604  650.32545056   0.        ]
New Q values:  [  3.06655861 792.13768726 650.32545056   0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1370.47581034   26.73544252 1030.57864962  -35.88578819]
------
Step:10, Action:North
State  260
Old Q Values:  [ 1585.97380388 -2735.46306511  2524.47615748 -2601.74710518]
New Q values:  [ 1172.25062926 -2735.46306511  2524.47615748 -2601.74710518]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -5.70379540e+03  1.79487036e+03  0.00000000e+00]
------
Step:11, Action:East
State  191
Old Q Values:  [  3.06655861 792.13768726 650.32545056   0.        ]
New Q values:  [  3.06655861 792.13768726 439.48757099   0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[    0.         -5068.94789373   599.85796923     0.        ]
------
Step:12, Action:East
State  202
Old Q Values:  [    0.         -7041.23396577   773.34142896     0.        ]
New Q values:  [    0.         -7041.23396577  2329.27778519     0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1568.40160439 6735.13737869    0.          386.1281519 ]
------
Step:13, Action:South
State  210
Old Q Values:  [2661.57320472 4998.4294065  1542.2004081  2599.28130597]
New Q values:  [2661.57320472 9298.2800048  1542.2004081  2599.28130597]
Reward: 9  Episode Reward:  47
xxxxx
x   x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6133.55233539 -6442.16912869 -8192.20126966 24311.69414067]
------
Step:14, Action:North
State  288
Old Q Values:  [ 6133.55233539 -6442.16912869 -8192.20126966 24311.69414067]
New Q values:  [ 4473.36214776 -6442.16912869 -8192.20126966 24311.69414067]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1568.40160439 6735.13737869    0.          386.1281519 ]
------
Step:15, Action:South
State  216
Old Q Values:  [ 2902.97535756   372.98523174 -8896.20691497  6772.52228586]
New Q values:  [ 2902.97535756  7442.1023349  -8896.20691497  6772.52228586]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4473.36214776 -6442.16912869 -8192.20126966 24311.69414067]
------
Step:16, Action:West
State  288
Old Q Values:  [ 4473.36214776 -6442.16912869 -8192.20126966 24311.69414067]
New Q values:  [ 4473.36214776 -6442.16912869 -8192.20126966 72070.59132359]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x g x
x a x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  590.79268431   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [-241.10880094  590.79268431   -8.57207238 -180.6       ]
New Q values:  [-241.10880094 1033.64634791   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 1894.64317033 2639.76424729  -30.99112081]
------
Step:2, Action:East
State  180
Old Q Values:  [ -746.35376992  1811.65086161  7942.25906201 -4966.32149798]
New Q values:  [ -746.35376992  1811.65086161  4188.46795952 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3353.88111572 2473.65658776 1141.49622464]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  1376.9854068   2845.15462157   239.04887894]
New Q values:  [-2469.90645144  1172.16317503  2845.15462157   239.04887894]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7801.71222439  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 24747.26228683  3140.19469697]
Reward: 9  Episode Reward:  36
xxxxx
x . x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4473.36214776 -6442.16912869 -8192.20126966 72070.59132359]
------
Step:5, Action:West
State  288
Old Q Values:  [ 4473.36214776 -6442.16912869 -8192.20126966 72070.59132359]
New Q values:  [ 4473.36214776 -6442.16912869 -8192.20126966 36251.81521548]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 24747.26228683  3140.19469697]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 24747.26228683  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 20773.84947938  3140.19469697]
Reward: -1  Episode Reward:  34
xxxxx
xg. x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4473.36214776 -6442.16912869 -8192.20126966 36251.81521548]
------
Step:7, Action:West
State  288
Old Q Values:  [ 4473.36214776 -6442.16912869 -8192.20126966 36251.81521548]
New Q values:  [ 4473.36214776 -6442.16912869 -8192.20126966 20732.28093001]
Reward: -1  Episode Reward:  33
xxxxx
x . x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 20773.84947938  3140.19469697]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 20773.84947938  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 14528.62407075  3140.19469697]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x  .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4473.36214776 -6442.16912869 -8192.20126966 20732.28093001]
------
Step:9, Action:West
State  288
Old Q Values:  [ 4473.36214776 -6442.16912869 -8192.20126966 20732.28093001]
New Q values:  [ 4473.36214776 -6442.16912869 -8192.20126966  6650.89959323]
Reward: -10001  Episode Reward:  -9969
xxxxx
x . x
x  .x
x.g x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 675.92350432 1894.64317033 2639.76424729  -30.99112081]
------
Step:1, Action:South
State  181
Old Q Values:  [ 675.92350432 1894.64317033 2639.76424729  -30.99112081]
New Q values:  [ 675.92350432 1174.40001123 2639.76424729  -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1370.47581034   26.73544252 1030.57864962  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [1370.47581034   26.73544252 1030.57864962  -35.88578819]
New Q values:  [1688.27025167   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 3802.26642511    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 3802.26642511    0.        ]
New Q values:  [ 877.23516594 1244.39473591 3592.74549379    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  6.90812975e+03  1.20371620e+03]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.25670530e+04 1.96215702e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.25670530e+04 1.06660207e+04 2.91043938e+03]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8671.72944588  2951.29802374 -4228.04879148  9373.30884915]
------
Step:5, Action:North
State  208
Old Q Values:  [ 8671.72944588  2951.29802374 -4228.04879148  9373.30884915]
New Q values:  [23413.18647249  2951.29802374 -4228.04879148  9373.30884915]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237  8169.33589502  -180.00807518 66463.64898045]
------
Step:6, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  1.11962559e+03]
New Q values:  [ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  3.31053185e+04]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   33009.11692778 108840.22747196]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2361.4403651    967.51250173]
New Q values:  [-9594.56523706 -8069.05606225  2361.4403651    702.49890507]
Reward: 9  Episode Reward:  43
xxxxx
xag x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1033.64634791   -8.57207238 -180.6       ]
------
Step:8, Action:South
State  108
Old Q Values:  [-8463.16477134  3341.96369955   492.53480369     0.        ]
New Q values:  [-8463.16477134  3108.92046516   492.53480369     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  4287.90919792  5909.1166178      0.        ]
------
Step:9, Action:East
State  188
Old Q Values:  [-6523.78898263  4287.90919792  5909.1166178      0.        ]
New Q values:  [-6523.78898263  4287.90919792  3369.21098184     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xga x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3353.88111572 2473.65658776 1141.49622464]
------
Step:10, Action:South
State  206
Old Q Values:  [    0.         -5068.94789373   599.85796923     0.        ]
New Q values:  [    0.         -1406.21014518   599.85796923     0.        ]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 14528.62407075  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 67812.11950627  3140.19469697]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23413.18647249  2951.29802374 -4228.04879148  9373.30884915]
------
Step:1, Action:North
State  216
Old Q Values:  [ 2902.97535756  7442.1023349  -8896.20691497  6772.52228586]
New Q values:  [ 2657.55375034  7442.1023349  -8896.20691497  6772.52228586]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4969.87869106 -2383.80019164   158.26808661]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  6.36867985e+03 -3.22965309e-01  3.31053185e+04]
New Q values:  [ 7.64171987e+01  4.77950264e+03 -3.22965309e-01  3.31053185e+04]
Reward: -1  Episode Reward:  8
xxxxx
x . x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2657.55375034  7442.1023349  -8896.20691497  6772.52228586]
------
Step:3, Action:South
State  210
Old Q Values:  [2661.57320472 9298.2800048  1542.2004081  2599.28130597]
New Q values:  [2661.57320472 5719.98187989 1542.2004081  2599.28130597]
Reward: 9  Episode Reward:  17
xxxxx
x . x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4473.36214776 -6442.16912869 -8192.20126966  6650.89959323]
------
Step:4, Action:North
State  288
Old Q Values:  [ 4473.36214776 -6442.16912869 -8192.20126966  6650.89959323]
New Q values:  [ 3504.73942307 -6442.16912869 -8192.20126966  6650.89959323]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2661.57320472 5719.98187989 1542.2004081  2599.28130597]
------
Step:5, Action:South
State  210
Old Q Values:  [2661.57320472 5719.98187989 1542.2004081  2599.28130597]
New Q values:  [2661.57320472 4282.66262992 1542.2004081  2599.28130597]
Reward: -1  Episode Reward:  15
xxxxx
x . x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3504.73942307 -6442.16912869 -8192.20126966  6650.89959323]
------
Step:6, Action:West
State  288
Old Q Values:  [ 3504.73942307 -6442.16912869 -8192.20126966  6650.89959323]
New Q values:  [ 3504.73942307 -6442.16912869 -8192.20126966 17009.39568917]
Reward: -9991  Episode Reward:  -9976
xxxxx
x . x
x.. x
x.g x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.25670530e+04 1.06660207e+04 2.91043938e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144  1172.16317503  2845.15462157   239.04887894]
New Q values:  [-2469.90645144  1090.23428232  2845.15462157   239.04887894]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 67812.11950627  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 32233.06650926  3140.19469697]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3504.73942307 -6442.16912869 -8192.20126966 17009.39568917]
------
Step:3, Action:West
State  288
Old Q Values:  [ 3504.73942307 -6442.16912869 -8192.20126966 17009.39568917]
New Q values:  [ 3504.73942307 -6442.16912869 -8192.20126966 16473.07822845]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 32233.06650926  3140.19469697]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 32233.06650926  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 17834.55007224  3140.19469697]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3504.73942307 -6442.16912869 -8192.20126966 16473.07822845]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3504.73942307 -6442.16912869 -8192.20126966 16473.07822845]
New Q values:  [ 3504.73942307 -6442.16912869 -8192.20126966 11938.99631305]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 17834.55007224  3140.19469697]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 17834.55007224  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799 10714.91892281  3140.19469697]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3504.73942307 -6442.16912869 -8192.20126966 11938.99631305]
------
Step:7, Action:West
State  288
Old Q Values:  [ 3504.73942307 -6442.16912869 -8192.20126966 11938.99631305]
New Q values:  [ 3504.73942307 -6442.16912869 -8192.20126966  7989.47420206]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10714.91892281  3140.19469697]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10714.91892281  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  6682.20982974  3140.19469697]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x. .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3504.73942307 -6442.16912869 -8192.20126966  7989.47420206]
------
Step:9, Action:West
State  288
Old Q Values:  [ 3504.73942307 -6442.16912869 -8192.20126966  7989.47420206]
New Q values:  [ 3504.73942307 -6442.16912869 -8192.20126966  5199.85262975]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6682.20982974  3140.19469697]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6682.20982974  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  4232.23972082  3140.19469697]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3504.73942307 -6442.16912869 -8192.20126966  5199.85262975]
------
Step:11, Action:West
State  288
Old Q Values:  [ 3504.73942307 -6442.16912869 -8192.20126966  5199.85262975]
New Q values:  [ 3504.73942307 -6442.16912869 -8192.20126966 13572.84148534]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 38311.66811146]
------
Step:12, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 38311.66811146]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 15836.54832008]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x. .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1688.27025167   26.73544252 1030.57864962  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [1688.27025167   26.73544252 1030.57864962  -35.88578819]
New Q values:  [1758.5317488    26.73544252 1030.57864962  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 3592.74549379    0.        ]
------
Step:14, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 3592.74549379    0.        ]
New Q values:  [ 877.23516594 1244.39473591 2630.85861385    0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194 3981.2013878   549.89931413 1915.70494401]
------
Step:15, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -1.87193072e+03  6.47834386e+03  0.00000000e+00]
New Q values:  [-2.78872080e-01 -6.13340328e+03  6.47834386e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -9975
xxxxx
x ..x
x  .x
x g x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2564.79746843  718.60978155 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 2437.23738705    5.4           0.        ]
New Q values:  [ 221.30610858 1769.55253897    5.4           0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 2630.85861385    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 2630.85861385    0.        ]
New Q values:  [ 877.23516594 1244.39473591 3130.18236928    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  6.90812975e+03  1.20371620e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  6.90812975e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  4.04745069e+03  1.20371620e+03]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2661.57320472 4282.66262992 1542.2004081  2599.28130597]
------
Step:4, Action:South
State  210
Old Q Values:  [2661.57320472 4282.66262992 1542.2004081  2599.28130597]
New Q values:  [2661.57320472 5790.31749757 1542.2004081  2599.28130597]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3504.73942307 -6442.16912869 -8192.20126966 13572.84148534]
------
Step:5, Action:North
State  288
Old Q Values:  [ 3504.73942307 -6442.16912869 -8192.20126966 13572.84148534]
New Q values:  [ 8425.25171097 -6442.16912869 -8192.20126966 13572.84148534]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23413.18647249  2951.29802374 -4228.04879148  9373.30884915]
------
Step:6, Action:North
State  216
Old Q Values:  [ 2657.55375034  7442.1023349  -8896.20691497  6772.52228586]
New Q values:  [ 2559.38510746  7442.1023349  -8896.20691497  6772.52228586]
Reward: 9  Episode Reward:  34
xxxxx
x gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4969.87869106 -2383.80019164   158.26808661]
------
Step:7, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.77950264e+03 -3.22965309e-01  3.31053185e+04]
New Q values:  [ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  3.31053185e+04]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2559.38510746  7442.1023349  -8896.20691497  6772.52228586]
------
Step:8, Action:South
State  210
Old Q Values:  [2661.57320472 5790.31749757 1542.2004081  2599.28130597]
New Q values:  [2661.57320472 6387.37944463 1542.2004081  2599.28130597]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8425.25171097 -6442.16912869 -8192.20126966 13572.84148534]
------
Step:9, Action:North
State  288
Old Q Values:  [ 8425.25171097 -6442.16912869 -8192.20126966 13572.84148534]
New Q values:  [ 5390.041898   -6442.16912869 -8192.20126966 13572.84148534]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1568.40160439 6735.13737869    0.          386.1281519 ]
------
Step:10, Action:North
State  210
Old Q Values:  [2661.57320472 6387.37944463 1542.2004081  2599.28130597]
New Q values:  [21003.12397602  6387.37944463  1542.2004081   2599.28130597]
Reward: -1  Episode Reward:  30
xxxxx
x .ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237  8169.33589502  -180.00807518 66463.64898045]
------
Step:11, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  3.31053185e+04]
New Q values:  [ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  1.36918674e+04]
Reward: 9  Episode Reward:  39
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   905.75843272  1481.13320647]
------
Step:12, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   905.75843272  1481.13320647]
New Q values:  [ -253.44886264 -1902.20915811   905.75843272  1637.21944137]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3484.5538626   514.74819532 -252.78192178]
------
Step:13, Action:South
State  105
Old Q Values:  [-180.6        2668.19157385   65.14560537    0.        ]
New Q values:  [-180.6        2190.11151151   65.14560537    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         3744.78293988 -178.98      ]
------
Step:14, Action:East
State  185
Old Q Values:  [1354.56432284    0.         3744.78293988 -178.98      ]
New Q values:  [1354.56432284    0.         2503.47751067 -178.98      ]
Reward: -1  Episode Reward:  36
xxxxx
x g x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3353.88111572 2473.65658776 1141.49622464]
------
Step:15, Action:South
State  200
Old Q Values:  [  62.8218634  3353.88111572 2473.65658776 1141.49622464]
New Q values:  [  62.8218634  2616.62436253 2473.65658776 1141.49622464]
Reward: 9  Episode Reward:  45
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4232.23972082  3140.19469697]
------
Step:16, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4232.23972082  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  5764.14833393  3140.19469697]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5390.041898   -6442.16912869 -8192.20126966 13572.84148534]
------
Step:17, Action:West
State  288
Old Q Values:  [ 5390.041898   -6442.16912869 -8192.20126966 13572.84148534]
New Q values:  [ 5390.041898   -6442.16912869 -8192.20126966  7157.78109431]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5764.14833393  3140.19469697]
------
Step:18, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5764.14833393  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  4452.39366187  3140.19469697]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5390.041898   -6442.16912869 -8192.20126966  7157.78109431]
------
Step:19, Action:West
State  288
Old Q Values:  [ 5390.041898   -6442.16912869 -8192.20126966  7157.78109431]
New Q values:  [ 5390.041898   -6442.16912869 -8192.20126966  4198.23053629]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4452.39366187  3140.19469697]
------
Step:20, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4452.39366187  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  3397.37003415  3140.19469697]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5390.041898   -6442.16912869 -8192.20126966  4198.23053629]
------
Step:21, Action:North
State  288
Old Q Values:  [ 5390.041898   -6442.16912869 -8192.20126966  4198.23053629]
New Q values:  [ 8456.35395201 -6442.16912869 -8192.20126966  4198.23053629]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21003.12397602  6387.37944463  1542.2004081   2599.28130597]
------
Step:22, Action:North
State  216
Old Q Values:  [ 2559.38510746  7442.1023349  -8896.20691497  6772.52228586]
New Q values:  [ 5130.71424896  7442.1023349  -8896.20691497  6772.52228586]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  1.36918674e+04]
------
Step:23, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  1.36918674e+04]
New Q values:  [ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  6.41875741e+03]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3142.03490244   214.49203629]
------
Step:24, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2361.4403651    702.49890507]
New Q values:  [-9594.56523706 -8069.05606225  2434.93975336   702.49890507]
Reward: -1  Episode Reward:  36
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4969.87869106 -2383.80019164   158.26808661]
------
Step:25, Action:South
State  136
Old Q Values:  [ -170.77177351  4969.87869106 -2383.80019164   158.26808661]
New Q values:  [ -170.77177351  4219.98217689 -2383.80019164   158.26808661]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5130.71424896  7442.1023349  -8896.20691497  6772.52228586]
------
Step:26, Action:South
State  216
Old Q Values:  [ 5130.71424896  7442.1023349  -8896.20691497  6772.52228586]
New Q values:  [ 5130.71424896  5513.14711956 -8896.20691497  6772.52228586]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8456.35395201 -6442.16912869 -8192.20126966  4198.23053629]
------
Step:27, Action:North
State  288
Old Q Values:  [ 8456.35395201 -6442.16912869 -8192.20126966  4198.23053629]
New Q values:  [ 5413.69826656 -6442.16912869 -8192.20126966  4198.23053629]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5130.71424896  5513.14711956 -8896.20691497  6772.52228586]
------
Step:28, Action:West
State  216
Old Q Values:  [ 5130.71424896  5513.14711956 -8896.20691497  6772.52228586]
New Q values:  [ 5130.71424896  5513.14711956 -8896.20691497  3493.3962231 ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2616.62436253 2473.65658776 1141.49622464]
------
Step:29, Action:South
State  200
Old Q Values:  [  62.8218634  2616.62436253 2473.65658776 1141.49622464]
New Q values:  [  62.8218634  2065.26075526 2473.65658776 1141.49622464]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3397.37003415  3140.19469697]
------
Step:30, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3397.37003415  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  2982.45749363  3140.19469697]
Reward: -1  Episode Reward:  30
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5413.69826656 -6442.16912869 -8192.20126966  4198.23053629]
------
Step:31, Action:North
State  288
Old Q Values:  [ 5413.69826656 -6442.16912869 -8192.20126966  4198.23053629]
New Q values:  [ 3818.82344249 -6442.16912869 -8192.20126966  4198.23053629]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5130.71424896  5513.14711956 -8896.20691497  3493.3962231 ]
------
Step:32, Action:South
State  216
Old Q Values:  [ 5130.71424896  5513.14711956 -8896.20691497  3493.3962231 ]
New Q values:  [ 5130.71424896  3464.12800871 -8896.20691497  3493.3962231 ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3818.82344249 -6442.16912869 -8192.20126966  4198.23053629]
------
Step:33, Action:West
State  288
Old Q Values:  [ 3818.82344249 -6442.16912869 -8192.20126966  4198.23053629]
New Q values:  [ 3818.82344249 -6442.16912869 -8192.20126966  2620.7506236 ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2982.45749363  3140.19469697]
------
Step:34, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2982.45749363  3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  2338.0300302   3140.19469697]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3818.82344249 -6442.16912869 -8192.20126966  2620.7506236 ]
------
Step:35, Action:North
State  288
Old Q Values:  [ 3818.82344249 -6442.16912869 -8192.20126966  2620.7506236 ]
New Q values:  [ 3547.4705906  -6442.16912869 -8192.20126966  2620.7506236 ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1568.40160439 6735.13737869    0.          386.1281519 ]
------
Step:36, Action:North
State  218
Old Q Values:  [1568.40160439 6735.13737869    0.          386.1281519 ]
New Q values:  [2552.38786537 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  6.41875741e+03]
------
Step:37, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  6.41875741e+03]
New Q values:  [ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  3.50951344e+03]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3142.03490244   214.49203629]
------
Step:38, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3142.03490244   214.49203629]
New Q values:  [ -281.736      -1150.91067548  2499.36348811   214.49203629]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  3.50951344e+03]
------
Step:39, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.14383176e+03 -3.22965309e-01  3.50951344e+03]
New Q values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  3.50951344e+03]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2552.38786537 6735.13737869    0.          386.1281519 ]
------
Step:40, Action:North
State  216
Old Q Values:  [ 5130.71424896  3464.12800871 -8896.20691497  3493.3962231 ]
New Q values:  [ 3154.92787452  3464.12800871 -8896.20691497  3493.3962231 ]
Reward: -1  Episode Reward:  20
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  3.50951344e+03]
------
Step:41, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  3.50951344e+03]
New Q values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  1.89437121e+03]
Reward: -1  Episode Reward:  19
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   905.75843272  1637.21944137]
------
Step:42, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   905.75843272  1637.21944137]
New Q values:  [ -253.44886264 -1902.20915811   905.75843272  1699.65393533]
Reward: -1  Episode Reward:  18
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3484.5538626   514.74819532 -252.78192178]
------
Step:43, Action:South
State  107
Old Q Values:  [-252.35169558 3484.5538626   514.74819532 -252.78192178]
New Q values:  [-252.35169558 2144.26479824  514.74819532 -252.78192178]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[1354.56432284    0.         2503.47751067 -178.98      ]
------
Step:44, Action:North
State  187
Old Q Values:  [1537.11065629    0.          526.18496922    0.        ]
New Q values:  [1257.52370199    0.          526.18496922    0.        ]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2144.26479824  514.74819532 -252.78192178]
------
Step:45, Action:South
State  107
Old Q Values:  [-252.35169558 2144.26479824  514.74819532 -252.78192178]
New Q values:  [-252.35169558 1234.36302989  514.74819532 -252.78192178]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[1257.52370199    0.          526.18496922    0.        ]
------
Step:46, Action:North
State  185
Old Q Values:  [1354.56432284    0.         2503.47751067 -178.98      ]
New Q values:  [ 911.5346381     0.         2503.47751067 -178.98      ]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1234.36302989  514.74819532 -252.78192178]
------
Step:47, Action:South
State  107
Old Q Values:  [-252.35169558 1234.36302989  514.74819532 -252.78192178]
New Q values:  [-252.35169558 1244.18846516  514.74819532 -252.78192178]
Reward: -1  Episode Reward:  13
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 911.5346381     0.         2503.47751067 -178.98      ]
------
Step:48, Action:North
State  185
Old Q Values:  [ 911.5346381     0.         2503.47751067 -178.98      ]
New Q values:  [ 737.27039479    0.         2503.47751067 -178.98      ]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1244.18846516  514.74819532 -252.78192178]
------
Step:49, Action:South
State  107
Old Q Values:  [-252.35169558 1244.18846516  514.74819532 -252.78192178]
New Q values:  [-252.35169558 1248.11863926  514.74819532 -252.78192178]
Reward: -1  Episode Reward:  11
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 737.27039479    0.         2503.47751067 -178.98      ]
------
Step:50, Action:North
State  185
Old Q Values:  [ 737.27039479    0.         2503.47751067 -178.98      ]
New Q values:  [ 951.34161137    0.         2503.47751067 -178.98      ]
Reward: -1  Episode Reward:  10
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        2190.11151151   65.14560537    0.        ]
------
Step:51, Action:South
State  105
Old Q Values:  [-180.6        2190.11151151   65.14560537    0.        ]
New Q values:  [-180.6        1626.4878578    65.14560537    0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 951.34161137    0.         2503.47751067 -178.98      ]
------
Step:52, Action:East
State  185
Old Q Values:  [ 951.34161137    0.         2503.47751067 -178.98      ]
New Q values:  [ 951.34161137    0.         1742.8879806  -178.98      ]
Reward: -1  Episode Reward:  8
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2065.26075526 2473.65658776 1141.49622464]
------
Step:53, Action:East
State  201
Old Q Values:  [ 2.33354578e+00  4.65122563e+03 -8.87652194e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  4.65122563e+03 -2.50318991e+03  2.00341972e+02]
Reward: -1  Episode Reward:  7
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3154.92787452  3464.12800871 -8896.20691497  3493.3962231 ]
------
Step:54, Action:West
State  216
Old Q Values:  [ 3154.92787452  3464.12800871 -8896.20691497  3493.3962231 ]
New Q values:  [ 3154.92787452  3464.12800871 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  6
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2065.26075526 2473.65658776 1141.49622464]
------
Step:55, Action:East
State  200
Old Q Values:  [  62.8218634  2065.26075526 2473.65658776 1141.49622464]
New Q values:  [  62.8218634  2065.26075526 2028.10103772 1141.49622464]
Reward: -1  Episode Reward:  5
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3154.92787452  3464.12800871 -8896.20691497  2138.85546557]
------
Step:56, Action:South
State  216
Old Q Values:  [ 3154.92787452  3464.12800871 -8896.20691497  2138.85546557]
New Q values:  [ 3154.92787452  2449.29238066 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  4
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3547.4705906  -6442.16912869 -8192.20126966  2620.7506236 ]
------
Step:57, Action:North
State  288
Old Q Values:  [ 3547.4705906  -6442.16912869 -8192.20126966  2620.7506236 ]
New Q values:  [ 2364.8665986  -6442.16912869 -8192.20126966  2620.7506236 ]
Reward: -1  Episode Reward:  3
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3154.92787452  2449.29238066 -8896.20691497  2138.85546557]
------
Step:58, Action:North
State  216
Old Q Values:  [ 3154.92787452  2449.29238066 -8896.20691497  2138.85546557]
New Q values:  [ 2527.36580288  2449.29238066 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  2
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4219.98217689 -2383.80019164   158.26808661]
------
Step:59, Action:South
State  136
Old Q Values:  [ -170.77177351  4219.98217689 -2383.80019164   158.26808661]
New Q values:  [ -170.77177351  2445.60261162 -2383.80019164   158.26808661]
Reward: -1  Episode Reward:  1
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2527.36580288  2449.29238066 -8896.20691497  2138.85546557]
------
Step:60, Action:South
State  216
Old Q Values:  [ 2527.36580288  2449.29238066 -8896.20691497  2138.85546557]
New Q values:  [ 2527.36580288  1765.34213935 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  0
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2364.8665986  -6442.16912869 -8192.20126966  2620.7506236 ]
------
Step:61, Action:West
State  288
Old Q Values:  [ 2364.8665986  -6442.16912869 -8192.20126966  2620.7506236 ]
New Q values:  [ 2364.8665986  -6442.16912869 -8192.20126966  5798.66474547]
Reward: -1  Episode Reward:  -1
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 15836.54832008]
------
Step:62, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2338.0300302   3140.19469697]
New Q values:  [-2527.46239811 -8521.23367799  2338.0300302  86064.3639267 ]
Reward: 100009  Episode Reward:  100008
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8684.98912339  9882.11503699  1099.96026581]
------
Step:1, Action:East
State  193
Old Q Values:  [-5922.26708831  8684.98912339  9882.11503699  1099.96026581]
New Q values:  [-5922.26708831  8684.98912339  4982.20195654  1099.96026581]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x. gx
x...x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2364.8665986  -6442.16912869 -8192.20126966  5798.66474547]
------
Step:1, Action:West
State  288
Old Q Values:  [ 2364.8665986  -6442.16912869 -8192.20126966  5798.66474547]
New Q values:  [ 2364.8665986  -6442.16912869 -8192.20126966 28144.1750762 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2338.0300302  86064.3639267 ]
------
Step:2, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 15836.54832008]
New Q values:  [ 870.35122762 -168.92307549 4644.37724943 6867.57885267]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1758.5317488    26.73544252 1030.57864962  -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [1758.5317488    26.73544252 1030.57864962  -35.88578819]
New Q values:  [1647.86741031   26.73544252 1030.57864962  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 3130.18236928    0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [ 675.92350432 1174.40001123 2639.76424729  -30.99112081]
New Q values:  [ 675.92350432 1174.40001123 3666.80243593  -30.99112081]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8684.98912339  4982.20195654  1099.96026581]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.25670530e+04 1.06660207e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.08455304e+04 1.06660207e+04 2.91043938e+03]
Reward: -1  Episode Reward:  35
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2338.0300302  86064.3639267 ]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2338.0300302  86064.3639267 ]
New Q values:  [-2527.46239811 -8521.23367799  2338.0300302  35182.48841793]
Reward: -1  Episode Reward:  34
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1172.25062926 -2735.46306511  2524.47615748 -2601.74710518]
------
Step:7, Action:East
State  260
Old Q Values:  [ 1172.25062926 -2735.46306511  2524.47615748 -2601.74710518]
New Q values:  [ 1172.25062926 -2735.46306511 11563.93698837 -2601.74710518]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2338.0300302  35182.48841793]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2338.0300302  35182.48841793]
New Q values:  [-2527.46239811 -8521.23367799  2338.0300302  38875.28141509]
Reward: -1  Episode Reward:  32
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[82676.28682639  2256.66526474  3195.57842484  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [82676.28682639  2256.66526474  3195.57842484  1875.31501677]
New Q values:  [43026.35080304  2256.66526474  3195.57842484  1875.31501677]
Reward: -1  Episode Reward:  31
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33188.12024162 24935.33673146  6803.82810591     0.        ]
------
Step:10, Action:North
State  181
Old Q Values:  [ 675.92350432 1174.40001123 3666.80243593  -30.99112081]
New Q values:  [ 688.746565   1174.40001123 3666.80243593  -30.99112081]
Reward: 9  Episode Reward:  40
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         1376.59054423    0.            0.        ]
------
Step:11, Action:South
State  103
Old Q Values:  [ 221.30610858 1769.55253897    5.4           0.        ]
New Q values:  [ 221.30610858 1807.26174637    5.4           0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 688.746565   1174.40001123 3666.80243593  -30.99112081]
------
Step:12, Action:South
State  181
Old Q Values:  [ 688.746565   1174.40001123 3666.80243593  -30.99112081]
New Q values:  [ 688.746565    963.52022758 3666.80243593  -30.99112081]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1647.86741031   26.73544252 1030.57864962  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [1647.86741031   26.73544252 1030.57864962  -35.88578819]
New Q values:  [1758.5876949    26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 688.746565    963.52022758 3666.80243593  -30.99112081]
------
Step:14, Action:South
State  181
Old Q Values:  [ 688.746565    963.52022758 3666.80243593  -30.99112081]
New Q values:  [ 688.746565    912.3843995  3666.80243593  -30.99112081]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1758.5876949    26.73544252 1030.57864962  -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [1758.5876949    26.73544252 1030.57864962  -35.88578819]
New Q values:  [1641.88978875   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 3130.18236928    0.        ]
------
Step:16, Action:East
State  181
Old Q Values:  [ 688.746565    912.3843995  3666.80243593  -30.99112081]
New Q values:  [ 688.746565    912.3843995  4071.61771139  -30.99112081]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8684.98912339  4982.20195654  1099.96026581]
------
Step:17, Action:South
State  193
Old Q Values:  [-5922.26708831  8684.98912339  4982.20195654  1099.96026581]
New Q values:  [-5922.26708831  5533.66930516  4982.20195654  1099.96026581]
Reward: -1  Episode Reward:  33
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 6867.57885267]
------
Step:18, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  1201.07018831]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103   972.39501195]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1641.88978875   26.73544252 1030.57864962  -35.88578819]
------
Step:19, Action:North
State  261
Old Q Values:  [1641.88978875   26.73544252 1030.57864962  -35.88578819]
New Q values:  [1877.64122892   26.73544252 1030.57864962  -35.88578819]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 688.746565    912.3843995  4071.61771139  -30.99112081]
------
Step:20, Action:South
State  180
Old Q Values:  [ -746.35376992  1811.65086161  4188.46795952 -4966.32149798]
New Q values:  [ -746.35376992  4193.24144116  4188.46795952 -4966.32149798]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1172.25062926 -2735.46306511 11563.93698837 -2601.74710518]
------
Step:21, Action:East
State  260
Old Q Values:  [ 1172.25062926 -2735.46306511 11563.93698837 -2601.74710518]
New Q values:  [ 1172.25062926 -2735.46306511 16287.55921987 -2601.74710518]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2338.0300302  38875.28141509]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2338.0300302  38875.28141509]
New Q values:  [-2527.46239811 -8521.23367799  9377.86453494 38875.28141509]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2364.8665986  -6442.16912869 -8192.20126966 28144.1750762 ]
------
Step:23, Action:West
State  288
Old Q Values:  [ 2364.8665986  -6442.16912869 -8192.20126966 28144.1750762 ]
New Q values:  [ 2364.8665986  -6442.16912869 -8192.20126966 22919.65445501]
Reward: -1  Episode Reward:  27
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9377.86453494 38875.28141509]
------
Step:24, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9377.86453494 38875.28141509]
New Q values:  [-2527.46239811 -8521.23367799  9377.86453494 20435.780332  ]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1172.25062926 -2735.46306511 16287.55921987 -2601.74710518]
------
Step:25, Action:East
State  261
Old Q Values:  [1877.64122892   26.73544252 1030.57864962  -35.88578819]
New Q values:  [1877.64122892   26.73544252 6542.36555945  -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9377.86453494 20435.780332  ]
------
Step:26, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9377.86453494 20435.780332  ]
New Q values:  [-2527.46239811 -8521.23367799  9377.86453494 10136.42180063]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1877.64122892   26.73544252 6542.36555945  -35.88578819]
------
Step:27, Action:North
State  261
Old Q Values:  [1877.64122892   26.73544252 6542.36555945  -35.88578819]
New Q values:  [1689.51120235   26.73544252 6542.36555945  -35.88578819]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 3130.18236928    0.        ]
------
Step:28, Action:East
State  181
Old Q Values:  [ 688.746565    912.3843995  4071.61771139  -30.99112081]
New Q values:  [ 688.746565    912.3843995  3288.1478761   -30.99112081]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5533.66930516  4982.20195654  1099.96026581]
------
Step:29, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.08455304e+04 1.06660207e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.53785387e+04 1.06660207e+04 2.91043938e+03]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9377.86453494 10136.42180063]
------
Step:30, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9377.86453494 10136.42180063]
New Q values:  [-2527.46239811 -8521.23367799  9377.86453494  6016.67838809]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1689.51120235   26.73544252 6542.36555945  -35.88578819]
------
Step:31, Action:North
State  261
Old Q Values:  [1689.51120235   26.73544252 6542.36555945  -35.88578819]
New Q values:  [1614.25919173   26.73544252 6542.36555945  -35.88578819]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 3130.18236928    0.        ]
------
Step:32, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 3130.18236928    0.        ]
New Q values:  [ 877.23516594 1244.39473591 2465.7081539     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  4.04745069e+03  1.20371620e+03]
------
Step:33, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  4.04745069e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21003.12397602  6387.37944463  1542.2004081   2599.28130597]
------
Step:34, Action:North
State  208
Old Q Values:  [23413.18647249  2951.29802374 -4228.04879148  9373.30884915]
New Q values:  [29309.76928313  2951.29802374 -4228.04879148  9373.30884915]
Reward: 9  Episode Reward:  26
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237  8169.33589502  -180.00807518 66463.64898045]
------
Step:35, Action:West
State  128
Old Q Values:  [ 11374.93691792  22352.84815254  -8652.84       132497.26237104]
New Q values:  [ 11374.93691792  22352.84815254  -8652.84       146289.23386729]
Reward: 100009  Episode Reward:  100035
xxxxx
xga x
x   x
x   x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 2465.7081539     0.        ]
New Q values:  [ 877.23516594 1244.39473591 3367.47850188    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.53785387e+04 1.06660207e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.53785387e+04 1.30647391e+04 2.91043938e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29309.76928313  2951.29802374 -4228.04879148  9373.30884915]
------
Step:3, Action:North
State  216
Old Q Values:  [ 2527.36580288  1765.34213935 -8896.20691497  2138.85546557]
New Q values:  [ 1750.02710464  1765.34213935 -8896.20691497  2138.85546557]
Reward: 9  Episode Reward:  27
xxxxx
x.gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2445.60261162 -2383.80019164   158.26808661]
------
Step:4, Action:South
State  130
Old Q Values:  [46177.80406237  8169.33589502  -180.00807518 66463.64898045]
New Q values:  [46177.80406237 12060.06514295  -180.00807518 66463.64898045]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29309.76928313  2951.29802374 -4228.04879148  9373.30884915]
------
Step:5, Action:North
State  208
Old Q Values:  [29309.76928313  2951.29802374 -4228.04879148  9373.30884915]
New Q values:  [31662.40240739  2951.29802374 -4228.04879148  9373.30884915]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 66463.64898045]
------
Step:6, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  1.89437121e+03]
New Q values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  3.34152167e+04]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   33009.11692778 108840.22747196]
------
Step:7, Action:West
State  126
Old Q Values:  [   0.          331.64678262 2473.27977453 1713.51474577]
New Q values:  [   0.          331.64678262 2473.27977453 1460.24513884]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2564.79746843  718.60978155 -120.29354603]
------
Step:8, Action:South
State  110
Old Q Values:  [ -239.29051573 -1734.4785591    365.96192905  -180.6       ]
New Q values:  [-239.29051573 -155.93031593  365.96192905 -180.6       ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -5.70379540e+03  1.79487036e+03  0.00000000e+00]
------
Step:9, Action:East
State  191
Old Q Values:  [  3.06655861 792.13768726 439.48757099   0.        ]
New Q values:  [  3.06655861 792.13768726 355.15241917   0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[    0.         -1406.21014518   599.85796923     0.        ]
------
Step:10, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.39942508e+04 3.41531787e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.39942508e+04 3.38606836e+03 0.00000000e+00]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2552.38786537 6735.13737869    0.          386.1281519 ]
------
Step:11, Action:North
State  216
Old Q Values:  [ 1750.02710464  1765.34213935 -8896.20691497  2138.85546557]
New Q values:  [10723.97585913  1765.34213935 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  3.34152167e+04]
------
Step:12, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  3.34152167e+04]
New Q values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  1.41152957e+04]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2499.36348811   214.49203629]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2499.36348811   214.49203629]
New Q values:  [ -281.736      -1150.91067548  5233.73411608   214.49203629]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  1.41152957e+04]
------
Step:14, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  1.41152957e+04]
New Q values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  7.21563853e+03]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  5233.73411608   214.49203629]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  5233.73411608   214.49203629]
New Q values:  [ -281.736      -1150.91067548  4257.58520522   214.49203629]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  7.21563853e+03]
------
Step:16, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  7.21563853e+03]
New Q values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  4.16293097e+03]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4257.58520522   214.49203629]
------
Step:17, Action:East
State  126
Old Q Values:  [   0.          331.64678262 2473.27977453 1460.24513884]
New Q values:  [   0.          331.64678262 2237.59120179 1460.24513884]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  4.16293097e+03]
------
Step:18, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  4.16293097e+03]
New Q values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  2.94184795e+03]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4257.58520522   214.49203629]
------
Step:19, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   905.75843272  1699.65393533]
New Q values:  [ -253.44886264 -1902.20915811  1464.94554802  1699.65393533]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  2.94184795e+03]
------
Step:20, Action:West
State  136
Old Q Values:  [ -170.77177351  2445.60261162 -2383.80019164   158.26808661]
New Q values:  [ -170.77177351  2445.60261162 -2383.80019164   132.13805377]
Reward: -1  Episode Reward:  30
xxxxx
x agx
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   231.43606375]
------
Step:21, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2434.93975336   702.49890507]
New Q values:  [-9594.56523706 -8069.05606225  2434.93975336   590.4934664 ]
Reward: -1  Episode Reward:  29
xxxxx
xag x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1033.64634791   -8.57207238 -180.6       ]
------
Step:22, Action:South
State  108
Old Q Values:  [-8463.16477134  3108.92046516   492.53480369     0.        ]
New Q values:  [-8463.16477134  2529.34094544   492.53480369     0.        ]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  4287.90919792  3369.21098184     0.        ]
------
Step:23, Action:South
State  188
Old Q Values:  [-6523.78898263  4287.90919792  3369.21098184     0.        ]
New Q values:  [-6523.78898263  6600.83144513  3369.21098184     0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1172.25062926 -2735.46306511 16287.55921987 -2601.74710518]
------
Step:24, Action:East
State  260
Old Q Values:  [ 1172.25062926 -2735.46306511 16287.55921987 -2601.74710518]
New Q values:  [ 1172.25062926 -2735.46306511  7136.39270026 -2601.74710518]
Reward: 9  Episode Reward:  36
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103   972.39501195]
------
Step:25, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9377.86453494  6016.67838809]
New Q values:  [-2527.46239811 -8521.23367799 70632.44215048  6016.67838809]
Reward: 100009  Episode Reward:  100045
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 6867.57885267]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 70632.44215048  6016.67838809]
New Q values:  [-2527.46239811 -8521.23367799 70632.44215048  4374.78102307]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1614.25919173   26.73544252 6542.36555945  -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [1614.25919173   26.73544252 6542.36555945  -35.88578819]
New Q values:  [ 1614.25919173    26.73544252 23806.07886892   -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 70632.44215048  4374.78102307]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 70632.44215048  4374.78102307]
New Q values:  [-2527.46239811 -8521.23367799 35134.27319669  4374.78102307]
Reward: 9  Episode Reward:  17
xxxxx
x.g x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2364.8665986  -6442.16912869 -8192.20126966 22919.65445501]
------
Step:4, Action:West
State  288
Old Q Values:  [ 2364.8665986  -6442.16912869 -8192.20126966 22919.65445501]
New Q values:  [ 2364.8665986  -6442.16912869 -8192.20126966 11227.5354378 ]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 6867.57885267]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 35134.27319669  4374.78102307]
New Q values:  [-2527.46239811 -8521.23367799 35134.27319669 14657.21765014]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[43026.35080304  2256.66526474  3195.57842484  1875.31501677]
------
Step:6, Action:North
State  261
Old Q Values:  [ 1614.25919173    26.73544252 23806.07886892   -35.88578819]
New Q values:  [ 1637.54803952    26.73544252 23806.07886892   -35.88578819]
Reward: 9  Episode Reward:  24
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 688.746565    912.3843995  3288.1478761   -30.99112081]
------
Step:7, Action:East
State  177
Old Q Values:  [33188.12024162 24935.33673146  6803.82810591     0.        ]
New Q values:  [33188.12024162 24935.33673146  4387.03203391     0.        ]
Reward: 9  Episode Reward:  33
xxxxx
x.. x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5533.66930516  4982.20195654  1099.96026581]
------
Step:8, Action:South
State  195
Old Q Values:  [   38.85388605 12979.35189914  8036.1837185   1101.59744825]
New Q values:  [  38.85388605 7251.41441546 8036.1837185  1101.59744825]
Reward: -1  Episode Reward:  32
xxxxx
x.. x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 6867.57885267]
------
Step:9, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 4644.37724943 6867.57885267]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 15654.33678198]
Reward: -1  Episode Reward:  31
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[43026.35080304  2256.66526474  3195.57842484  1875.31501677]
------
Step:10, Action:North
State  261
Old Q Values:  [ 1637.54803952    26.73544252 23806.07886892   -35.88578819]
New Q values:  [ 1640.86357864    26.73544252 23806.07886892   -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x.. x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 688.746565    912.3843995  3288.1478761   -30.99112081]
------
Step:11, Action:South
State  180
Old Q Values:  [ -746.35376992  4193.24144116  4188.46795952 -4966.32149798]
New Q values:  [ -746.35376992  3817.61438654  4188.46795952 -4966.32149798]
Reward: -1  Episode Reward:  29
xxxxx
x.. x
xg .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1172.25062926 -2735.46306511  7136.39270026 -2601.74710518]
------
Step:12, Action:East
State  260
Old Q Values:  [ 1172.25062926 -2735.46306511  7136.39270026 -2601.74710518]
New Q values:  [ 1172.25062926 -2735.46306511 13394.23903911 -2601.74710518]
Reward: -1  Episode Reward:  28
xxxxx
x.. x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 35134.27319669 14657.21765014]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 35134.27319669 14657.21765014]
New Q values:  [-2527.46239811 -8521.23367799 17421.36991002 14657.21765014]
Reward: -1  Episode Reward:  27
xxxxx
x.. x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2364.8665986  -6442.16912869 -8192.20126966 11227.5354378 ]
------
Step:14, Action:West
State  288
Old Q Values:  [ 2364.8665986  -6442.16912869 -8192.20126966 11227.5354378 ]
New Q values:  [ 2364.8665986  -6442.16912869 -8192.20126966  9716.82514813]
Reward: -1  Episode Reward:  26
xxxxx
xg. x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 17421.36991002 14657.21765014]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 17421.36991002 14657.21765014]
New Q values:  [-2527.46239811 -8521.23367799  9882.99550845 14657.21765014]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2364.8665986  -6442.16912869 -8192.20126966  9716.82514813]
------
Step:16, Action:West
State  288
Old Q Values:  [ 2364.8665986  -6442.16912869 -8192.20126966  9716.82514813]
New Q values:  [ 2364.8665986  -6442.16912869 -8192.20126966  8283.29535429]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9882.99550845 14657.21765014]
------
Step:17, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9882.99550845 14657.21765014]
New Q values:  [-2527.46239811 -8521.23367799  6437.58680967 14657.21765014]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2364.8665986  -6442.16912869 -8192.20126966  8283.29535429]
------
Step:18, Action:North
State  288
Old Q Values:  [ 2364.8665986  -6442.16912869 -8192.20126966  8283.29535429]
New Q values:  [ 7252.28383225 -6442.16912869 -8192.20126966  8283.29535429]
Reward: 9  Episode Reward:  32
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21003.12397602  6387.37944463  1542.2004081   2599.28130597]
------
Step:19, Action:North
State  208
Old Q Values:  [31662.40240739  2951.29802374 -4228.04879148  9373.30884915]
New Q values:  [32603.45565709  2951.29802374 -4228.04879148  9373.30884915]
Reward: -1  Episode Reward:  31
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 66463.64898045]
------
Step:20, Action:West
State  128
Old Q Values:  [ 11374.93691792  22352.84815254  -8652.84       146289.23386729]
New Q values:  [11374.93691792 22352.84815254 -8652.84       91806.02246579]
Reward: 9  Episode Reward:  40
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[     0.           3629.92591876  32120.21206907 110949.76306292]
------
Step:21, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   33009.11692778 108840.22747196]
New Q values:  [  -180.6          3557.6642036   33142.14146525 108840.22747196]
Reward: -1  Episode Reward:  39
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 66463.64898045]
------
Step:22, Action:West
State  128
Old Q Values:  [11374.93691792 22352.84815254 -8652.84       91806.02246579]
New Q values:  [11374.93691792 22352.84815254 -8652.84       70006.73790519]
Reward: -1  Episode Reward:  38
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[     0.           3629.92591876  32120.21206907 110949.76306292]
------
Step:23, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   33142.14146525 108840.22747196]
New Q values:  [  -180.6          3557.6642036   33195.35128023 108840.22747196]
Reward: -1  Episode Reward:  37
xxxxx
x. ax
xg  x
x   x
xxxxx
Step:24, Action:West
State  128
Old Q Values:  [11374.93691792 22352.84815254 -8652.84       70006.73790519]
New Q values:  [11374.93691792 22352.84815254 -8652.84       61287.02408095]
Reward: -1  Episode Reward:  36
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[     0.           3629.92591876  32120.21206907 110949.76306292]
------
Step:25, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   33195.35128023 108840.22747196]
New Q values:  [  -180.6          3557.6642036   33216.63520623 108840.22747196]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 66463.64898045]
------
Step:26, Action:West
State  128
Old Q Values:  [11374.93691792 22352.84815254 -8652.84       61287.02408095]
New Q values:  [11374.93691792 22352.84815254 -8652.84       57799.13855126]
Reward: -1  Episode Reward:  34
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[     0.           3629.92591876  32120.21206907 110949.76306292]
------
Step:27, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   33216.63520623 108840.22747196]
New Q values:  [  -180.6          3557.6642036   33225.14877663 108840.22747196]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 66463.64898045]
------
Step:28, Action:West
State  130
Old Q Values:  [46177.80406237 12060.06514295  -180.00807518 66463.64898045]
New Q values:  [46177.80406237 12060.06514295  -180.00807518 59236.92783377]
Reward: -1  Episode Reward:  32
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   33225.14877663 108840.22747196]
------
Step:29, Action:West
State  115
Old Q Values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32644343e+05]
New Q values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32773276e+05]
Reward: 100009  Episode Reward:  100041
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4257.58520522   214.49203629]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4257.58520522   214.49203629]
New Q values:  [ -281.736      -1150.91067548  2811.67625702   214.49203629]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  2.94184795e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.67747392e+03 -3.22965309e-01  2.94184795e+03]
New Q values:  [ 7.64171987e+01  7.77732676e+03 -3.22965309e-01  2.94184795e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21003.12397602  6387.37944463  1542.2004081   2599.28130597]
------
Step:3, Action:North
State  210
Old Q Values:  [21003.12397602  6387.37944463  1542.2004081   2599.28130597]
New Q values:  [10733.84761823  6387.37944463  1542.2004081   2599.28130597]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.77732676e+03 -3.22965309e-01  2.94184795e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  7.77732676e+03 -3.22965309e-01  2.94184795e+03]
New Q values:  [ 7.64171987e+01  6.33048499e+03 -3.22965309e-01  2.94184795e+03]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10733.84761823  6387.37944463  1542.2004081   2599.28130597]
------
Step:5, Action:North
State  210
Old Q Values:  [10733.84761823  6387.37944463  1542.2004081   2599.28130597]
New Q values:  [6192.08454406 6387.37944463 1542.2004081  2599.28130597]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  6.33048499e+03 -3.22965309e-01  2.94184795e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  6.33048499e+03 -3.22965309e-01  2.94184795e+03]
New Q values:  [ 7.64171987e+01  4.44780783e+03 -3.22965309e-01  2.94184795e+03]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6192.08454406 6387.37944463 1542.2004081  2599.28130597]
------
Step:7, Action:South
State  216
Old Q Values:  [10723.97585913  1765.34213935 -8896.20691497  2138.85546557]
New Q values:  [10723.97585913  3196.52546203 -8896.20691497  2138.85546557]
Reward: 9  Episode Reward:  23
xxxxx
x.  x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7252.28383225 -6442.16912869 -8192.20126966  8283.29535429]
------
Step:8, Action:West
State  288
Old Q Values:  [ 7252.28383225 -6442.16912869 -8192.20126966  8283.29535429]
New Q values:  [ 7252.28383225 -6442.16912869 -8192.20126966  8015.01917631]
Reward: 9  Episode Reward:  32
xxxxx
x.  x
x .gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 15654.33678198]
------
Step:9, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 15654.33678198]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 19175.03995371]
Reward: 9  Episode Reward:  41
xxxxx
x. gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[43026.35080304  2256.66526474  3195.57842484  1875.31501677]
------
Step:10, Action:North
State  257
Old Q Values:  [43026.35080304  2256.66526474  3195.57842484  1875.31501677]
New Q values:  [27166.3763937   2256.66526474  3195.57842484  1875.31501677]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33188.12024162 24935.33673146  4387.03203391     0.        ]
------
Step:11, Action:North
State  181
Old Q Values:  [ 688.746565    912.3843995  3288.1478761   -30.99112081]
New Q values:  [ 590.99253037  912.3843995  3288.1478761   -30.99112081]
Reward: 9  Episode Reward:  49
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1033.64634791   -8.57207238 -180.6       ]
------
Step:12, Action:South
State  111
Old Q Values:  [-177.44732869 2564.79746843  718.60978155 -120.29354603]
New Q values:  [-177.44732869 2011.7633502   718.60978155 -120.29354603]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037  912.3843995  3288.1478761   -30.99112081]
------
Step:13, Action:East
State  177
Old Q Values:  [33188.12024162 24935.33673146  4387.03203391     0.        ]
New Q values:  [33188.12024162 24935.33673146 63420.31360511     0.        ]
Reward: 100009  Episode Reward:  100057
xxxxx
x  gx
x a x
x   x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6437.58680967 14657.21765014]
------
Step:1, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6437.58680967 14657.21765014]
New Q values:  [-2527.46239811 -8521.23367799  6437.58680967  3886.55877179]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x ..x
xg .x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  2811.67625702   214.49203629]
New Q values:  [ -281.736      -1150.91067548  2811.67625702   694.72581958]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2011.7633502   718.60978155 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 2011.7633502   718.60978155 -120.29354603]
New Q values:  [-177.44732869 1820.34889065  718.60978155 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1244.39473591 3367.47850188    0.        ]
------
Step:3, Action:East
State  189
Old Q Values:  [  64.81505849 1220.1801337   909.92809116  154.04646645]
New Q values:  [  64.81505849 1220.1801337   777.82012054  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[1.32443385e-01 1.36149628e+03 0.00000000e+00 4.03062559e+02]
------
Step:4, Action:South
State  199
Old Q Values:  [  14.86214194 3981.2013878   549.89931413 1915.70494401]
New Q values:  [  14.86214194 1982.97289372  549.89931413 1915.70494401]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          -29.77444073 1283.64112866]
------
Step:5, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103   972.39501195]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  7530.18166546]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1640.86357864    26.73544252 23806.07886892   -35.88578819]
------
Step:6, Action:North
State  260
Old Q Values:  [ 1172.25062926 -2735.46306511 13394.23903911 -2601.74710518]
New Q values:  [ 1513.02138454 -2735.46306511 13394.23903911 -2601.74710518]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:7, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 3367.47850188    0.        ]
New Q values:  [ 877.23516594 1244.39473591 3289.89455876    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.13340328e+03  6.47834386e+03  0.00000000e+00]
------
Step:8, Action:East
State  199
Old Q Values:  [  14.86214194 1982.97289372  549.89931413 1915.70494401]
New Q values:  [  14.86214194 1982.97289372 2141.57355904 1915.70494401]
Reward: 9  Episode Reward:  42
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6192.08454406 6387.37944463 1542.2004081  2599.28130597]
------
Step:9, Action:North
State  216
Old Q Values:  [10723.97585913  3196.52546203 -8896.20691497  2138.85546557]
New Q values:  [ 5629.33269237  3196.52546203 -8896.20691497  2138.85546557]
Reward: 9  Episode Reward:  51
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.44780783e+03 -3.22965309e-01  2.94184795e+03]
------
Step:10, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.44780783e+03 -3.22965309e-01  2.94184795e+03]
New Q values:  [ 7.64171987e+01  4.44780783e+03 -3.22965309e-01  1.84741654e+03]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 2237.59120179 1460.24513884]
------
Step:11, Action:East
State  126
Old Q Values:  [   0.          331.64678262 2237.59120179 1460.24513884]
New Q values:  [   0.          331.64678262 2228.77882944 1460.24513884]
Reward: -1  Episode Reward:  49
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.44780783e+03 -3.22965309e-01  1.84741654e+03]
------
Step:12, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.44780783e+03 -3.22965309e-01  1.84741654e+03]
New Q values:  [ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.84741654e+03]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2552.38786537 6735.13737869    0.          386.1281519 ]
------
Step:13, Action:South
State  210
Old Q Values:  [6192.08454406 6387.37944463 1542.2004081  2599.28130597]
New Q values:  [ 6192.08454406 64964.85753075  1542.2004081   2599.28130597]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1640.86357864    26.73544252 23806.07886892   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1640.86357864    26.73544252 23806.07886892   -35.88578819]
New Q values:  [ 1648.18979429    26.73544252 23806.07886892   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037  912.3843995  3288.1478761   -30.99112081]
------
Step:2, Action:South
State  181
Old Q Values:  [ 590.99253037  912.3843995  3288.1478761   -30.99112081]
New Q values:  [ 590.99253037 7506.17742048 3288.1478761   -30.99112081]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1648.18979429    26.73544252 23806.07886892   -35.88578819]
------
Step:3, Action:East
State  261
Old Q Values:  [ 1648.18979429    26.73544252 23806.07886892   -35.88578819]
New Q values:  [ 1648.18979429    26.73544252 11780.88604721   -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  7530.18166546]
------
Step:4, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  7530.18166546]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  6545.73848034]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1648.18979429    26.73544252 11780.88604721   -35.88578819]
------
Step:5, Action:East
State  260
Old Q Values:  [ 1513.02138454 -2735.46306511 13394.23903911 -2601.74710518]
New Q values:  [ 1513.02138454 -2735.46306511  7320.81715975 -2601.74710518]
Reward: -1  Episode Reward:  5
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  6545.73848034]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6437.58680967  3886.55877179]
New Q values:  [-2527.46239811 -8521.23367799  6437.58680967  3750.26865664]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1513.02138454 -2735.46306511  7320.81715975 -2601.74710518]
------
Step:7, Action:East
State  261
Old Q Values:  [ 1648.18979429    26.73544252 11780.88604721   -35.88578819]
New Q values:  [1648.18979429   26.73544252 6675.47596299  -35.88578819]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  6545.73848034]
------
Step:8, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 19175.03995371]
New Q values:  [ 870.35122762 -168.92307549 4644.37724943 9672.05877038]
Reward: -1  Episode Reward:  2
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1648.18979429   26.73544252 6675.47596299  -35.88578819]
------
Step:9, Action:East
State  261
Old Q Values:  [1648.18979429   26.73544252 6675.47596299  -35.88578819]
New Q values:  [1648.18979429   26.73544252 5571.20801631  -35.88578819]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 9672.05877038]
------
Step:10, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6437.58680967  3750.26865664]
New Q values:  [-2527.46239811 -8521.23367799  6437.58680967  3170.86986755]
Reward: -1  Episode Reward:  0
xxxxx
x...x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1648.18979429   26.73544252 5571.20801631  -35.88578819]
------
Step:11, Action:North
State  260
Old Q Values:  [ 1513.02138454 -2735.46306511  7320.81715975 -2601.74710518]
New Q values:  [ 1649.32968665 -2735.46306511  7320.81715975 -2601.74710518]
Reward: -1  Episode Reward:  -1
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:12, Action:East
State  183
Old Q Values:  [ 877.23516594 1244.39473591 3289.89455876    0.        ]
New Q values:  [ 877.23516594 1244.39473591 3697.15306382    0.        ]
Reward: 9  Episode Reward:  8
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:13, Action:East
State  195
Old Q Values:  [  38.85388605 7251.41441546 8036.1837185  1101.59744825]
New Q values:  [   38.85388605  7251.41441546 22709.33074662  1101.59744825]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6192.08454406 64964.85753075  1542.2004081   2599.28130597]
------
Step:14, Action:North
State  208
Old Q Values:  [32603.45565709  2951.29802374 -4228.04879148  9373.30884915]
New Q values:  [30817.86061297  2951.29802374 -4228.04879148  9373.30884915]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 59236.92783377]
------
Step:15, Action:West
State  130
Old Q Values:  [46177.80406237 12060.06514295  -180.00807518 59236.92783377]
New Q values:  [46177.80406237 12060.06514295  -180.00807518 56352.2393751 ]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   33225.14877663 108840.22747196]
------
Step:16, Action:West
State  126
Old Q Values:  [   0.          331.64678262 2228.77882944 1460.24513884]
New Q values:  [   0.          331.64678262 2228.77882944 1135.60272273]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1820.34889065  718.60978155 -120.29354603]
------
Step:17, Action:South
State  110
Old Q Values:  [-239.29051573 -155.93031593  365.96192905 -180.6       ]
New Q values:  [-239.29051573  475.48898133  365.96192905 -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -5.70379540e+03  1.79487036e+03  0.00000000e+00]
------
Step:18, Action:East
State  191
Old Q Values:  [  3.06655861 792.13768726 355.15241917   0.        ]
New Q values:  [  3.06655861 792.13768726 321.41835844   0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[    0.         -1406.21014518   599.85796923     0.        ]
------
Step:19, Action:East
State  204
Old Q Values:  [   0.         1387.82535955 1341.90130646  441.58769553]
New Q values:  [   0.         1387.82535955 2224.9603303   441.58769553]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5629.33269237  3196.52546203 -8896.20691497  2138.85546557]
------
Step:20, Action:North
State  216
Old Q Values:  [ 5629.33269237  3196.52546203 -8896.20691497  2138.85546557]
New Q values:  [ 3390.85238052  3196.52546203 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.84741654e+03]
------
Step:21, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.84741654e+03]
New Q values:  [ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.40700027e+03]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 2228.77882944 1135.60272273]
------
Step:22, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 1806.64860675  963.6944397 ]
New Q values:  [   0.         1166.51141701 1455.74022619  963.6944397 ]
Reward: -1  Episode Reward:  38
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2445.60261162 -2383.80019164   132.13805377]
------
Step:23, Action:South
State  136
Old Q Values:  [ -170.77177351  2445.60261162 -2383.80019164   132.13805377]
New Q values:  [ -170.77177351  1994.8967588  -2383.80019164   132.13805377]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3390.85238052  3196.52546203 -8896.20691497  2138.85546557]
------
Step:24, Action:South
State  208
Old Q Values:  [30817.86061297  2951.29802374 -4228.04879148  9373.30884915]
New Q values:  [30817.86061297 63590.42496239 -4228.04879148  9373.30884915]
Reward: 100009  Episode Reward:  100046
xxxxx
x  gx
x   x
x  ax
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30817.86061297 63590.42496239 -4228.04879148  9373.30884915]
------
Step:1, Action:South
State  208
Old Q Values:  [30817.86061297 63590.42496239 -4228.04879148  9373.30884915]
New Q values:  [30817.86061297 27846.07573785 -4228.04879148  9373.30884915]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7252.28383225 -6442.16912869 -8192.20126966  8015.01917631]
------
Step:2, Action:West
State  288
Old Q Values:  [ 7252.28383225 -6442.16912869 -8192.20126966  8015.01917631]
New Q values:  [ 7252.28383225 -6442.16912869 -8192.20126966  5142.68371342]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6437.58680967  3170.86986755]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6437.58680967  3170.86986755]
New Q values:  [-2527.46239811 -8521.23367799  4750.11987354  3170.86986755]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7252.28383225 -6442.16912869 -8192.20126966  5142.68371342]
------
Step:4, Action:North
State  288
Old Q Values:  [ 7252.28383225 -6442.16912869 -8192.20126966  5142.68371342]
New Q values:  [22389.77079212 -6442.16912869 -8192.20126966  5142.68371342]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6192.08454406 64964.85753075  1542.2004081   2599.28130597]
------
Step:5, Action:North
State  210
Old Q Values:  [ 6192.08454406 64964.85753075  1542.2004081   2599.28130597]
New Q values:  [19387.90563015 64964.85753075  1542.2004081   2599.28130597]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 56352.2393751 ]
------
Step:6, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.40700027e+03]
New Q values:  [ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.41170298e+03]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2811.67625702   694.72581958]
------
Step:7, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2434.93975336   590.4934664 ]
New Q values:  [-9594.56523706 -8069.05606225  1571.84492898   590.4934664 ]
Reward: -1  Episode Reward:  33
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1994.8967588  -2383.80019164   132.13805377]
------
Step:8, Action:South
State  136
Old Q Values:  [ -170.77177351  1994.8967588  -2383.80019164   132.13805377]
New Q values:  [ -170.77177351 10042.71688741 -2383.80019164   132.13805377]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30817.86061297 27846.07573785 -4228.04879148  9373.30884915]
------
Step:9, Action:North
State  208
Old Q Values:  [30817.86061297 27846.07573785 -4228.04879148  9373.30884915]
New Q values:  [29232.21605772 27846.07573785 -4228.04879148  9373.30884915]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 56352.2393751 ]
------
Step:10, Action:West
State  136
Old Q Values:  [ -170.77177351 10042.71688741 -2383.80019164   132.13805377]
New Q values:  [ -170.77177351 10042.71688741 -2383.80019164   523.8087002 ]
Reward: -1  Episode Reward:  30
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1571.84492898   590.4934664 ]
------
Step:11, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   33225.14877663 108840.22747196]
New Q values:  [  -180.6          3557.6642036   30195.13132318 108840.22747196]
Reward: -1  Episode Reward:  29
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 56352.2393751 ]
------
Step:12, Action:West
State  136
Old Q Values:  [ -170.77177351 10042.71688741 -2383.80019164   523.8087002 ]
New Q values:  [ -170.77177351 10042.71688741 -2383.80019164   680.47695878]
Reward: -1  Episode Reward:  28
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1571.84492898   590.4934664 ]
------
Step:13, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   30195.13132318 108840.22747196]
New Q values:  [  -180.6          3557.6642036   28983.1243418  108840.22747196]
Reward: -1  Episode Reward:  27
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 56352.2393751 ]
------
Step:14, Action:West
State  136
Old Q Values:  [ -170.77177351 10042.71688741 -2383.80019164   680.47695878]
New Q values:  [ -170.77177351 10042.71688741 -2383.80019164   743.14426221]
Reward: -1  Episode Reward:  26
xxxxx
xga x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1571.84492898   590.4934664 ]
------
Step:15, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   28983.1243418  108840.22747196]
New Q values:  [  -180.6          3557.6642036   28498.32154925 108840.22747196]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 12060.06514295  -180.00807518 56352.2393751 ]
------
Step:16, Action:West
State  130
Old Q Values:  [46177.80406237 12060.06514295  -180.00807518 56352.2393751 ]
New Q values:  [46177.80406237 12060.06514295  -180.00807518 55192.36399163]
Reward: -1  Episode Reward:  24
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 108840.22747196]
------
Step:17, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  2811.67625702   694.72581958]
New Q values:  [ -281.736      -1150.91067548  2811.67625702   657.72591961]
Reward: 9  Episode Reward:  33
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1248.11863926  514.74819532 -252.78192178]
------
Step:18, Action:South
State  107
Old Q Values:  [-252.35169558 1248.11863926  514.74819532 -252.78192178]
New Q values:  [-252.35169558  881.9045663   514.74819532 -252.78192178]
Reward: 9  Episode Reward:  42
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[1257.52370199    0.          526.18496922    0.        ]
------
Step:19, Action:North
State  185
Old Q Values:  [ 951.34161137    0.         1742.8879806  -178.98      ]
New Q values:  [ 644.50801444    0.         1742.8879806  -178.98      ]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  881.9045663   514.74819532 -252.78192178]
------
Step:20, Action:South
State  107
Old Q Values:  [-252.35169558  881.9045663   514.74819532 -252.78192178]
New Q values:  [-252.35169558  729.41893712  514.74819532 -252.78192178]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[1257.52370199    0.          526.18496922    0.        ]
------
Step:21, Action:North
State  185
Old Q Values:  [ 644.50801444    0.         1742.8879806  -178.98      ]
New Q values:  [ 476.02888691    0.         1742.8879806  -178.98      ]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  729.41893712  514.74819532 -252.78192178]
------
Step:22, Action:South
State  107
Old Q Values:  [-252.35169558  729.41893712  514.74819532 -252.78192178]
New Q values:  [-252.35169558  668.42468544  514.74819532 -252.78192178]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[1257.52370199    0.          526.18496922    0.        ]
------
Step:23, Action:North
State  187
Old Q Values:  [1257.52370199    0.          526.18496922    0.        ]
New Q values:  [702.93688643   0.         526.18496922   0.        ]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  668.42468544  514.74819532 -252.78192178]
------
Step:24, Action:South
State  107
Old Q Values:  [-252.35169558  668.42468544  514.74819532 -252.78192178]
New Q values:  [-252.35169558  789.63626836  514.74819532 -252.78192178]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 476.02888691    0.         1742.8879806  -178.98      ]
------
Step:25, Action:North
State  184
Old Q Values:  [9.67970369e+00 0.00000000e+00 1.40614268e+04 0.00000000e+00]
New Q values:  [  118.98855974     0.         14061.4268019      0.        ]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   385.72226087  -180.6       ]
------
Step:26, Action:East
State  107
Old Q Values:  [-252.35169558  789.63626836  514.74819532 -252.78192178]
New Q values:  [-252.35169558  789.63626836 1048.80215523 -252.78192178]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2811.67625702   657.72591961]
------
Step:27, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1464.94554802  1699.65393533]
New Q values:  [ -253.44886264 -1902.20915811  1725.09752278  1699.65393533]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.41170298e+03]
------
Step:28, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.41170298e+03]
New Q values:  [ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1725.09752278  1699.65393533]
------
Step:29, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2811.67625702   657.72591961]
New Q values:  [ -281.736      -1150.91067548  2263.78980638   657.72591961]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.08161045e+03]
------
Step:30, Action:South
State  130
Old Q Values:  [46177.80406237 12060.06514295  -180.00807518 55192.36399163]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 55192.36399163]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[19387.90563015 64964.85753075  1542.2004081   2599.28130597]
------
Step:31, Action:South
State  216
Old Q Values:  [ 3390.85238052  3196.52546203 -8896.20691497  2138.85546557]
New Q values:  [ 3390.85238052  7994.94142245 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[22389.77079212 -6442.16912869 -8192.20126966  5142.68371342]
------
Step:32, Action:North
State  288
Old Q Values:  [22389.77079212 -6442.16912869 -8192.20126966  5142.68371342]
New Q values:  [11353.79074358 -6442.16912869 -8192.20126966  5142.68371342]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3390.85238052  7994.94142245 -8896.20691497  2138.85546557]
------
Step:33, Action:South
State  216
Old Q Values:  [ 3390.85238052  7994.94142245 -8896.20691497  2138.85546557]
New Q values:  [ 3390.85238052  6603.51379205 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11353.79074358 -6442.16912869 -8192.20126966  5142.68371342]
------
Step:34, Action:North
State  288
Old Q Values:  [11353.79074358 -6442.16912869 -8192.20126966  5142.68371342]
New Q values:  [ 6521.97043505 -6442.16912869 -8192.20126966  5142.68371342]
Reward: -1  Episode Reward:  26
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3390.85238052  6603.51379205 -8896.20691497  2138.85546557]
------
Step:35, Action:South
State  216
Old Q Values:  [ 3390.85238052  6603.51379205 -8896.20691497  2138.85546557]
New Q values:  [ 3390.85238052  4597.39664734 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6521.97043505 -6442.16912869 -8192.20126966  5142.68371342]
------
Step:36, Action:North
State  288
Old Q Values:  [ 6521.97043505 -6442.16912869 -8192.20126966  5142.68371342]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966  5142.68371342]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3390.85238052  4597.39664734 -8896.20691497  2138.85546557]
------
Step:37, Action:South
State  216
Old Q Values:  [ 3390.85238052  4597.39664734 -8896.20691497  2138.85546557]
New Q values:  [ 3390.85238052  3381.16377296 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  23
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966  5142.68371342]
------
Step:38, Action:West
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966  5142.68371342]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966  4958.09111648]
Reward: -1  Episode Reward:  22
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 9672.05877038]
------
Step:39, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4750.11987354  3170.86986755]
New Q values:  [-2527.46239811 -8521.23367799  4750.11987354 69423.66086513]
Reward: 100009  Episode Reward:  100031
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1033.64634791   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1820.34889065  718.60978155 -120.29354603]
New Q values:  [-177.44732869 2985.3927824   718.60978155 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037 7506.17742048 3288.1478761   -30.99112081]
------
Step:2, Action:South
State  183
Old Q Values:  [ 877.23516594 1244.39473591 3697.15306382    0.        ]
New Q values:  [ 877.23516594 2174.52029926 3697.15306382    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1648.18979429   26.73544252 5571.20801631  -35.88578819]
------
Step:3, Action:East
State  261
Old Q Values:  [1648.18979429   26.73544252 5571.20801631  -35.88578819]
New Q values:  [ 1648.18979429    26.73544252 17060.98146606   -35.88578819]
Reward: -9991  Episode Reward:  -9973
xxxxx
x . x
x ..x
x g.x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966  4958.09111648]
------
Step:1, Action:West
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966  4958.09111648]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966 22815.73470613]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4750.11987354 69423.66086513]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4750.11987354 69423.66086513]
New Q values:  [-2527.46239811 -8521.23367799  4750.11987354 29971.10949398]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1649.32968665 -2735.46306511  7320.81715975 -2601.74710518]
------
Step:3, Action:East
State  257
Old Q Values:  [27166.3763937   2256.66526474  3195.57842484  1875.31501677]
New Q values:  [27166.3763937   2256.66526474 10268.96421813  1875.31501677]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4750.11987354 29971.10949398]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4750.11987354 29971.10949398]
New Q values:  [-2527.46239811 -8521.23367799  4750.11987354 17106.13823741]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1648.18979429    26.73544252 17060.98146606   -35.88578819]
------
Step:5, Action:East
State  260
Old Q Values:  [ 1649.32968665 -2735.46306511  7320.81715975 -2601.74710518]
New Q values:  [ 1649.32968665 -2735.46306511  8059.56833512 -2601.74710518]
Reward: -1  Episode Reward:  15
xxxxx
xg .x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4750.11987354 17106.13823741]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4750.11987354 17106.13823741]
New Q values:  [-2527.46239811 -8521.23367799  4750.11987354  9259.7257955 ]
Reward: -1  Episode Reward:  14
xxxxx
x. .x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1649.32968665 -2735.46306511  8059.56833512 -2601.74710518]
------
Step:7, Action:East
State  260
Old Q Values:  [ 1649.32968665 -2735.46306511  8059.56833512 -2601.74710518]
New Q values:  [ 1649.32968665 -2735.46306511  6001.1450727  -2601.74710518]
Reward: -1  Episode Reward:  13
xxxxx
x. .x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4750.11987354  9259.7257955 ]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4750.11987354  9259.7257955 ]
New Q values:  [-2527.46239811 -8521.23367799  8744.16836126  9259.7257955 ]
Reward: -1  Episode Reward:  12
xxxxx
x. .x
x...x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966 22815.73470613]
------
Step:9, Action:West
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966 22815.73470613]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966  5903.6116211 ]
Reward: -10001  Episode Reward:  -9989
xxxxx
x. .x
x...x
x g x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5533.66930516  4982.20195654  1099.96026581]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  5533.66930516  4982.20195654  1099.96026581]
New Q values:  [-5922.26708831  5120.48535318  4982.20195654  1099.96026581]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 9672.05877038]
------
Step:2, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 4644.37724943 9672.05877038]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 12024.13642626]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x. .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[27166.3763937   2256.66526474 10268.96421813  1875.31501677]
------
Step:3, Action:North
State  261
Old Q Values:  [ 1648.18979429    26.73544252 17060.98146606   -35.88578819]
New Q values:  [ 1773.82183686    26.73544252 17060.98146606   -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 2174.52029926 3697.15306382    0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [ 590.99253037 7506.17742048 3288.1478761   -30.99112081]
New Q values:  [ 590.99253037 7506.17742048  -71.77924246  -30.99112081]
Reward: -10001  Episode Reward:  -9974
xxxxx
x.. x
x g.x
x  .x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:1, Action:East
State  195
Old Q Values:  [   38.85388605  7251.41441546 22709.33074662  1101.59744825]
New Q values:  [   38.85388605  7251.41441546 28578.58955787  1101.59744825]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[19387.90563015 64964.85753075  1542.2004081   2599.28130597]
------
Step:2, Action:North
State  210
Old Q Values:  [19387.90563015 64964.85753075  1542.2004081   2599.28130597]
New Q values:  [ 8900.28155563 64964.85753075  1542.2004081   2599.28130597]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.08161045e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.79906435e+03 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  1.02886906e+04 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29232.21605772 27846.07573785 -4228.04879148  9373.30884915]
------
Step:4, Action:North
State  208
Old Q Values:  [29232.21605772 27846.07573785 -4228.04879148  9373.30884915]
New Q values:  [14705.10148931 27846.07573785 -4228.04879148  9373.30884915]
Reward: -1  Episode Reward:  16
xxxxx
x.gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 10042.71688741 -2383.80019164   743.14426221]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351 10042.71688741 -2383.80019164   743.14426221]
New Q values:  [ -170.77177351 12370.30947632 -2383.80019164   743.14426221]
Reward: -1  Episode Reward:  15
xxxxx
xg. x
x. ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14705.10148931 27846.07573785 -4228.04879148  9373.30884915]
------
Step:6, Action:South
State  208
Old Q Values:  [14705.10148931 27846.07573785 -4228.04879148  9373.30884915]
New Q values:  [14705.10148931 12914.91378147 -4228.04879148  9373.30884915]
Reward: 9  Episode Reward:  24
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966  5903.6116211 ]
------
Step:7, Action:West
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966  5903.6116211 ]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966  5968.08557632]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 12024.13642626]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8744.16836126  9259.7257955 ]
New Q values:  [-2527.46239811 -8521.23367799  8744.16836126 11859.20323631]
Reward: 9  Episode Reward:  32
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[27166.3763937   2256.66526474 10268.96421813  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [27166.3763937   2256.66526474 10268.96421813  1875.31501677]
New Q values:  [29898.04463901  2256.66526474 10268.96421813  1875.31501677]
Reward: 9  Episode Reward:  41
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33188.12024162 24935.33673146 63420.31360511     0.        ]
------
Step:10, Action:North
State  177
Old Q Values:  [33188.12024162 24935.33673146 63420.31360511     0.        ]
New Q values:  [24286.95305781 24935.33673146 63420.31360511     0.        ]
Reward: 9  Episode Reward:  50
xxxxx
xag x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:NE
[    0.         36687.68320385     0.             0.        ]
------
Step:11, Action:South
State  100
Old Q Values:  [ 0.0000000e+00  1.1047793e+03 -6.0000000e-01  0.0000000e+00]
New Q values:  [ 0.00000000e+00  1.69785211e+03 -6.00000000e-01  0.00000000e+00]
Reward: -1  Episode Reward:  49
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -746.35376992  3817.61438654  4188.46795952 -4966.32149798]
------
Step:12, Action:East
State  180
Old Q Values:  [ -746.35376992  3817.61438654  4188.46795952 -4966.32149798]
New Q values:  [ -746.35376992  3817.61438654  6288.34879091 -4966.32149798]
Reward: -1  Episode Reward:  48
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.53785387e+04 1.30647391e+04 2.91043938e+03]
------
Step:13, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.53785387e+04 1.30647391e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.70857645e+03 1.30647391e+04 2.91043938e+03]
Reward: -1  Episode Reward:  47
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8744.16836126 11859.20323631]
------
Step:14, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8744.16836126 11859.20323631]
New Q values:  [-2527.46239811 -8521.23367799  8744.16836126  9861.37573434]
Reward: -1  Episode Reward:  46
xxxxx
x . x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1773.82183686    26.73544252 17060.98146606   -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [ 1773.82183686    26.73544252 17060.98146606   -35.88578819]
New Q values:  [ 2960.78196089    26.73544252 17060.98146606   -35.88578819]
Reward: -1  Episode Reward:  45
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037 7506.17742048  -71.77924246  -30.99112081]
------
Step:16, Action:South
State  181
Old Q Values:  [ 590.99253037 7506.17742048  -71.77924246  -30.99112081]
New Q values:  [ 590.99253037 8120.16540801  -71.77924246  -30.99112081]
Reward: -1  Episode Reward:  44
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2960.78196089    26.73544252 17060.98146606   -35.88578819]
------
Step:17, Action:East
State  261
Old Q Values:  [ 2960.78196089    26.73544252 17060.98146606   -35.88578819]
New Q values:  [2960.78196089   26.73544252 9782.20530673  -35.88578819]
Reward: -1  Episode Reward:  43
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8744.16836126  9861.37573434]
------
Step:18, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8744.16836126  9861.37573434]
New Q values:  [-2527.46239811 -8521.23367799  8744.16836126 12913.36368544]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[29898.04463901  2256.66526474 10268.96421813  1875.31501677]
------
Step:19, Action:North
State  260
Old Q Values:  [ 1649.32968665 -2735.46306511  6001.1450727  -2601.74710518]
New Q values:  [ 2545.63651193 -2735.46306511  6001.1450727  -2601.74710518]
Reward: -1  Episode Reward:  41
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -746.35376992  3817.61438654  6288.34879091 -4966.32149798]
------
Step:20, Action:East
State  180
Old Q Values:  [ -746.35376992  3817.61438654  6288.34879091 -4966.32149798]
New Q values:  [ -746.35376992  3817.61438654  6434.16123939 -4966.32149798]
Reward: -1  Episode Reward:  40
xxxxx
xg. x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.70857645e+03 1.30647391e+04 2.91043938e+03]
------
Step:21, Action:East
State  192
Old Q Values:  [3.89777037e-01 9.70857645e+03 1.30647391e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.70857645e+03 9.63682608e+03 2.91043938e+03]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14705.10148931 12914.91378147 -4228.04879148  9373.30884915]
------
Step:22, Action:North
State  208
Old Q Values:  [14705.10148931 12914.91378147 -4228.04879148  9373.30884915]
New Q values:  [22439.14979321 12914.91378147 -4228.04879148  9373.30884915]
Reward: -1  Episode Reward:  38
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 55192.36399163]
------
Step:23, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 55192.36399163]
New Q values:  [ 46177.80406237  24312.8833164    -180.00807518 114734.41383824]
Reward: 100009  Episode Reward:  100047
xxxxx
x a x
x   x
x g x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [ -746.35376992  3817.61438654  6434.16123939 -4966.32149798]
New Q values:  [ -746.35376992  3817.61438654  5491.63742986 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.70857645e+03 9.63682608e+03 2.91043938e+03]
------
Step:2, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.70857645e+03 9.63682608e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.85255212e+03 9.63682608e+03 2.91043938e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  6545.73848034]
------
Step:3, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  6545.73848034]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  5552.35698416]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2960.78196089   26.73544252 9782.20530673  -35.88578819]
------
Step:4, Action:North
State  260
Old Q Values:  [ 2545.63651193 -2735.46306511  6001.1450727  -2601.74710518]
New Q values:  [ 2062.37573761 -2735.46306511  6001.1450727  -2601.74710518]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 877.23516594 2174.52029926 3697.15306382    0.        ]
New Q values:  [ 877.23516594 2174.52029926 3854.05646585    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:6, Action:East
State  195
Old Q Values:  [   38.85388605  7251.41441546 28578.58955787  1101.59744825]
New Q values:  [   38.85388605  7251.41441546 30926.29308237  1101.59744825]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 8900.28155563 64964.85753075  1542.2004081   2599.28130597]
------
Step:7, Action:North
State  210
Old Q Values:  [ 8900.28155563 64964.85753075  1542.2004081   2599.28130597]
New Q values:  [37985.83677372 64964.85753075  1542.2004081   2599.28130597]
Reward: 9  Episode Reward:  33
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237  24312.8833164    -180.00807518 114734.41383824]
------
Step:8, Action:West
State  130
Old Q Values:  [ 46177.80406237  24312.8833164    -180.00807518 114734.41383824]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 78551.23377689]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 108840.22747196]
------
Step:9, Action:West
State  126
Old Q Values:  [   0.          331.64678262 2228.77882944 1135.60272273]
New Q values:  [   0.          331.64678262 2228.77882944  602.28778349]
Reward: 9  Episode Reward:  51
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  475.48898133  365.96192905 -180.6       ]
------
Step:10, Action:East
State  108
Old Q Values:  [-8463.16477134  2529.34094544   492.53480369     0.        ]
New Q values:  [-8463.16477134  2529.34094544   633.13598933     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 1455.74022619  963.6944397 ]
------
Step:11, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 1455.74022619  963.6944397 ]
New Q values:  [   0.         1166.51141701 4292.78893337  963.6944397 ]
Reward: -1  Episode Reward:  49
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 12370.30947632 -2383.80019164   743.14426221]
------
Step:12, Action:South
State  136
Old Q Values:  [ -170.77177351 12370.30947632 -2383.80019164   743.14426221]
New Q values:  [ -170.77177351  5964.77950468 -2383.80019164   743.14426221]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3390.85238052  3381.16377296 -8896.20691497  2138.85546557]
------
Step:13, Action:South
State  208
Old Q Values:  [22439.14979321 12914.91378147 -4228.04879148  9373.30884915]
New Q values:  [22439.14979321 66961.79118548 -4228.04879148  9373.30884915]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
Step:1, Action:East
State  208
Old Q Values:  [22439.14979321 66961.79118548 -4228.04879148  9373.30884915]
New Q values:  [22439.14979321 66961.79118548 18216.71783905  9373.30884915]
Reward: -301  Episode Reward:  -301
xxxxx
x.g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22439.14979321 66961.79118548 18216.71783905  9373.30884915]
------
Step:2, Action:South
State  208
Old Q Values:  [22439.14979321 66961.79118548 18216.71783905  9373.30884915]
New Q values:  [22439.14979321 28580.54214709 18216.71783905  9373.30884915]
Reward: 9  Episode Reward:  -292
xxxxx
x.. x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966  5968.08557632]
------
Step:3, Action:West
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966  5968.08557632]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966  6266.64333616]
Reward: 9  Episode Reward:  -283
xxxxx
x.. x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8744.16836126 12913.36368544]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8744.16836126 12913.36368544]
New Q values:  [-2527.46239811 -8521.23367799  8744.16836126  9324.31731417]
Reward: -9991  Episode Reward:  -10274
xxxxx
x.. x
x.. x
xg  x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8744.16836126  9324.31731417]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  5552.35698416]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  5161.00438568]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2960.78196089   26.73544252 9782.20530673  -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [2960.78196089   26.73544252 9782.20530673  -35.88578819]
New Q values:  [2960.78196089   26.73544252 7519.52305057  -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 12024.13642626]
------
Step:3, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 12024.13642626]
New Q values:  [ 870.35122762 -168.92307549 4644.37724943 7064.91148568]
Reward: -1  Episode Reward:  7
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2960.78196089   26.73544252 7519.52305057  -35.88578819]
------
Step:4, Action:East
State  261
Old Q Values:  [2960.78196089   26.73544252 7519.52305057  -35.88578819]
New Q values:  [2960.78196089   26.73544252 5126.68266593  -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 7064.91148568]
------
Step:5, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  5161.00438568]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  3601.80655405]
Reward: -1  Episode Reward:  5
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2960.78196089   26.73544252 5126.68266593  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [2960.78196089   26.73544252 5126.68266593  -35.88578819]
New Q values:  [ 2960.78196089    26.73544252 -1152.63173938   -35.88578819]
Reward: -10001  Episode Reward:  -9996
xxxxx
x. .x
x...x
x g.x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037 8120.16540801  -71.77924246  -30.99112081]
------
Step:1, Action:South
State  181
Old Q Values:  [ 590.99253037 8120.16540801  -71.77924246  -30.99112081]
New Q values:  [ 590.99253037 4141.70075147  -71.77924246  -30.99112081]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2960.78196089    26.73544252 -1152.63173938   -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [ 2960.78196089    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [ 2426.2230098     26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037 4141.70075147  -71.77924246  -30.99112081]
------
Step:3, Action:South
State  183
Old Q Values:  [ 877.23516594 2174.52029926 3854.05646585    0.        ]
New Q values:  [ 877.23516594 1597.07502264 3854.05646585    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2426.2230098     26.73544252 -1152.63173938   -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [ 2426.2230098     26.73544252 -1152.63173938   -35.88578819]
New Q values:  [ 2212.39942936    26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037 4141.70075147  -71.77924246  -30.99112081]
------
Step:5, Action:South
State  181
Old Q Values:  [ 590.99253037 4141.70075147  -71.77924246  -30.99112081]
New Q values:  [ 590.99253037 2319.8001294   -71.77924246  -30.99112081]
Reward: -1  Episode Reward:  5
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2212.39942936    26.73544252 -1152.63173938   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [ 2212.39942936    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [ 1580.29981056    26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037 2319.8001294   -71.77924246  -30.99112081]
------
Step:7, Action:South
State  181
Old Q Values:  [ 590.99253037 2319.8001294   -71.77924246  -30.99112081]
New Q values:  [ 590.99253037 1401.40999493  -71.77924246  -30.99112081]
Reward: -1  Episode Reward:  3
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1580.29981056    26.73544252 -1152.63173938   -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [ 1580.29981056    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [ 1051.9429227     26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  2
xxxxx
x..gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.99253037 1401.40999493  -71.77924246  -30.99112081]
------
Step:9, Action:South
State  181
Old Q Values:  [ 590.99253037 1401.40999493  -71.77924246  -30.99112081]
New Q values:  [590.99253037 875.54687478 -71.77924246 -30.99112081]
Reward: -1  Episode Reward:  1
xxxxx
x.. x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1051.9429227     26.73544252 -1152.63173938   -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [ 1051.9429227     26.73544252 -1152.63173938   -35.88578819]
New Q values:  [  682.84123152    26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  0
xxxxx
x..gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[590.99253037 875.54687478 -71.77924246 -30.99112081]
------
Step:11, Action:South
State  181
Old Q Values:  [590.99253037 875.54687478 -71.77924246 -30.99112081]
New Q values:  [590.99253037 554.47111937 -71.77924246 -30.99112081]
Reward: -1  Episode Reward:  -1
xxxxx
x.g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  682.84123152    26.73544252 -1152.63173938   -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [  682.84123152    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [  449.83425172    26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  -2
xxxxx
x..gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[590.99253037 554.47111937 -71.77924246 -30.99112081]
------
Step:13, Action:North
State  181
Old Q Values:  [590.99253037 554.47111937 -71.77924246 -30.99112081]
New Q values:  [551.89091652 554.47111937 -71.77924246 -30.99112081]
Reward: 9  Episode Reward:  7
xxxxx
xag x
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1033.64634791   -8.57207238 -180.6       ]
------
Step:14, Action:South
State  109
Old Q Values:  [-241.10880094 1033.64634791   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  579.19987497   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x .gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[551.89091652 554.47111937 -71.77924246 -30.99112081]
------
Step:15, Action:South
State  181
Old Q Values:  [551.89091652 554.47111937 -71.77924246 -30.99112081]
New Q values:  [551.89091652 356.13872326 -71.77924246 -30.99112081]
Reward: -1  Episode Reward:  5
xxxxx
x . x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  449.83425172    26.73544252 -1152.63173938   -35.88578819]
------
Step:16, Action:North
State  261
Old Q Values:  [  449.83425172    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [ 1335.55064044    26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x . x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1597.07502264 3854.05646585    0.        ]
------
Step:17, Action:East
State  183
Old Q Values:  [ 877.23516594 1597.07502264 3854.05646585    0.        ]
New Q values:  [  877.23516594  1597.07502264 10824.91051105     0.        ]
Reward: 9  Episode Reward:  13
xxxxx
x . x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  7251.41441546 30926.29308237  1101.59744825]
------
Step:18, Action:East
State  193
Old Q Values:  [-5922.26708831  5120.48535318  4982.20195654  1099.96026581]
New Q values:  [-5922.26708831  5120.48535318 -2984.46350323  1099.96026581]
Reward: -9991  Episode Reward:  -9978
xxxxx
x . x
x  gx
x ..x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[37985.83677372 64964.85753075  1542.2004081   2599.28130597]
------
Step:1, Action:North
State  208
Old Q Values:  [22439.14979321 28580.54214709 18216.71783905  9373.30884915]
New Q values:  [12067.66708391 28580.54214709 18216.71783905  9373.30884915]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.02886906e+04 -3.22965309e-01  1.08161045e+03]
------
Step:2, Action:West
State  136
Old Q Values:  [ -170.77177351  5964.77950468 -2383.80019164   743.14426221]
New Q values:  [ -170.77177351  5964.77950468 -2383.80019164   372.08852401]
Reward: 9  Episode Reward:  18
xxxxx
x.agx
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   231.43606375]
------
Step:3, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1571.84492898   590.4934664 ]
New Q values:  [-9594.56523706 -8069.05606225  1571.84492898   729.5437439 ]
Reward: 9  Episode Reward:  27
xxxxx
xag x
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1626.4878578    65.14560537    0.        ]
------
Step:4, Action:South
State  109
Old Q Values:  [-241.10880094  579.19987497   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  402.64722495   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x  gx
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[551.89091652 356.13872326 -71.77924246 -30.99112081]
------
Step:5, Action:North
State  181
Old Q Values:  [551.89091652 356.13872326 -71.77924246 -30.99112081]
New Q values:  [1115.77420133  356.13872326  -71.77924246  -30.99112081]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x .gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2985.3927824   718.60978155 -120.29354603]
------
Step:6, Action:South
State  109
Old Q Values:  [-241.10880094  402.64722495   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  495.19115038   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1115.77420133  356.13872326  -71.77924246  -30.99112081]
------
Step:7, Action:North
State  181
Old Q Values:  [1115.77420133  356.13872326  -71.77924246  -30.99112081]
New Q values:  [594.26702564 356.13872326 -71.77924246 -30.99112081]
Reward: -1  Episode Reward:  33
xxxxx
xag x
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  495.19115038   -8.57207238 -180.6       ]
------
Step:8, Action:South
State  108
Old Q Values:  [-8463.16477134  2529.34094544   633.13598933     0.        ]
New Q values:  [-8463.16477134  2658.62760714   633.13598933     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -746.35376992  3817.61438654  5491.63742986 -4966.32149798]
------
Step:9, Action:East
State  185
Old Q Values:  [ 476.02888691    0.         1742.8879806  -178.98      ]
New Q values:  [ 476.02888691    0.         1322.13341882 -178.98      ]
Reward: 9  Episode Reward:  41
xxxxx
x g x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2065.26075526 2028.10103772 1141.49622464]
------
Step:10, Action:South
State  200
Old Q Values:  [  62.8218634  2065.26075526 2028.10103772 1141.49622464]
New Q values:  [  62.8218634  3628.79949635 2028.10103772 1141.49622464]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8744.16836126  9324.31731417]
------
Step:11, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 4644.37724943 7064.91148568]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 71800.77798597]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.85255212e+03 9.63682608e+03 2.91043938e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 5.85255212e+03 9.63682608e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.85255212e+03 1.24342931e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12067.66708391 28580.54214709 18216.71783905  9373.30884915]
------
Step:2, Action:South
State  208
Old Q Values:  [12067.66708391 28580.54214709 18216.71783905  9373.30884915]
New Q values:  [12067.66708391 13317.60985968 18216.71783905  9373.30884915]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966  6266.64333616]
------
Step:3, Action:West
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966  6266.64333616]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966  5309.35252871]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8744.16836126  9324.31731417]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8744.16836126  9324.31731417]
New Q values:  [-2527.46239811 -8521.23367799  8744.16836126  4135.7921178 ]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1335.55064044    26.73544252 -1152.63173938   -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [ 1335.55064044    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [ 3781.09340949    26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  877.23516594  1597.07502264 10824.91051105     0.        ]
------
Step:6, Action:East
State  183
Old Q Values:  [  877.23516594  1597.07502264 10824.91051105     0.        ]
New Q values:  [ 877.23516594 1597.07502264 6705.15944474    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:7, Action:East
State  195
Old Q Values:  [   38.85388605  7251.41441546 30926.29308237  1101.59744825]
New Q values:  [   38.85388605  7251.41441546 31859.37449217  1101.59744825]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[37985.83677372 64964.85753075  1542.2004081   2599.28130597]
------
Step:8, Action:North
State  208
Old Q Values:  [12067.66708391 13317.60985968 18216.71783905  9373.30884915]
New Q values:  [28397.83696663 13317.60985968 18216.71783905  9373.30884915]
Reward: 9  Episode Reward:  42
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 78551.23377689]
------
Step:9, Action:West
State  128
Old Q Values:  [11374.93691792 22352.84815254 -8652.84       57799.13855126]
New Q values:  [11374.93691792 22352.84815254 -8652.84       44428.56690885]
Reward: 9  Episode Reward:  51
xxxxx
x.agx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.          2847.11106414 -5999.38454759 71011.70496116]
------
Step:10, Action:West
State  115
Old Q Values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32773276e+05]
New Q values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32824849e+05]
Reward: 100009  Episode Reward:  100060
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5964.77950468 -2383.80019164   372.08852401]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.02886906e+04 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  5.13813194e+03 -3.22965309e-01  1.08161045e+03]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3390.85238052  3381.16377296 -8896.20691497  2138.85546557]
------
Step:2, Action:North
State  216
Old Q Values:  [ 3390.85238052  3381.16377296 -8896.20691497  2138.85546557]
New Q values:  [ 2897.1805331   3381.16377296 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  8
xxxxx
x .ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.13813194e+03 -3.22965309e-01  1.08161045e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.13813194e+03 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  2.15441100e+04 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  7
xxxxx
x . x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[37985.83677372 64964.85753075  1542.2004081   2599.28130597]
------
Step:4, Action:South
State  210
Old Q Values:  [37985.83677372 64964.85753075  1542.2004081   2599.28130597]
New Q values:  [37985.83677372 27584.14877091  1542.2004081   2599.28130597]
Reward: 9  Episode Reward:  16
xxxxx
x . x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966  5309.35252871]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966  5309.35252871]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966  4752.39151986]
Reward: 9  Episode Reward:  25
xxxxx
x . x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8744.16836126  4135.7921178 ]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8744.16836126  4135.7921178 ]
New Q values:  [-2527.46239811 -8521.23367799  4922.78480046  4135.7921178 ]
Reward: -1  Episode Reward:  24
xxxxx
x . x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966  4752.39151986]
------
Step:7, Action:West
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966  4752.39151986]
New Q values:  [ 3987.40716822 -6442.16912869 -8192.20126966  3377.19204808]
Reward: -1  Episode Reward:  23
xxxxx
x . x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4922.78480046  4135.7921178 ]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4922.78480046  4135.7921178 ]
New Q values:  [-2527.46239811 -8521.23367799  3164.73607065  4135.7921178 ]
Reward: -1  Episode Reward:  22
xxxxx
x . x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3987.40716822 -6442.16912869 -8192.20126966  3377.19204808]
------
Step:9, Action:North
State  288
Old Q Values:  [ 3987.40716822 -6442.16912869 -8192.20126966  3377.19204808]
New Q values:  [12990.11389941 -6442.16912869 -8192.20126966  3377.19204808]
Reward: -1  Episode Reward:  21
xxxxx
x . x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[37985.83677372 27584.14877091  1542.2004081   2599.28130597]
------
Step:10, Action:North
State  208
Old Q Values:  [28397.83696663 13317.60985968 18216.71783905  9373.30884915]
New Q values:  [17821.76779678 13317.60985968 18216.71783905  9373.30884915]
Reward: -1  Episode Reward:  20
xxxxx
x .ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.15441100e+04 -3.22965309e-01  1.08161045e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.15441100e+04 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  2.00127950e+04 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  19
xxxxx
x . x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[37985.83677372 27584.14877091  1542.2004081   2599.28130597]
------
Step:12, Action:North
State  208
Old Q Values:  [17821.76779678 13317.60985968 18216.71783905  9373.30884915]
New Q values:  [13131.9456324  13317.60985968 18216.71783905  9373.30884915]
Reward: -1  Episode Reward:  18
xxxxx
x .ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.00127950e+04 -3.22965309e-01  1.08161045e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.00127950e+04 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  1.94002691e+04 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  17
xxxxx
x . x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[37985.83677372 27584.14877091  1542.2004081   2599.28130597]
------
Step:14, Action:North
State  210
Old Q Values:  [37985.83677372 27584.14877091  1542.2004081   2599.28130597]
New Q values:  [21013.8154246  27584.14877091  1542.2004081   2599.28130597]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.94002691e+04 -3.22965309e-01  1.08161045e+03]
------
Step:15, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.94002691e+04 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  1.60347523e+04 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  15
xxxxx
x . x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21013.8154246  27584.14877091  1542.2004081   2599.28130597]
------
Step:16, Action:North
State  210
Old Q Values:  [21013.8154246  27584.14877091  1542.2004081   2599.28130597]
New Q values:  [13215.35184527 27584.14877091  1542.2004081   2599.28130597]
Reward: -1  Episode Reward:  14
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.60347523e+04 -3.22965309e-01  1.08161045e+03]
------
Step:17, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.60347523e+04 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  1.18783163e+04 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  13
xxxxx
x . x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13131.9456324  13317.60985968 18216.71783905  9373.30884915]
------
Step:18, Action:East
State  210
Old Q Values:  [13215.35184527 27584.14877091  1542.2004081   2599.28130597]
New Q values:  [13215.35184527 27584.14877091  8711.52479451  2599.28130597]
Reward: -301  Episode Reward:  -288
xxxxx
x . x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13215.35184527 27584.14877091  8711.52479451  2599.28130597]
------
Step:19, Action:South
State  210
Old Q Values:  [13215.35184527 27584.14877091  8711.52479451  2599.28130597]
New Q values:  [13215.35184527 14930.09367819  8711.52479451  2599.28130597]
Reward: -1  Episode Reward:  -289
xxxxx
x . x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12990.11389941 -6442.16912869 -8192.20126966  3377.19204808]
------
Step:20, Action:North
State  288
Old Q Values:  [12990.11389941 -6442.16912869 -8192.20126966  3377.19204808]
New Q values:  [10660.46091148 -6442.16912869 -8192.20126966  3377.19204808]
Reward: -1  Episode Reward:  -290
xxxxx
x . x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13131.9456324  13317.60985968 18216.71783905  9373.30884915]
------
Step:21, Action:East
State  208
Old Q Values:  [13131.9456324  13317.60985968 18216.71783905  9373.30884915]
New Q values:  [13131.9456324  13317.60985968 12571.10248734  9373.30884915]
Reward: -301  Episode Reward:  -591
xxxxx
x . x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13131.9456324  13317.60985968 12571.10248734  9373.30884915]
------
Step:22, Action:South
State  208
Old Q Values:  [13131.9456324  13317.60985968 12571.10248734  9373.30884915]
New Q values:  [13131.9456324   8524.58221732 12571.10248734  9373.30884915]
Reward: -1  Episode Reward:  -592
xxxxx
x . x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10660.46091148 -6442.16912869 -8192.20126966  3377.19204808]
------
Step:23, Action:West
State  288
Old Q Values:  [10660.46091148 -6442.16912869 -8192.20126966  3377.19204808]
New Q values:  [10660.46091148 -6442.16912869 -8192.20126966  2591.01445457]
Reward: -1  Episode Reward:  -593
xxxxx
x . x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3164.73607065  4135.7921178 ]
------
Step:24, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3164.73607065  4135.7921178 ]
New Q values:  [-2527.46239811 -8521.23367799  3164.73607065  3460.06036893]
Reward: 9  Episode Reward:  -584
xxxxx
x . x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2062.37573761 -2735.46306511  6001.1450727  -2601.74710518]
------
Step:25, Action:East
State  260
Old Q Values:  [ 2062.37573761 -2735.46306511  6001.1450727  -2601.74710518]
New Q values:  [ 2062.37573761 -2735.46306511  3437.87613976 -2601.74710518]
Reward: -1  Episode Reward:  -585
xxxxx
x . x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3164.73607065  3460.06036893]
------
Step:26, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3164.73607065  3460.06036893]
New Q values:  [-2527.46239811 -8521.23367799  4463.4327017   3460.06036893]
Reward: -1  Episode Reward:  -586
xxxxx
x . x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10660.46091148 -6442.16912869 -8192.20126966  2591.01445457]
------
Step:27, Action:North
State  288
Old Q Values:  [10660.46091148 -6442.16912869 -8192.20126966  2591.01445457]
New Q values:  [ 8742.61246805 -6442.16912869 -8192.20126966  2591.01445457]
Reward: -1  Episode Reward:  -587
xxxxx
x . x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13215.35184527 14930.09367819  8711.52479451  2599.28130597]
------
Step:28, Action:North
State  210
Old Q Values:  [13215.35184527 14930.09367819  8711.52479451  2599.28130597]
New Q values:  [ 8849.03561379 14930.09367819  8711.52479451  2599.28130597]
Reward: -1  Episode Reward:  -588
xxxxx
x .ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.18783163e+04 -3.22965309e-01  1.08161045e+03]
------
Step:29, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.18783163e+04 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  9.22975460e+03 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  -589
xxxxx
x . x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 8849.03561379 14930.09367819  8711.52479451  2599.28130597]
------
Step:30, Action:North
State  210
Old Q Values:  [ 8849.03561379 14930.09367819  8711.52479451  2599.28130597]
New Q values:  [ 6307.94062683 14930.09367819  8711.52479451  2599.28130597]
Reward: -1  Episode Reward:  -590
xxxxx
x .ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.22975460e+03 -3.22965309e-01  1.08161045e+03]
------
Step:31, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  9.22975460e+03 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  8.17032995e+03 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  -591
xxxxx
x . x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6307.94062683 14930.09367819  8711.52479451  2599.28130597]
------
Step:32, Action:South
State  210
Old Q Values:  [ 6307.94062683 14930.09367819  8711.52479451  2599.28130597]
New Q values:  [6307.94062683 8594.22121169 8711.52479451 2599.28130597]
Reward: -1  Episode Reward:  -592
xxxxx
x . x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8742.61246805 -6442.16912869 -8192.20126966  2591.01445457]
------
Step:33, Action:North
State  288
Old Q Values:  [ 8742.61246805 -6442.16912869 -8192.20126966  2591.01445457]
New Q values:  [ 6109.90242557 -6442.16912869 -8192.20126966  2591.01445457]
Reward: -1  Episode Reward:  -593
xxxxx
x . x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6307.94062683 8594.22121169 8711.52479451 2599.28130597]
------
Step:34, Action:East
State  210
Old Q Values:  [6307.94062683 8594.22121169 8711.52479451 2599.28130597]
New Q values:  [6307.94062683 8594.22121169 5917.46735616 2599.28130597]
Reward: -301  Episode Reward:  -894
xxxxx
x . x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6307.94062683 8594.22121169 5917.46735616 2599.28130597]
------
Step:35, Action:South
State  210
Old Q Values:  [6307.94062683 8594.22121169 5917.46735616 2599.28130597]
New Q values:  [6307.94062683 5270.05921235 5917.46735616 2599.28130597]
Reward: -1  Episode Reward:  -895
xxxxx
x . x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6109.90242557 -6442.16912869 -8192.20126966  2591.01445457]
------
Step:36, Action:North
State  288
Old Q Values:  [ 6109.90242557 -6442.16912869 -8192.20126966  2591.01445457]
New Q values:  [ 6382.94465995 -6442.16912869 -8192.20126966  2591.01445457]
Reward: -1  Episode Reward:  -896
xxxxx
x . x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13131.9456324   8524.58221732 12571.10248734  9373.30884915]
------
Step:37, Action:North
State  210
Old Q Values:  [6307.94062683 5270.05921235 5917.46735616 2599.28130597]
New Q values:  [4973.67523429 5270.05921235 5917.46735616 2599.28130597]
Reward: -1  Episode Reward:  -897
xxxxx
x .ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  8.17032995e+03 -3.22965309e-01  1.08161045e+03]
------
Step:38, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  8.17032995e+03 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  5.04277218e+03 -3.22965309e-01  1.08161045e+03]
Reward: -1  Episode Reward:  -898
xxxxx
x . x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4973.67523429 5270.05921235 5917.46735616 2599.28130597]
------
Step:39, Action:East
State  210
Old Q Values:  [4973.67523429 5270.05921235 5917.46735616 2599.28130597]
New Q values:  [4973.67523429 5270.05921235 3961.62714931 2599.28130597]
Reward: -301  Episode Reward:  -1199
xxxxx
x . x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4973.67523429 5270.05921235 3961.62714931 2599.28130597]
------
Step:40, Action:South
State  210
Old Q Values:  [4973.67523429 5270.05921235 3961.62714931 2599.28130597]
New Q values:  [ 4973.67523429 -1977.69291708  3961.62714931  2599.28130597]
Reward: -10001  Episode Reward:  -11200
xxxxx
x . x
x.. x
x  gx
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -746.35376992  3817.61438654  5491.63742986 -4966.32149798]
------
Step:1, Action:East
State  180
Old Q Values:  [ -746.35376992  3817.61438654  5491.63742986 -4966.32149798]
New Q values:  [ -746.35376992  3817.61438654  3055.60135842 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1090.23428232  2845.15462157   239.04887894]
------
Step:2, Action:East
State  200
Old Q Values:  [  62.8218634  3628.79949635 2028.10103772 1141.49622464]
New Q values:  [  62.8218634  3628.79949635 1830.98954698 1141.49622464]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2897.1805331   3381.16377296 -8896.20691497  2138.85546557]
------
Step:3, Action:South
State  216
Old Q Values:  [ 2897.1805331   3381.16377296 -8896.20691497  2138.85546557]
New Q values:  [ 2897.1805331   3272.74890717 -8896.20691497  2138.85546557]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6382.94465995 -6442.16912869 -8192.20126966  2591.01445457]
------
Step:4, Action:North
State  288
Old Q Values:  [ 6382.94465995 -6442.16912869 -8192.20126966  2591.01445457]
New Q values:  [ 6492.1615537  -6442.16912869 -8192.20126966  2591.01445457]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13131.9456324   8524.58221732 12571.10248734  9373.30884915]
------
Step:5, Action:North
State  216
Old Q Values:  [ 2897.1805331   3272.74890717 -8896.20691497  2138.85546557]
New Q values:  [ 2953.70606465  3272.74890717 -8896.20691497  2138.85546557]
Reward: 9  Episode Reward:  35
xxxxx
x gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5964.77950468 -2383.80019164   372.08852401]
------
Step:6, Action:South
State  136
Old Q Values:  [ -170.77177351  5964.77950468 -2383.80019164   372.08852401]
New Q values:  [ -170.77177351  3367.13647402 -2383.80019164   372.08852401]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2953.70606465  3272.74890717 -8896.20691497  2138.85546557]
------
Step:7, Action:South
State  216
Old Q Values:  [ 2953.70606465  3272.74890717 -8896.20691497  2138.85546557]
New Q values:  [ 2953.70606465  3256.14802898 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6492.1615537  -6442.16912869 -8192.20126966  2591.01445457]
------
Step:8, Action:North
State  288
Old Q Values:  [ 6492.1615537  -6442.16912869 -8192.20126966  2591.01445457]
New Q values:  [ 3573.10903017 -6442.16912869 -8192.20126966  2591.01445457]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2953.70606465  3256.14802898 -8896.20691497  2138.85546557]
------
Step:9, Action:South
State  216
Old Q Values:  [ 2953.70606465  3256.14802898 -8896.20691497  2138.85546557]
New Q values:  [ 2953.70606465  2373.79192064 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3573.10903017 -6442.16912869 -8192.20126966  2591.01445457]
------
Step:10, Action:North
State  288
Old Q Values:  [ 3573.10903017 -6442.16912869 -8192.20126966  2591.01445457]
New Q values:  [ 2314.75543146 -6442.16912869 -8192.20126966  2591.01445457]
Reward: -1  Episode Reward:  30
xxxxx
x .gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2953.70606465  2373.79192064 -8896.20691497  2138.85546557]
------
Step:11, Action:South
State  216
Old Q Values:  [ 2953.70606465  2373.79192064 -8896.20691497  2138.85546557]
New Q values:  [ 2953.70606465  1726.22110463 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2314.75543146 -6442.16912869 -8192.20126966  2591.01445457]
------
Step:12, Action:West
State  288
Old Q Values:  [ 2314.75543146 -6442.16912869 -8192.20126966  2591.01445457]
New Q values:  [ 2314.75543146 -6442.16912869 -8192.20126966  2380.83559234]
Reward: 9  Episode Reward:  38
xxxxx
xg. x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4463.4327017   3460.06036893]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4463.4327017   3460.06036893]
New Q values:  [-2527.46239811 -8521.23367799  2499.02375838  3460.06036893]
Reward: -1  Episode Reward:  37
xxxxx
x . x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2314.75543146 -6442.16912869 -8192.20126966  2380.83559234]
------
Step:14, Action:West
State  288
Old Q Values:  [ 2314.75543146 -6442.16912869 -8192.20126966  2380.83559234]
New Q values:  [ 2314.75543146 -6442.16912869 -8192.20126966  1989.75234762]
Reward: -1  Episode Reward:  36
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2499.02375838  3460.06036893]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2499.02375838  3460.06036893]
New Q values:  [-2527.46239811 -8521.23367799  1693.43613279  3460.06036893]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2314.75543146 -6442.16912869 -8192.20126966  1989.75234762]
------
Step:16, Action:North
State  288
Old Q Values:  [ 2314.75543146 -6442.16912869 -8192.20126966  1989.75234762]
New Q values:  [ 2417.40474287 -6442.16912869 -8192.20126966  1989.75234762]
Reward: -1  Episode Reward:  34
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4973.67523429 -1977.69291708  3961.62714931  2599.28130597]
------
Step:17, Action:North
State  210
Old Q Values:  [ 4973.67523429 -1977.69291708  3961.62714931  2599.28130597]
New Q values:  [25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
Reward: -1  Episode Reward:  33
xxxxx
x .ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 78551.23377689]
------
Step:18, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  5.04277218e+03 -3.22965309e-01  1.08161045e+03]
New Q values:  [ 7.64171987e+01  5.04277218e+03 -3.22965309e-01  9.55573437e+02]
Reward: 9  Episode Reward:  42
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1725.09752278  1699.65393533]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2263.78980638   657.72591961]
New Q values:  [ -281.736      -1150.91067548  2417.74757803   657.72591961]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.04277218e+03 -3.22965309e-01  9.55573437e+02]
------
Step:20, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.04277218e+03 -3.22965309e-01  9.55573437e+02]
New Q values:  [ 7.64171987e+01  2.90262069e+03 -3.22965309e-01  9.55573437e+02]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2953.70606465  1726.22110463 -8896.20691497  2138.85546557]
------
Step:21, Action:North
State  216
Old Q Values:  [ 2953.70606465  1726.22110463 -8896.20691497  2138.85546557]
New Q values:  [ 2191.02336807  1726.22110463 -8896.20691497  2138.85546557]
Reward: -1  Episode Reward:  39
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3367.13647402 -2383.80019164   372.08852401]
------
Step:22, Action:South
State  136
Old Q Values:  [ -170.77177351  3367.13647402 -2383.80019164   372.08852401]
New Q values:  [ -170.77177351  2003.56160003 -2383.80019164   372.08852401]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2191.02336807  1726.22110463 -8896.20691497  2138.85546557]
------
Step:23, Action:West
State  216
Old Q Values:  [ 2191.02336807  1726.22110463 -8896.20691497  2138.85546557]
New Q values:  [ 2191.02336807  1726.22110463 -8896.20691497  2250.30987429]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  4.65122563e+03 -2.50318991e+03  2.00341972e+02]
------
Step:24, Action:South
State  203
Old Q Values:  [3.60604218e+00 1.39942508e+04 3.38606836e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 2.71373337e+04 3.38606836e+03 0.00000000e+00]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 71800.77798597]
------
Step:25, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 71800.77798597]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 97695.12458609]
Reward: 100009  Episode Reward:  100045
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 5.85255212e+03 1.24342931e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 5.85255212e+03 8.91870092e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13131.9456324   8524.58221732 12571.10248734  9373.30884915]
------
Step:2, Action:North
State  208
Old Q Values:  [13131.9456324   8524.58221732 12571.10248734  9373.30884915]
New Q values:  [ 5859.24673297  8524.58221732 12571.10248734  9373.30884915]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2003.56160003 -2383.80019164   372.08852401]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351  2003.56160003 -2383.80019164   372.08852401]
New Q values:  [ -170.77177351  4572.15538621 -2383.80019164   372.08852401]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5859.24673297  8524.58221732 12571.10248734  9373.30884915]
------
Step:4, Action:East
State  208
Old Q Values:  [ 5859.24673297  8524.58221732 12571.10248734  9373.30884915]
New Q values:  [5859.24673297 8524.58221732 8619.17174114 9373.30884915]
Reward: -301  Episode Reward:  -284
xxxxx
xg. x
x. ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[5859.24673297 8524.58221732 8619.17174114 9373.30884915]
------
Step:5, Action:West
State  208
Old Q Values:  [5859.24673297 8524.58221732 8619.17174114 9373.30884915]
New Q values:  [5859.24673297 8524.58221732 8619.17174114 6424.33381559]
Reward: -1  Episode Reward:  -285
xxxxx
x.g x
x.a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.85255212e+03 8.91870092e+03 2.91043938e+03]
------
Step:6, Action:East
State  192
Old Q Values:  [3.89777037e-01 5.85255212e+03 8.91870092e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.85255212e+03 6.15263189e+03 2.91043938e+03]
Reward: -1  Episode Reward:  -286
xxxxx
xg. x
x. ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[5859.24673297 8524.58221732 8619.17174114 6424.33381559]
------
Step:7, Action:East
State  208
Old Q Values:  [5859.24673297 8524.58221732 8619.17174114 6424.33381559]
New Q values:  [5859.24673297 8524.58221732 5852.8202188  6424.33381559]
Reward: -301  Episode Reward:  -587
xxxxx
x.. x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[5859.24673297 8524.58221732 5852.8202188  6424.33381559]
------
Step:8, Action:South
State  208
Old Q Values:  [5859.24673297 8524.58221732 5852.8202188  6424.33381559]
New Q values:  [5859.24673297 4140.45430979 5852.8202188  6424.33381559]
Reward: 9  Episode Reward:  -578
xxxxx
x.. x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2417.40474287 -6442.16912869 -8192.20126966  1989.75234762]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2417.40474287 -6442.16912869 -8192.20126966  1989.75234762]
New Q values:  [ 2893.66204183 -6442.16912869 -8192.20126966  1989.75234762]
Reward: -1  Episode Reward:  -579
xxxxx
x.g x
x. ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[5859.24673297 4140.45430979 5852.8202188  6424.33381559]
------
Step:10, Action:West
State  208
Old Q Values:  [5859.24673297 4140.45430979 5852.8202188  6424.33381559]
New Q values:  [5859.24673297 4140.45430979 5852.8202188  4414.92309331]
Reward: -1  Episode Reward:  -580
xxxxx
xg. x
x.a x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.85255212e+03 6.15263189e+03 2.91043938e+03]
------
Step:11, Action:East
State  192
Old Q Values:  [3.89777037e-01 5.85255212e+03 6.15263189e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.85255212e+03 4.21822678e+03 2.91043938e+03]
Reward: -1  Episode Reward:  -581
xxxxx
x.g x
x. ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[5859.24673297 4140.45430979 5852.8202188  4414.92309331]
------
Step:12, Action:North
State  208
Old Q Values:  [5859.24673297 4140.45430979 5852.8202188  4414.92309331]
New Q values:  [9671.66876584 4140.45430979 5852.8202188  4414.92309331]
Reward: -10001  Episode Reward:  -10582
xxxxx
x..gx
x.  x
x . x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:1, Action:East
State  195
Old Q Values:  [   38.85388605  7251.41441546 31859.37449217  1101.59744825]
New Q values:  [   38.85388605  7251.41441546 20415.4218649   1101.59744825]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
------
Step:2, Action:North
State  208
Old Q Values:  [9671.66876584 4140.45430979 5852.8202188  4414.92309331]
New Q values:  [4744.85371435 4140.45430979 5852.8202188  4414.92309331]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.90262069e+03 -3.22965309e-01  9.55573437e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.90262069e+03 -3.22965309e-01  9.55573437e+02]
New Q values:  [ 7.64171987e+01  2.90262069e+03 -3.22965309e-01  1.11295365e+03]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2417.74757803   657.72591961]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1725.09752278  1699.65393533]
New Q values:  [ -253.44886264 -1902.20915811  1560.22521712  1699.65393533]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.90262069e+03 -3.22965309e-01  1.11295365e+03]
------
Step:5, Action:West
State  136
Old Q Values:  [ -170.77177351  4572.15538621 -2383.80019164   372.08852401]
New Q values:  [ -170.77177351  4572.15538621 -2383.80019164   217.66622873]
Reward: -1  Episode Reward:  25
xxxxx
x.agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   231.43606375]
------
Step:6, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1560.22521712  1699.65393533]
New Q values:  [ -253.44886264 -1902.20915811  1560.22521712   999.9022207 ]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  789.63626836 1048.80215523 -252.78192178]
------
Step:7, Action:East
State  107
Old Q Values:  [-252.35169558  789.63626836 1048.80215523 -252.78192178]
New Q values:  [-252.35169558  789.63626836  886.98842723 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1560.22521712   999.9022207 ]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2417.74757803   657.72591961]
New Q values:  [ -281.736      -1150.91067548  1837.28523922   657.72591961]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.90262069e+03 -3.22965309e-01  1.11295365e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.90262069e+03 -3.22965309e-01  1.11295365e+03]
New Q values:  [ 7.64171987e+01  1.83554124e+03 -3.22965309e-01  1.11295365e+03]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2191.02336807  1726.22110463 -8896.20691497  2250.30987429]
------
Step:10, Action:North
State  218
Old Q Values:  [2552.38786537 6735.13737869    0.          386.1281519 ]
New Q values:  [1571.01751804 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.83554124e+03 -3.22965309e-01  1.11295365e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.83554124e+03 -3.22965309e-01  1.11295365e+03]
New Q values:  [ 7.64171987e+01  8.39988856e+03 -3.22965309e-01  1.11295365e+03]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
------
Step:12, Action:North
State  216
Old Q Values:  [ 2191.02336807  1726.22110463 -8896.20691497  2250.30987429]
New Q values:  [ 3395.77591639  1726.22110463 -8896.20691497  2250.30987429]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  8.39988856e+03 -3.22965309e-01  1.11295365e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  8.39988856e+03 -3.22965309e-01  1.11295365e+03]
New Q values:  [ 7.64171987e+01  8.39988856e+03 -3.22965309e-01  9.95767031e+02]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1837.28523922   657.72591961]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1837.28523922   657.72591961]
New Q values:  [ -281.736      -1150.91067548  3254.28066486   657.72591961]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  8.39988856e+03 -3.22965309e-01  9.95767031e+02]
------
Step:15, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  8.39988856e+03 -3.22965309e-01  9.95767031e+02]
New Q values:  [ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  9.95767031e+02]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3395.77591639  1726.22110463 -8896.20691497  2250.30987429]
------
Step:16, Action:North
State  216
Old Q Values:  [ 3395.77591639  1726.22110463 -8896.20691497  2250.30987429]
New Q values:  [ 2671.1368267   1726.22110463 -8896.20691497  2250.30987429]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  9.95767031e+02]
------
Step:17, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  9.95767031e+02]
New Q values:  [ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  1.37399101e+03]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3254.28066486   657.72591961]
------
Step:18, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1560.22521712   999.9022207 ]
New Q values:  [ -253.44886264 -1902.20915811  1936.91654699   999.9022207 ]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  1.37399101e+03]
------
Step:19, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  1.37399101e+03]
New Q values:  [ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  21
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1936.91654699   999.9022207 ]
------
Step:20, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3254.28066486   657.72591961]
New Q values:  [ -281.736      -1150.91067548  2614.53872608   657.72591961]
Reward: -1  Episode Reward:  20
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  1.13007137e+03]
------
Step:21, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.37808820e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  2.55197633e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2671.1368267   1726.22110463 -8896.20691497  2250.30987429]
------
Step:22, Action:North
State  216
Old Q Values:  [ 2671.1368267   1726.22110463 -8896.20691497  2250.30987429]
New Q values:  [ 2439.50134654  1726.22110463 -8896.20691497  2250.30987429]
Reward: -1  Episode Reward:  18
xxxxx
x gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4572.15538621 -2383.80019164   217.66622873]
------
Step:23, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.55197633e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  1.75204094e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2439.50134654  1726.22110463 -8896.20691497  2250.30987429]
------
Step:24, Action:North
State  218
Old Q Values:  [1571.01751804 6735.13737869    0.          386.1281519 ]
New Q values:  [1153.41928779 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.75204094e+03 -3.22965309e-01  1.13007137e+03]
------
Step:25, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.75204094e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  8.36648844e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
------
Step:26, Action:North
State  218
Old Q Values:  [1153.41928779 6735.13737869    0.          386.1281519 ]
New Q values:  [2970.71424775 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  8.36648844e+03 -3.22965309e-01  1.13007137e+03]
------
Step:27, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  8.36648844e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  1.10122674e+04 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
------
Step:28, Action:North
State  218
Old Q Values:  [2970.71424775 6735.13737869    0.          386.1281519 ]
New Q values:  [4491.36593257 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.10122674e+04 -3.22965309e-01  1.13007137e+03]
------
Step:29, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.10122674e+04 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  5.13615738e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  11
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2439.50134654  1726.22110463 -8896.20691497  2250.30987429]
------
Step:30, Action:North
State  218
Old Q Values:  [4491.36593257 6735.13737869    0.          386.1281519 ]
New Q values:  [3336.7935876  6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.13615738e+03 -3.22965309e-01  1.13007137e+03]
------
Step:31, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.13615738e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  9.72013502e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  9
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
------
Step:32, Action:North
State  218
Old Q Values:  [3336.7935876  6735.13737869    0.          386.1281519 ]
New Q values:  [4250.15794128 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.72013502e+03 -3.22965309e-01  1.13007137e+03]
------
Step:33, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  9.72013502e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  1.15537261e+04 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  7
xxxxx
x   x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
------
Step:34, Action:North
State  218
Old Q Values:  [4250.15794128 6735.13737869    0.          386.1281519 ]
New Q values:  [5165.58099942 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.15537261e+04 -3.22965309e-01  1.13007137e+03]
------
Step:35, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.15537261e+04 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  1.22871625e+04 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  5
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
------
Step:36, Action:North
State  216
Old Q Values:  [ 2439.50134654  1726.22110463 -8896.20691497  2250.30987429]
New Q values:  [ 4661.34928819  1726.22110463 -8896.20691497  2250.30987429]
Reward: -1  Episode Reward:  4
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.22871625e+04 -3.22965309e-01  1.13007137e+03]
------
Step:37, Action:West
State  136
Old Q Values:  [ -170.77177351  4572.15538621 -2383.80019164   217.66622873]
New Q values:  [ -170.77177351  4572.15538621 -2383.80019164   155.89731062]
Reward: -1  Episode Reward:  3
xxxxx
x agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   231.43606375]
------
Step:38, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1571.84492898   729.5437439 ]
New Q values:  [-9594.56523706 -8069.05606225  1571.84492898   779.1638549 ]
Reward: -1  Episode Reward:  2
xxxxx
xag x
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1626.4878578    65.14560537    0.        ]
------
Step:39, Action:South
State  107
Old Q Values:  [-252.35169558  789.63626836  886.98842723 -252.78192178]
New Q values:  [-252.35169558  717.89453299  886.98842723 -252.78192178]
Reward: 9  Episode Reward:  11
xxxxx
x   x
xag x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 476.02888691    0.         1322.13341882 -178.98      ]
------
Step:40, Action:North
State  184
Old Q Values:  [  118.98855974     0.         14061.4268019      0.        ]
New Q values:  [  162.71210215     0.         14061.4268019      0.        ]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   385.72226087  -180.6       ]
------
Step:41, Action:East
State  110
Old Q Values:  [-239.29051573  475.48898133  365.96192905 -180.6       ]
New Q values:  [-239.29051573  475.48898133  814.41842045 -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 2228.77882944  602.28778349]
------
Step:42, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2614.53872608   657.72591961]
New Q values:  [ -281.736      -1150.91067548  4731.36424001   657.72591961]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.22871625e+04 -3.22965309e-01  1.13007137e+03]
------
Step:43, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.22871625e+04 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  6.93480621e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  7
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[5165.58099942 6735.13737869    0.          386.1281519 ]
------
Step:44, Action:North
State  218
Old Q Values:  [5165.58099942 6735.13737869    0.          386.1281519 ]
New Q values:  [4146.07426368 6735.13737869    0.          386.1281519 ]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  6.93480621e+03 -3.22965309e-01  1.13007137e+03]
------
Step:45, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  6.93480621e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  4.79386370e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  5
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4146.07426368 6735.13737869    0.          386.1281519 ]
------
Step:46, Action:South
State  218
Old Q Values:  [4146.07426368 6735.13737869    0.          386.1281519 ]
New Q values:  [4146.07426368 3567.55356402    0.          386.1281519 ]
Reward: 9  Episode Reward:  14
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2893.66204183 -6442.16912869 -8192.20126966  1989.75234762]
------
Step:47, Action:North
State  288
Old Q Values:  [ 2893.66204183 -6442.16912869 -8192.20126966  1989.75234762]
New Q values:  [ 2555.26960319 -6442.16912869 -8192.20126966  1989.75234762]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4661.34928819  1726.22110463 -8896.20691497  2250.30987429]
------
Step:48, Action:North
State  216
Old Q Values:  [ 4661.34928819  1726.22110463 -8896.20691497  2250.30987429]
New Q values:  [ 3235.58633114  1726.22110463 -8896.20691497  2250.30987429]
Reward: -1  Episode Reward:  12
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4572.15538621 -2383.80019164   155.89731062]
------
Step:49, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.79386370e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  2.88762138e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  11
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3235.58633114  1726.22110463 -8896.20691497  2250.30987429]
------
Step:50, Action:North
State  218
Old Q Values:  [4146.07426368 3567.55356402    0.          386.1281519 ]
New Q values:  [2524.11611913 3567.55356402    0.          386.1281519 ]
Reward: -1  Episode Reward:  10
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.88762138e+03 -3.22965309e-01  1.13007137e+03]
------
Step:51, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.88762138e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  2.22471462e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  9
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2524.11611913 3567.55356402    0.          386.1281519 ]
------
Step:52, Action:North
State  216
Old Q Values:  [ 3235.58633114  1726.22110463 -8896.20691497  2250.30987429]
New Q values:  [ 1961.04891868  1726.22110463 -8896.20691497  2250.30987429]
Reward: -1  Episode Reward:  8
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.22471462e+03 -3.22965309e-01  1.13007137e+03]
------
Step:53, Action:West
State  136
Old Q Values:  [ -170.77177351  4572.15538621 -2383.80019164   155.89731062]
New Q values:  [ -170.77177351  4572.15538621 -2383.80019164   131.18974337]
Reward: -1  Episode Reward:  7
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   231.43606375]
------
Step:54, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1936.91654699   999.9022207 ]
New Q values:  [ -253.44886264 -1902.20915811  1936.91654699   665.45741645]
Reward: -1  Episode Reward:  6
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  717.89453299  886.98842723 -252.78192178]
------
Step:55, Action:East
State  107
Old Q Values:  [-252.35169558  717.89453299  886.98842723 -252.78192178]
New Q values:  [-252.35169558  717.89453299 1773.60464289 -252.78192178]
Reward: -1  Episode Reward:  5
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4731.36424001   657.72591961]
------
Step:56, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4731.36424001   657.72591961]
New Q values:  [ -281.736      -1150.91067548  2559.36008223   657.72591961]
Reward: -1  Episode Reward:  4
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.22471462e+03 -3.22965309e-01  1.13007137e+03]
------
Step:57, Action:South
State  136
Old Q Values:  [ -170.77177351  4572.15538621 -2383.80019164   131.18974337]
New Q values:  [ -170.77177351  2503.35511677 -2383.80019164   131.18974337]
Reward: -1  Episode Reward:  3
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1961.04891868  1726.22110463 -8896.20691497  2250.30987429]
------
Step:58, Action:West
State  216
Old Q Values:  [ 1961.04891868  1726.22110463 -8896.20691497  2250.30987429]
New Q values:  [ 1961.04891868  1726.22110463 -8896.20691497  1988.16379862]
Reward: -1  Episode Reward:  2
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3628.79949635 1830.98954698 1141.49622464]
------
Step:59, Action:South
State  200
Old Q Values:  [  62.8218634  3628.79949635 1830.98954698 1141.49622464]
New Q values:  [  62.8218634  2488.93790922 1830.98954698 1141.49622464]
Reward: -1  Episode Reward:  1
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1693.43613279  3460.06036893]
------
Step:60, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 97695.12458609]
New Q values:  [   870.35122762   -168.92307549   4644.37724943 108052.86322614]
Reward: 100009  Episode Reward:  100010
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5120.48535318 -2984.46350323  1099.96026581]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.85255212e+03 4.21822678e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.38443896e+03 4.21822678e+03 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1693.43613279  3460.06036893]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1693.43613279  3460.06036893]
New Q values:  [-2527.46239811 -8521.23367799  1693.43613279  2420.7869895 ]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2062.37573761 -2735.46306511  3437.87613976 -2601.74710518]
------
Step:3, Action:East
State  260
Old Q Values:  [ 2062.37573761 -2735.46306511  3437.87613976 -2601.74710518]
New Q values:  [ 2062.37573761 -2735.46306511  2100.78655275 -2601.74710518]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1693.43613279  2420.7869895 ]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1693.43613279  2420.7869895 ]
New Q values:  [-2527.46239811 -8521.23367799  1693.43613279  1597.95076163]
Reward: -1  Episode Reward:  16
xxxxx
xg. x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2062.37573761 -2735.46306511  2100.78655275 -2601.74710518]
------
Step:5, Action:East
State  257
Old Q Values:  [29898.04463901  2256.66526474 10268.96421813  1875.31501677]
New Q values:  [29898.04463901  2256.66526474  4615.01652709  1875.31501677]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1693.43613279  1597.95076163]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1693.43613279  1597.95076163]
New Q values:  [-2527.46239811 -8521.23367799  1449.35533407  1597.95076163]
Reward: 9  Episode Reward:  24
xxxxx
xg. x
x. .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2555.26960319 -6442.16912869 -8192.20126966  1989.75234762]
------
Step:7, Action:North
State  288
Old Q Values:  [ 2555.26960319 -6442.16912869 -8192.20126966  1989.75234762]
New Q values:  [ 2783.35390691 -6442.16912869 -8192.20126966  1989.75234762]
Reward: 9  Episode Reward:  33
xxxxx
x.g x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[4744.85371435 4140.45430979 5852.8202188  4414.92309331]
------
Step:8, Action:East
State  208
Old Q Values:  [4744.85371435 4140.45430979 5852.8202188  4414.92309331]
New Q values:  [4744.85371435 4140.45430979 3916.37415316 4414.92309331]
Reward: -301  Episode Reward:  -268
xxxxx
x..gx
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[4744.85371435 4140.45430979 3916.37415316 4414.92309331]
------
Step:9, Action:West
State  208
Old Q Values:  [4744.85371435 4140.45430979 3916.37415316 4414.92309331]
New Q values:  [4744.85371435 4140.45430979 3916.37415316 3301.51484328]
Reward: -1  Episode Reward:  -269
xxxxx
x.. x
x.agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5120.48535318 -2984.46350323  1099.96026581]
------
Step:10, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.38443896e+03 4.21822678e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.83256081e+03 4.21822678e+03 2.91043938e+03]
Reward: -1  Episode Reward:  -270
xxxxx
x.. x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1449.35533407  1597.95076163]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1449.35533407  1597.95076163]
New Q values:  [-2527.46239811 -8521.23367799  1449.35533407  9607.99369635]
Reward: -1  Episode Reward:  -271
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[29898.04463901  2256.66526474  4615.01652709  1875.31501677]
------
Step:12, Action:North
State  257
Old Q Values:  [29898.04463901  2256.66526474  4615.01652709  1875.31501677]
New Q values:  [30990.71193714  2256.66526474  4615.01652709  1875.31501677]
Reward: 9  Episode Reward:  -262
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[24286.95305781 24935.33673146 63420.31360511     0.        ]
------
Step:13, Action:South
State  177
Old Q Values:  [24286.95305781 24935.33673146 63420.31360511     0.        ]
New Q values:  [24286.95305781 19270.74827372 63420.31360511     0.        ]
Reward: -1  Episode Reward:  -263
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[30990.71193714  2256.66526474  4615.01652709  1875.31501677]
------
Step:14, Action:North
State  257
Old Q Values:  [30990.71193714  2256.66526474  4615.01652709  1875.31501677]
New Q values:  [31421.77885639  2256.66526474  4615.01652709  1875.31501677]
Reward: -1  Episode Reward:  -264
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[24286.95305781 19270.74827372 63420.31360511     0.        ]
------
Step:15, Action:North
State  181
Old Q Values:  [594.26702564 356.13872326 -71.77924246 -30.99112081]
New Q values:  [785.28533417 356.13872326 -71.77924246 -30.99112081]
Reward: 9  Episode Reward:  -255
xxxxx
xa. x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1807.26174637    5.4           0.        ]
------
Step:16, Action:South
State  103
Old Q Values:  [ 221.30610858 1807.26174637    5.4           0.        ]
New Q values:  [ 221.30610858 2733.85253197    5.4           0.        ]
Reward: -1  Episode Reward:  -256
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1597.07502264 6705.15944474    0.        ]
------
Step:17, Action:East
State  183
Old Q Values:  [ 877.23516594 1597.07502264 6705.15944474    0.        ]
New Q values:  [ 877.23516594 1597.07502264 5057.25901821    0.        ]
Reward: -1  Episode Reward:  -257
xxxxx
x . x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:18, Action:East
State  195
Old Q Values:  [   38.85388605  7251.41441546 20415.4218649   1101.59744825]
New Q values:  [   38.85388605  7251.41441546 15831.840814    1101.59744825]
Reward: -1  Episode Reward:  -258
xxxxx
x . x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
------
Step:19, Action:North
State  210
Old Q Values:  [25554.24022678 -1977.69291708  3961.62714931  2599.28130597]
New Q values:  [33786.46622378 -1977.69291708  3961.62714931  2599.28130597]
Reward: -1  Episode Reward:  -259
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 78551.23377689]
------
Step:20, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 78551.23377689]
New Q values:  [ 46177.80406237  24312.8833164    -180.00807518 124077.96175234]
Reward: 100009  Episode Reward:  99750
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1936.91654699   665.45741645]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2559.36008223   657.72591961]
New Q values:  [ -281.736      -1150.91067548  1696.55841912   657.72591961]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.22471462e+03 -3.22965309e-01  1.13007137e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.22471462e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  1.10252257e+04 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[33786.46622378 -1977.69291708  3961.62714931  2599.28130597]
------
Step:3, Action:North
State  216
Old Q Values:  [ 1961.04891868  1726.22110463 -8896.20691497  1988.16379862]
New Q values:  [ 4091.3872821   1726.22110463 -8896.20691497  1988.16379862]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.10252257e+04 -3.22965309e-01  1.13007137e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.10252257e+04 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  1.45454302e+04 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[33786.46622378 -1977.69291708  3961.62714931  2599.28130597]
------
Step:5, Action:North
State  210
Old Q Values:  [33786.46622378 -1977.69291708  3961.62714931  2599.28130597]
New Q values:  [17877.6155355  -1977.69291708  3961.62714931  2599.28130597]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.45454302e+04 -3.22965309e-01  1.13007137e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.45454302e+04 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  1.11808567e+04 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[17877.6155355  -1977.69291708  3961.62714931  2599.28130597]
------
Step:7, Action:North
State  216
Old Q Values:  [ 4091.3872821   1726.22110463 -8896.20691497  1988.16379862]
New Q values:  [ 4990.21192943  1726.22110463 -8896.20691497  1988.16379862]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.11808567e+04 -3.22965309e-01  1.13007137e+03]
------
Step:8, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.11808567e+04 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01  9.83502735e+03 -3.22965309e-01  1.13007137e+03]
Reward: -1  Episode Reward:  2
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[17877.6155355  -1977.69291708  3961.62714931  2599.28130597]
------
Step:9, Action:North
State  216
Old Q Values:  [ 4990.21192943  1726.22110463 -8896.20691497  1988.16379862]
New Q values:  [ 4945.99297661  1726.22110463 -8896.20691497  1988.16379862]
Reward: -1  Episode Reward:  1
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.83502735e+03 -3.22965309e-01  1.13007137e+03]
------
Step:10, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  9.83502735e+03 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  1.13007137e+03]
Reward: -10001  Episode Reward:  -10000
xxxxx
x.  x
x..gx
x...x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[17877.6155355  -1977.69291708  3961.62714931  2599.28130597]
------
Step:1, Action:North
State  210
Old Q Values:  [17877.6155355  -1977.69291708  3961.62714931  2599.28130597]
New Q values:  [ 7495.46762485 -1977.69291708  3961.62714931  2599.28130597]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  1.13007137e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  1.13007137e+03]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  1.03850351e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1936.91654699   665.45741645]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1696.55841912   657.72591961]
New Q values:  [ -281.736      -1150.91067548   989.57442114   657.72591961]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  1.03850351e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  1.03850351e+03]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  7.11673731e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   989.57442114   657.72591961]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   989.57442114   657.72591961]
New Q values:  [ -281.736      -1150.91067548   608.73188775   657.72591961]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  7.11673731e+02]
------
Step:6, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  7.11673731e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.81387268e+02]
Reward: -1  Episode Reward:  14
xxxxx
x.a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   608.73188775   657.72591961]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   608.73188775   657.72591961]
New Q values:  [ -281.736      -1150.91067548   608.73188775   512.81589398]
Reward: 9  Episode Reward:  23
xxxxx
xa  x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  475.48898133  814.41842045 -180.6       ]
------
Step:8, Action:East
State  107
Old Q Values:  [-252.35169558  717.89453299 1773.60464289 -252.78192178]
New Q values:  [-252.35169558  717.89453299  891.46142348 -252.78192178]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   608.73188775   512.81589398]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   608.73188775   512.81589398]
New Q values:  [ -281.736      -1150.91067548   387.30893559   512.81589398]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.81387268e+02]
------
Step:10, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.81387268e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.45799676e+02]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   387.30893559   512.81589398]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   387.30893559   512.81589398]
New Q values:  [ -281.736      -1150.91067548   387.30893559   471.96478464]
Reward: -1  Episode Reward:  19
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  717.89453299  891.46142348 -252.78192178]
------
Step:12, Action:East
State  107
Old Q Values:  [-252.35169558  717.89453299  891.46142348 -252.78192178]
New Q values:  [-252.35169558  717.89453299  497.57400478 -252.78192178]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   387.30893559   471.96478464]
------
Step:13, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1571.84492898   779.1638549 ]
New Q values:  [-9594.56523706 -8069.05606225  1571.84492898   799.0118993 ]
Reward: -1  Episode Reward:  17
xxxxx
xag x
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1626.4878578    65.14560537    0.        ]
------
Step:14, Action:South
State  108
Old Q Values:  [-8463.16477134  2658.62760714   633.13598933     0.        ]
New Q values:  [-8463.16477134  2214.13535882   633.13598933     0.        ]
Reward: 9  Episode Reward:  26
xxxxx
xg  x
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -746.35376992  3817.61438654  3055.60135842 -4966.32149798]
------
Step:15, Action:South
State  180
Old Q Values:  [ -746.35376992  3817.61438654  3055.60135842 -4966.32149798]
New Q values:  [ -746.35376992  2162.68172044  3055.60135842 -4966.32149798]
Reward: 9  Episode Reward:  35
xxxxx
x   x
xg. x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2062.37573761 -2735.46306511  2100.78655275 -2601.74710518]
------
Step:16, Action:East
State  260
Old Q Values:  [ 2062.37573761 -2735.46306511  2100.78655275 -2601.74710518]
New Q values:  [ 2062.37573761 -2735.46306511  3728.11273001 -2601.74710518]
Reward: 9  Episode Reward:  44
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1449.35533407  9607.99369635]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1449.35533407  9607.99369635]
New Q values:  [-2527.46239811 -8521.23367799  1449.35533407  4961.03129754]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2062.37573761 -2735.46306511  3728.11273001 -2601.74710518]
------
Step:18, Action:East
State  257
Old Q Values:  [31421.77885639  2256.66526474  4615.01652709  1875.31501677]
New Q values:  [31421.77885639  2256.66526474  3333.7160001   1875.31501677]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1449.35533407  4961.03129754]
------
Step:19, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1449.35533407  4961.03129754]
New Q values:  [-2527.46239811 -8521.23367799  1449.35533407  3118.14054187]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3781.09340949    26.73544252 -1152.63173938   -35.88578819]
------
Step:20, Action:North
State  261
Old Q Values:  [ 3781.09340949    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [ 1747.42296405    26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[785.28533417 356.13872326 -71.77924246 -30.99112081]
------
Step:21, Action:North
State  181
Old Q Values:  [785.28533417 356.13872326 -71.77924246 -30.99112081]
New Q values:  [462.07147878 356.13872326 -71.77924246 -30.99112081]
Reward: -1  Episode Reward:  39
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  495.19115038   -8.57207238 -180.6       ]
------
Step:22, Action:South
State  99
Old Q Values:  [    0.         48393.28699369 65700.46302897     0.        ]
New Q values:  [    0.         38382.80887901 65700.46302897     0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[24286.95305781 19270.74827372 63420.31360511     0.        ]
------
Step:23, Action:North
State  180
Old Q Values:  [ -746.35376992  2162.68172044  3055.60135842 -4966.32149798]
New Q values:  [  -54.81598183  2162.68172044  3055.60135842 -4966.32149798]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  475.48898133  814.41842045 -180.6       ]
------
Step:24, Action:East
State  99
Old Q Values:  [    0.         38382.80887901 65700.46302897     0.        ]
New Q values:  [    0.         38382.80887901 58931.65345318     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 108840.22747196]
------
Step:25, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1571.84492898   799.0118993 ]
New Q values:  [-9594.56523706 -8069.05606225  1571.84492898   467.56210483]
Reward: -1  Episode Reward:  35
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  495.19115038   -8.57207238 -180.6       ]
------
Step:26, Action:South
State  99
Old Q Values:  [    0.         38382.80887901 58931.65345318     0.        ]
New Q values:  [    0.         34378.61763314 58931.65345318     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[24286.95305781 19270.74827372 63420.31360511     0.        ]
------
Step:27, Action:North
State  180
Old Q Values:  [  -54.81598183  2162.68172044  3055.60135842 -4966.32149798]
New Q values:  [  221.7991334   2162.68172044  3055.60135842 -4966.32149798]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  475.48898133  814.41842045 -180.6       ]
------
Step:28, Action:East
State  99
Old Q Values:  [    0.         34378.61763314 58931.65345318     0.        ]
New Q values:  [    0.         34378.61763314 56224.12962286     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 108840.22747196]
------
Step:29, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   387.30893559   471.96478464]
New Q values:  [ -281.736      -1150.91067548   387.30893559   432.51143999]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  475.48898133  814.41842045 -180.6       ]
------
Step:30, Action:East
State  110
Old Q Values:  [-239.29051573  475.48898133  814.41842045 -180.6       ]
New Q values:  [-239.29051573  475.48898133  454.92080018 -180.6       ]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   387.30893559   432.51143999]
------
Step:31, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   387.30893559   432.51143999]
New Q values:  [ -281.736      -1150.91067548   387.30893559  1068.02241072]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2985.3927824   718.60978155 -120.29354603]
------
Step:32, Action:South
State  110
Old Q Values:  [-239.29051573  475.48898133  454.92080018 -180.6       ]
New Q values:  [-239.29051573 1234.31672537  454.92080018 -180.6       ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:33, Action:East
State  176
Old Q Values:  [76485.61294353  1621.55095326 45408.88473875     0.        ]
New Q values:  [76485.61294353  1621.55095326 79434.4219283      0.        ]
Reward: 100009  Episode Reward:  100037
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   387.30893559  1068.02241072]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   387.30893559  1068.02241072]
New Q values:  [ -281.736      -1150.91067548   387.30893559   802.9039819 ]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1234.31672537  454.92080018 -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 2985.3927824   718.60978155 -120.29354603]
New Q values:  [-177.44732869 2716.73481842  718.60978155 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1597.07502264 5057.25901821    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 877.23516594 1597.07502264 5057.25901821    0.        ]
New Q values:  [ 877.23516594 1597.07502264 2670.775675      0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194 1982.97289372 2141.57355904 1915.70494401]
------
Step:4, Action:East
State  197
Old Q Values:  [1.32443385e-01 1.36149628e+03 0.00000000e+00 4.03062559e+02]
New Q values:  [ 1.32443385e-01  1.36149628e+03 -4.51080211e+03  4.03062559e+02]
Reward: -9991  Episode Reward:  -9964
xxxxx
x  .x
x  gx
x. .x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2783.35390691 -6442.16912869 -8192.20126966  1989.75234762]
------
Step:1, Action:West
State  288
Old Q Values:  [ 2783.35390691 -6442.16912869 -8192.20126966  1989.75234762]
New Q values:  [ 2783.35390691 -6442.16912869 -8192.20126966  1736.74310161]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1449.35533407  3118.14054187]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1449.35533407  3118.14054187]
New Q values:  [-2527.46239811 -8521.23367799  1449.35533407  1776.88310596]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1747.42296405    26.73544252 -1152.63173938   -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [ 1747.42296405    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [ 1505.60188812    26.73544252 -1152.63173938   -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1597.07502264 2670.775675      0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [462.07147878 356.13872326 -71.77924246 -30.99112081]
New Q values:  [ 462.07147878  356.13872326 1512.83390897  -30.99112081]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5120.48535318 -2984.46350323  1099.96026581]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.83256081e+03 4.21822678e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.26548926e+03 4.21822678e+03 2.91043938e+03]
Reward: -1  Episode Reward:  35
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1449.35533407  1776.88310596]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1449.35533407  1776.88310596]
New Q values:  [-2527.46239811 -8521.23367799  1449.35533407  1161.83380882]
Reward: -1  Episode Reward:  34
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1505.60188812    26.73544252 -1152.63173938   -35.88578819]
------
Step:7, Action:North
State  260
Old Q Values:  [ 2062.37573761 -2735.46306511  3728.11273001 -2601.74710518]
New Q values:  [ 1869.07142788 -2735.46306511  3728.11273001 -2601.74710518]
Reward: -1  Episode Reward:  33
xxxxx
x...x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:8, Action:East
State  183
Old Q Values:  [ 877.23516594 1597.07502264 2670.775675      0.        ]
New Q values:  [ 877.23516594 1597.07502264 3443.50551032    0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  7.91931747e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  5.41576727e+03  1.20371620e+03]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 7495.46762485 -1977.69291708  3961.62714931  2599.28130597]
------
Step:10, Action:North
State  210
Old Q Values:  [ 7495.46762485 -1977.69291708  3961.62714931  2599.28130597]
New Q values:  [40226.97557565 -1977.69291708  3961.62714931  2599.28130597]
Reward: 9  Episode Reward:  40
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237  24312.8833164    -180.00807518 124077.96175234]
------
Step:11, Action:West
State  130
Old Q Values:  [ 46177.80406237  24312.8833164    -180.00807518 124077.96175234]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 89484.03949258]
Reward: 9  Episode Reward:  49
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32824849e+05]
------
Step:12, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   28498.32154925 108840.22747196]
New Q values:  [  -180.6          3557.6642036   28498.32154925 120408.72987564]
Reward: 100009  Episode Reward:  100058
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   387.30893559   802.9039819 ]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   387.30893559   802.9039819 ]
New Q values:  [ -281.736      -1150.91067548   387.30893559  1141.58203829]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2716.73481842  718.60978155 -120.29354603]
------
Step:2, Action:South
State  110
Old Q Values:  [-239.29051573 1234.31672537  454.92080018 -180.6       ]
New Q values:  [-239.29051573 1543.84782298  454.92080018 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:3, Action:East
State  180
Old Q Values:  [  221.7991334   2162.68172044  3055.60135842 -4966.32149798]
New Q values:  [  221.7991334   2162.68172044  2081.18692984 -4966.32149798]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1090.23428232  2845.15462157   239.04887894]
------
Step:4, Action:East
State  196
Old Q Values:  [-2469.90645144  1090.23428232  2845.15462157   239.04887894]
New Q values:  [-2469.90645144  1090.23428232  2627.25974161   239.04887894]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4945.99297661  1726.22110463 -8896.20691497  1988.16379862]
------
Step:5, Action:North
State  218
Old Q Values:  [2524.11611913 3567.55356402    0.          386.1281519 ]
New Q values:  [1118.7863503  3567.55356402    0.          386.1281519 ]
Reward: 9  Episode Reward:  45
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.45799676e+02]
------
Step:6, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.45799676e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.80194482e+02]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   387.30893559  1141.58203829]
------
Step:7, Action:West
State  126
Old Q Values:  [   0.          331.64678262 2228.77882944  602.28778349]
New Q values:  [   0.          331.64678262 2228.77882944 1055.33555892]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2716.73481842  718.60978155 -120.29354603]
------
Step:8, Action:South
State  111
Old Q Values:  [-177.44732869 2716.73481842  718.60978155 -120.29354603]
New Q values:  [-177.44732869 1323.73523355  718.60978155 -120.29354603]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 792.13768726 321.41835844   0.        ]
------
Step:9, Action:South
State  189
Old Q Values:  [  64.81505849 1220.1801337   777.82012054  154.04646645]
New Q values:  [ 64.81505849 939.15261991 777.82012054 154.04646645]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1505.60188812    26.73544252 -1152.63173938   -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [ 1505.60188812    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [  883.38654122    26.73544252 -1152.63173938   -35.88578819]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 64.81505849 939.15261991 777.82012054 154.04646645]
------
Step:11, Action:South
State  189
Old Q Values:  [ 64.81505849 939.15261991 777.82012054 154.04646645]
New Q values:  [ 64.81505849 640.07701033 777.82012054 154.04646645]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x   x
xa..x
xxxxx
Step:12, Action:East
State  261
Old Q Values:  [  883.38654122    26.73544252 -1152.63173938   -35.88578819]
New Q values:  [883.38654122  26.73544252 624.88927046 -35.88578819]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  3601.80655405]
------
Step:13, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  3601.80655405]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  1705.13858399]
Reward: -1  Episode Reward:  47
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[883.38654122  26.73544252 624.88927046 -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [883.38654122  26.73544252 624.88927046 -35.88578819]
New Q values:  [586.10065265  26.73544252 624.88927046 -35.88578819]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 64.81505849 640.07701033 777.82012054 154.04646645]
------
Step:15, Action:East
State  189
Old Q Values:  [ 64.81505849 640.07701033 777.82012054 154.04646645]
New Q values:  [ 64.81505849 640.07701033 432.40094143 154.04646645]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         406.24297738   0.         198.38683706]
------
Step:16, Action:South
State  205
Old Q Values:  [  0.         406.24297738   0.         198.38683706]
New Q values:  [  0.         546.98952955   0.         198.38683706]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          -29.77444073 1283.64112866]
------
Step:17, Action:West
State  277
Old Q Values:  [   1.64433       0.          -29.77444073 1283.64112866]
New Q values:  [  1.64433      0.         -29.77444073 700.3232326 ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[586.10065265  26.73544252 624.88927046 -35.88578819]
------
Step:18, Action:East
State  257
Old Q Values:  [31421.77885639  2256.66526474  3333.7160001   1875.31501677]
New Q values:  [31421.77885639  2256.66526474 33748.74536788  1875.31501677]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   870.35122762   -168.92307549   4644.37724943 108052.86322614]
------
Step:19, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  1705.13858399]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103   868.92221473]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x   x
xag.x
xxxxx
Step:20, Action:West
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  3728.11273001 -2601.74710518]
New Q values:  [ 1869.07142788 -2735.46306511  3728.11273001 -6102.86502307]
Reward: -10301  Episode Reward:  -10260
xxxxx
x   x
x   x
xg .x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[4744.85371435 4140.45430979 3916.37415316 3301.51484328]
------
Step:1, Action:North
State  208
Old Q Values:  [4744.85371435 4140.45430979 3916.37415316 3301.51484328]
New Q values:  [2047.39983025 4140.45430979 3916.37415316 3301.51484328]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.80194482e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.80194482e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  7.78552757e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1936.91654699   665.45741645]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1936.91654699   665.45741645]
New Q values:  [ -253.44886264 -1902.20915811  1007.73244583   665.45741645]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  7.78552757e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  7.78552757e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  6.13140836e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1007.73244583   665.45741645]
------
Step:5, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1007.73244583   665.45741645]
New Q values:  [ -253.44886264 -1902.20915811   586.43522927   665.45741645]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  6.13140836e+02]
------
Step:6, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  6.13140836e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  5.87130946e+02]
Reward: -1  Episode Reward:  14
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   387.30893559  1141.58203829]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   586.43522927   665.45741645]
New Q values:  [ -253.44886264 -1902.20915811   586.43522927   486.95132648]
Reward: 9  Episode Reward:  23
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  717.89453299  497.57400478 -252.78192178]
------
Step:8, Action:South
State  107
Old Q Values:  [-252.35169558  717.89453299  497.57400478 -252.78192178]
New Q values:  [-252.35169558  503.43887912  497.57400478 -252.78192178]
Reward: 9  Episode Reward:  32
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[702.93688643   0.         526.18496922   0.        ]
------
Step:9, Action:North
State  189
Old Q Values:  [ 64.81505849 640.07701033 432.40094143 154.04646645]
New Q values:  [422.44659346 640.07701033 432.40094143 154.04646645]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1323.73523355  718.60978155 -120.29354603]
------
Step:10, Action:South
State  107
Old Q Values:  [-252.35169558  503.43887912  497.57400478 -252.78192178]
New Q values:  [-252.35169558  411.65661758  497.57400478 -252.78192178]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[702.93688643   0.         526.18496922   0.        ]
------
Step:11, Action:North
State  187
Old Q Values:  [702.93688643   0.         526.18496922   0.        ]
New Q values:  [429.84695601   0.         526.18496922   0.        ]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  411.65661758  497.57400478 -252.78192178]
------
Step:12, Action:East
State  111
Old Q Values:  [-177.44732869 1323.73523355  718.60978155 -120.29354603]
New Q values:  [-177.44732869 1323.73523355  629.31852411 -120.29354603]
Reward: -1  Episode Reward:  28
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   387.30893559  1141.58203829]
------
Step:13, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   387.30893559  1141.58203829]
New Q values:  [ -281.736      -1150.91067548   387.30893559   919.18716221]
Reward: -1  Episode Reward:  27
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1543.84782298  454.92080018 -180.6       ]
------
Step:14, Action:East
State  108
Old Q Values:  [-8463.16477134  2214.13535882   633.13598933     0.        ]
New Q values:  [-8463.16477134  2214.13535882   724.20787443     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
xga x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1571.84492898   467.56210483]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   387.30893559   919.18716221]
New Q values:  [ -281.736      -1150.91067548   330.46285805   919.18716221]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  5.87130946e+02]
------
Step:16, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  5.87130946e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  5.10008527e+02]
Reward: -1  Episode Reward:  24
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   330.46285805   919.18716221]
------
Step:17, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1571.84492898   467.56210483]
New Q values:  [-9594.56523706 -8069.05606225  1571.84492898   334.98218705]
Reward: -1  Episode Reward:  23
xxxxx
xag x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  495.19115038   -8.57207238 -180.6       ]
------
Step:18, Action:South
State  111
Old Q Values:  [-177.44732869 1323.73523355  629.31852411 -120.29354603]
New Q values:  [-177.44732869  720.91719652  629.31852411 -120.29354603]
Reward: -1  Episode Reward:  22
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[422.44659346 640.07701033 432.40094143 154.04646645]
------
Step:19, Action:South
State  189
Old Q Values:  [422.44659346 640.07701033 432.40094143 154.04646645]
New Q values:  [422.44659346 448.89758527 432.40094143 154.04646645]
Reward: 9  Episode Reward:  31
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[586.10065265  26.73544252 624.88927046 -35.88578819]
------
Step:20, Action:East
State  261
Old Q Values:  [586.10065265  26.73544252 624.88927046 -35.88578819]
New Q values:  [586.10065265  26.73544252 871.3247205  -35.88578819]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103   868.92221473]
------
Step:21, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1449.35533407  1161.83380882]
New Q values:  [-2527.46239811 -8521.23367799 61420.1483057   1161.83380882]
Reward: 100009  Episode Reward:  100049
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  720.91719652  629.31852411 -120.29354603]
------
Step:1, Action:South
State  109
Old Q Values:  [-241.10880094  495.19115038   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  657.32663284   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x .gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  356.13872326 1512.83390897  -30.99112081]
------
Step:2, Action:East
State  181
Old Q Values:  [ 462.07147878  356.13872326 1512.83390897  -30.99112081]
New Q values:  [ 462.07147878  356.13872326 2005.90125165  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  4.65122563e+03 -2.50318991e+03  2.00341972e+02]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  5120.48535318 -2984.46350323  1099.96026581]
New Q values:  [-5922.26708831 34469.45310911 -2984.46350323  1099.96026581]
Reward: 9  Episode Reward:  27
xxxxx
x .gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   870.35122762   -168.92307549   4644.37724943 108052.86322614]
------
Step:4, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 700.3232326 ]
New Q values:  [  1.64433      0.         -29.77444073 546.92670919]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[586.10065265  26.73544252 871.3247205  -35.88578819]
------
Step:5, Action:East
State  261
Old Q Values:  [586.10065265  26.73544252 871.3247205  -35.88578819]
New Q values:  [ 5.86100653e+02  2.67354425e+01  3.27637889e+04 -3.58857882e+01]
Reward: -1  Episode Reward:  35
xxxxx
x .gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   870.35122762   -168.92307549   4644.37724943 108052.86322614]
------
Step:6, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103   868.92221473]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103 10176.10554271]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5.86100653e+02  2.67354425e+01  3.27637889e+04 -3.58857882e+01]
------
Step:7, Action:East
State  261
Old Q Values:  [ 5.86100653e+02  2.67354425e+01  3.27637889e+04 -3.58857882e+01]
New Q values:  [  586.10065265    26.73544252 16157.74720523   -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103 10176.10554271]
------
Step:8, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103 10176.10554271]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  8917.16637865]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  586.10065265    26.73544252 16157.74720523   -35.88578819]
------
Step:9, Action:East
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  3728.11273001 -6102.86502307]
New Q values:  [ 1869.07142788 -2735.46306511  4165.7950056  -6102.86502307]
Reward: -1  Episode Reward:  31
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  8917.16637865]
------
Step:10, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  8917.16637865]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  8413.59071303]
Reward: -1  Episode Reward:  30
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  586.10065265    26.73544252 16157.74720523   -35.88578819]
------
Step:11, Action:East
State  261
Old Q Values:  [  586.10065265    26.73544252 16157.74720523   -35.88578819]
New Q values:  [ 5.86100653e+02  2.67354425e+01  3.88783578e+04 -3.58857882e+01]
Reward: -1  Episode Reward:  29
xxxxx
x .gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   870.35122762   -168.92307549   4644.37724943 108052.86322614]
------
Step:12, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 546.92670919]
New Q values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  1.18816780e+04]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5.86100653e+02  2.67354425e+01  3.88783578e+04 -3.58857882e+01]
------
Step:13, Action:East
State  261
Old Q Values:  [ 5.86100653e+02  2.67354425e+01  3.88783578e+04 -3.58857882e+01]
New Q values:  [ 5.86100653e+02  2.67354425e+01  4.79666021e+04 -3.58857882e+01]
Reward: -1  Episode Reward:  27
xxxxx
x .gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   870.35122762   -168.92307549   4644.37724943 108052.86322614]
------
Step:14, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  8413.59071303]
New Q values:  [ 1.68263752e+01 -5.80706396e+03  2.05323004e+03  1.77548169e+04]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5.86100653e+02  2.67354425e+01  4.79666021e+04 -3.58857882e+01]
------
Step:15, Action:East
State  261
Old Q Values:  [ 5.86100653e+02  2.67354425e+01  4.79666021e+04 -3.58857882e+01]
New Q values:  [ 5.86100653e+02  2.67354425e+01  5.16018998e+04 -3.58857882e+01]
Reward: -1  Episode Reward:  25
xxxxx
x .gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   870.35122762   -168.92307549   4644.37724943 108052.86322614]
------
Step:16, Action:West
State  276
Old Q Values:  [ 1.68263752e+01 -5.80706396e+03  2.05323004e+03  1.77548169e+04]
New Q values:  [ 1.68263752e+01 -5.80706396e+03  2.05323004e+03  2.25818967e+04]
Reward: -1  Episode Reward:  24
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5.86100653e+02  2.67354425e+01  5.16018998e+04 -3.58857882e+01]
------
Step:17, Action:East
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  4165.7950056  -6102.86502307]
New Q values:  [ 1869.07142788 -2735.46306511  8440.28701533 -6102.86502307]
Reward: -1  Episode Reward:  23
xxxxx
xg..x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1.68263752e+01 -5.80706396e+03  2.05323004e+03  2.25818967e+04]
------
Step:18, Action:West
State  276
Old Q Values:  [ 1.68263752e+01 -5.80706396e+03  2.05323004e+03  2.25818967e+04]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103 11564.24478873]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1869.07142788 -2735.46306511  8440.28701533 -6102.86502307]
------
Step:19, Action:East
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  8440.28701533 -6102.86502307]
New Q values:  [ 1869.07142788 -2735.46306511  6844.78824275 -6102.86502307]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103 11564.24478873]
------
Step:20, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 61420.1483057   1161.83380882]
New Q values:  [-2527.46239811 -8521.23367799 25408.46549436  1161.83380882]
Reward: 9  Episode Reward:  30
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2783.35390691 -6442.16912869 -8192.20126966  1736.74310161]
------
Step:21, Action:North
State  288
Old Q Values:  [ 2783.35390691 -6442.16912869 -8192.20126966  1736.74310161]
New Q values:  [ 2354.8778557  -6442.16912869 -8192.20126966  1736.74310161]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 4140.45430979 3916.37415316 3301.51484328]
------
Step:22, Action:South
State  208
Old Q Values:  [2047.39983025 4140.45430979 3916.37415316 3301.51484328]
New Q values:  [2047.39983025 2362.04508063 3916.37415316 3301.51484328]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2354.8778557  -6442.16912869 -8192.20126966  1736.74310161]
------
Step:23, Action:North
State  288
Old Q Values:  [ 2354.8778557  -6442.16912869 -8192.20126966  1736.74310161]
New Q values:  [ 2116.26338823 -6442.16912869 -8192.20126966  1736.74310161]
Reward: -1  Episode Reward:  27
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2362.04508063 3916.37415316 3301.51484328]
------
Step:24, Action:East
State  208
Old Q Values:  [2047.39983025 2362.04508063 3916.37415316 3301.51484328]
New Q values:  [2047.39983025 2362.04508063 2560.86190721 3301.51484328]
Reward: -301  Episode Reward:  -274
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2362.04508063 2560.86190721 3301.51484328]
------
Step:25, Action:West
State  208
Old Q Values:  [2047.39983025 2362.04508063 2560.86190721 3301.51484328]
New Q values:  [2047.39983025 2362.04508063 2560.86190721 2585.47397011]
Reward: -1  Episode Reward:  -275
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.26548926e+03 4.21822678e+03 2.91043938e+03]
------
Step:26, Action:East
State  196
Old Q Values:  [-2469.90645144  1090.23428232  2627.25974161   239.04887894]
New Q values:  [-2469.90645144  1090.23428232  1825.94608768   239.04887894]
Reward: -1  Episode Reward:  -276
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2362.04508063 2560.86190721 2585.47397011]
------
Step:27, Action:West
State  208
Old Q Values:  [2047.39983025 2362.04508063 2560.86190721 2585.47397011]
New Q values:  [ 2047.39983025  2362.04508063  2560.86190721 11374.42552078]
Reward: -1  Episode Reward:  -277
xxxxx
x .gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 34469.45310911 -2984.46350323  1099.96026581]
------
Step:28, Action:South
State  193
Old Q Values:  [-5922.26708831 34469.45310911 -2984.46350323  1099.96026581]
New Q values:  [-5922.26708831 46203.04021149 -2984.46350323  1099.96026581]
Reward: -1  Episode Reward:  -278
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[   870.35122762   -168.92307549   4644.37724943 108052.86322614]
------
Step:29, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 25408.46549436  1161.83380882]
New Q values:  [-2527.46239811 -8521.23367799 25408.46549436 15944.70346682]
Reward: -1  Episode Reward:  -279
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5.86100653e+02  2.67354425e+01  5.16018998e+04 -3.58857882e+01]
------
Step:30, Action:East
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  6844.78824275 -6102.86502307]
New Q values:  [ 1869.07142788 -2735.46306511 10359.85494541 -6102.86502307]
Reward: -1  Episode Reward:  -280
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 25408.46549436 15944.70346682]
------
Step:31, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 25408.46549436 15944.70346682]
New Q values:  [-2527.46239811 -8521.23367799 10797.66521421 15944.70346682]
Reward: -1  Episode Reward:  -281
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2116.26338823 -6442.16912869 -8192.20126966  1736.74310161]
------
Step:32, Action:North
State  288
Old Q Values:  [ 2116.26338823 -6442.16912869 -8192.20126966  1736.74310161]
New Q values:  [ 4258.23301152 -6442.16912869 -8192.20126966  1736.74310161]
Reward: -1  Episode Reward:  -282
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2047.39983025  2362.04508063  2560.86190721 11374.42552078]
------
Step:33, Action:West
State  208
Old Q Values:  [ 2047.39983025  2362.04508063  2560.86190721 11374.42552078]
New Q values:  [2047.39983025 2362.04508063 2560.86190721 5814.63824111]
Reward: -1  Episode Reward:  -283
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.26548926e+03 4.21822678e+03 2.91043938e+03]
------
Step:34, Action:East
State  196
Old Q Values:  [-2469.90645144  1090.23428232  1825.94608768   239.04887894]
New Q values:  [-2469.90645144  1090.23428232  2474.1699074    239.04887894]
Reward: -1  Episode Reward:  -284
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2362.04508063 2560.86190721 5814.63824111]
------
Step:35, Action:West
State  208
Old Q Values:  [2047.39983025 2362.04508063 2560.86190721 5814.63824111]
New Q values:  [ 2047.39983025  2362.04508063  2560.86190721 16186.16735989]
Reward: -1  Episode Reward:  -285
xxxxx
x .gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 46203.04021149 -2984.46350323  1099.96026581]
------
Step:36, Action:South
State  196
Old Q Values:  [-2469.90645144  1090.23428232  2474.1699074    239.04887894]
New Q values:  [-2469.90645144  3904.76714955  2474.1699074    239.04887894]
Reward: -1  Episode Reward:  -286
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103 11564.24478873]
------
Step:37, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10797.66521421 15944.70346682]
New Q values:  [-2527.46239811 -8521.23367799 10797.66521421  9485.23787035]
Reward: -1  Episode Reward:  -287
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1869.07142788 -2735.46306511 10359.85494541 -6102.86502307]
------
Step:38, Action:East
State  261
Old Q Values:  [ 5.86100653e+02  2.67354425e+01  5.16018998e+04 -3.58857882e+01]
New Q values:  [  586.10065265    26.73544252 24109.43336101   -35.88578819]
Reward: -1  Episode Reward:  -288
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103 11564.24478873]
------
Step:39, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10797.66521421  9485.23787035]
New Q values:  [-2527.46239811 -8521.23367799 10797.66521421  6901.45163176]
Reward: -1  Episode Reward:  -289
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1869.07142788 -2735.46306511 10359.85494541 -6102.86502307]
------
Step:40, Action:East
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511 10359.85494541 -6102.86502307]
New Q values:  [ 1869.07142788 -2735.46306511  7382.64154243 -6102.86502307]
Reward: -1  Episode Reward:  -290
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 10797.66521421  6901.45163176]
------
Step:41, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 10797.66521421  6901.45163176]
New Q values:  [-2527.46239811 -8521.23367799  5595.93598914  6901.45163176]
Reward: -1  Episode Reward:  -291
xxxxx
x ..x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4258.23301152 -6442.16912869 -8192.20126966  1736.74310161]
------
Step:42, Action:North
State  288
Old Q Values:  [ 4258.23301152 -6442.16912869 -8192.20126966  1736.74310161]
New Q values:  [ 6558.54341258 -6442.16912869 -8192.20126966  1736.74310161]
Reward: -1  Episode Reward:  -292
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2047.39983025  2362.04508063  2560.86190721 16186.16735989]
------
Step:43, Action:West
State  210
Old Q Values:  [40226.97557565 -1977.69291708  3961.62714931  2599.28130597]
New Q values:  [40226.97557565 -1977.69291708  3961.62714931  2663.84270475]
Reward: -1  Episode Reward:  -293
xxxxx
x ..x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  5.41576727e+03  1.20371620e+03]
------
Step:44, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.26548926e+03 4.21822678e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.26548926e+03 6.54254092e+03 2.91043938e+03]
Reward: -1  Episode Reward:  -294
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2047.39983025  2362.04508063  2560.86190721 16186.16735989]
------
Step:45, Action:West
State  208
Old Q Values:  [ 2047.39983025  2362.04508063  2560.86190721 16186.16735989]
New Q values:  [2047.39983025 2362.04508063 2560.86190721 2436.62921946]
Reward: -10001  Episode Reward:  -10295
xxxxx
x ..x
x g x
x   x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  5.10008527e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  5.10008527e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.85333980e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   586.43522927   486.95132648]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   330.46285805   919.18716221]
New Q values:  [ -281.736      -1150.91067548   247.18533711   919.18716221]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.85333980e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.85333980e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.29464161e+02]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   586.43522927   486.95132648]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   586.43522927   486.95132648]
New Q values:  [ -253.44886264 -1902.20915811   332.8133399    486.95132648]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.29464161e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.29464161e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.06941813e+02]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   919.18716221]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   919.18716221]
New Q values:  [ -281.736      -1150.91067548   247.18533711   836.22921178]
Reward: 9  Episode Reward:  14
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1543.84782298  454.92080018 -180.6       ]
------
Step:7, Action:East
State  110
Old Q Values:  [-239.29051573 1543.84782298  454.92080018 -180.6       ]
New Q values:  [-239.29051573 1543.84782298  432.2370836  -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   836.22921178]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   836.22921178]
New Q values:  [ -281.736      -1150.91067548   247.18533711   797.0460316 ]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1543.84782298  432.2370836  -180.6       ]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558  411.65661758  497.57400478 -252.78192178]
New Q values:  [-252.35169558  411.65661758  437.5434114  -252.78192178]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   797.0460316 ]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   797.0460316 ]
New Q values:  [ -281.736      -1150.91067548   247.18533711   781.37275954]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1543.84782298  432.2370836  -180.6       ]
------
Step:11, Action:East
State  110
Old Q Values:  [-239.29051573 1543.84782298  432.2370836  -180.6       ]
New Q values:  [-239.29051573 1543.84782298  406.7066613  -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   781.37275954]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   781.37275954]
New Q values:  [ -281.736      -1150.91067548   247.18533711   443.21212723]
Reward: -1  Episode Reward:  8
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  411.65661758  437.5434114  -252.78192178]
------
Step:13, Action:East
State  107
Old Q Values:  [-252.35169558  411.65661758  437.5434114  -252.78192178]
New Q values:  [-252.35169558  411.65661758  320.5027625  -252.78192178]
Reward: -1  Episode Reward:  7
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   332.8133399    486.95132648]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   332.8133399    486.95132648]
New Q values:  [ -253.44886264 -1902.20915811   332.8133399    410.45568955]
Reward: -1  Episode Reward:  6
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  720.91719652  629.31852411 -120.29354603]
------
Step:15, Action:South
State  111
Old Q Values:  [-177.44732869  720.91719652  629.31852411 -120.29354603]
New Q values:  [-177.44732869 1326.8185317   629.31852411 -120.29354603]
Reward: 9  Episode Reward:  15
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1597.07502264 3443.50551032    0.        ]
------
Step:16, Action:East
State  187
Old Q Values:  [429.84695601   0.         526.18496922   0.        ]
New Q values:  [429.84695601   0.         914.65732325   0.        ]
Reward: 9  Episode Reward:  24
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577  2329.27778519     0.        ]
------
Step:17, Action:East
State  203
Old Q Values:  [3.60604218e+00 2.71373337e+04 3.38606836e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 2.71373337e+04 2.42409341e+03 0.00000000e+00]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1118.7863503  3567.55356402    0.          386.1281519 ]
------
Step:18, Action:North
State  216
Old Q Values:  [ 4945.99297661  1726.22110463 -8896.20691497  1988.16379862]
New Q values:  [ 2099.87973452  1726.22110463 -8896.20691497  1988.16379862]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.06941813e+02]
------
Step:19, Action:West
State  136
Old Q Values:  [ -170.77177351  2503.35511677 -2383.80019164   131.18974337]
New Q values:  [ -170.77177351  2503.35511677 -2383.80019164   121.30671647]
Reward: -1  Episode Reward:  21
xxxxx
x agx
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8868.36952603   231.43606375]
------
Step:20, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1571.84492898   334.98218705]
New Q values:  [-9594.56523706 -8069.05606225  1571.84492898   330.59086467]
Reward: -1  Episode Reward:  20
xxxxx
xag x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  657.32663284   -8.57207238 -180.6       ]
------
Step:21, Action:South
State  108
Old Q Values:  [-8463.16477134  2214.13535882   724.20787443     0.        ]
New Q values:  [-8463.16477134  2865.30357707   724.20787443     0.        ]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  6600.83144513  3369.21098184     0.        ]
------
Step:22, Action:South
State  188
Old Q Values:  [-6523.78898263  6600.83144513  3369.21098184     0.        ]
New Q values:  [-6523.78898263  4860.52504078  3369.21098184     0.        ]
Reward: 9  Episode Reward:  28
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1869.07142788 -2735.46306511  7382.64154243 -6102.86502307]
------
Step:23, Action:East
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  7382.64154243 -6102.86502307]
New Q values:  [ 1869.07142788 -2735.46306511  6427.73005359 -6102.86502307]
Reward: 9  Episode Reward:  37
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103 11564.24478873]
------
Step:24, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103 11564.24478873]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103 11857.92792379]
Reward: -1  Episode Reward:  36
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  586.10065265    26.73544252 24109.43336101   -35.88578819]
------
Step:25, Action:East
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  6427.73005359 -6102.86502307]
New Q values:  [ 1869.07142788 -2735.46306511  6127.87039857 -6102.86502307]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103 11857.92792379]
------
Step:26, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103 11857.92792379]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103 11975.40117782]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  586.10065265    26.73544252 24109.43336101   -35.88578819]
------
Step:27, Action:East
State  261
Old Q Values:  [  586.10065265    26.73544252 24109.43336101   -35.88578819]
New Q values:  [  586.10065265    26.73544252 13207.676756     -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  1.18816780e+04]
------
Step:28, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103 11975.40117782]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  8751.86349793]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  586.10065265    26.73544252 13207.676756     -35.88578819]
------
Step:29, Action:East
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  6127.87039857 -6102.86502307]
New Q values:  [ 1869.07142788 -2735.46306511  5076.10720881 -6102.86502307]
Reward: -1  Episode Reward:  31
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  8751.86349793]
------
Step:30, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  8751.86349793]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  5022.97756181]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1869.07142788 -2735.46306511  5076.10720881 -6102.86502307]
------
Step:31, Action:East
State  261
Old Q Values:  [  586.10065265    26.73544252 13207.676756     -35.88578819]
New Q values:  [ 586.10065265   26.73544252 6789.36397094  -35.88578819]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  5022.97756181]
------
Step:32, Action:West
State  277
Old Q Values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  1.18816780e+04]
New Q values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  6.78888041e+03]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 586.10065265   26.73544252 6789.36397094  -35.88578819]
------
Step:33, Action:East
State  261
Old Q Values:  [ 586.10065265   26.73544252 6789.36397094  -35.88578819]
New Q values:  [ 586.10065265   26.73544252 4751.8097104   -35.88578819]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  6.78888041e+03]
------
Step:34, Action:West
State  273
Old Q Values:  [   870.35122762   -168.92307549   4644.37724943 108052.86322614]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 53345.16890082]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31421.77885639  2256.66526474 33748.74536788  1875.31501677]
------
Step:35, Action:East
State  261
Old Q Values:  [ 586.10065265   26.73544252 4751.8097104   -35.88578819]
New Q values:  [ 586.10065265   26.73544252 3936.78800618  -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  6.78888041e+03]
------
Step:36, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 53345.16890082]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 31462.09117069]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31421.77885639  2256.66526474 33748.74536788  1875.31501677]
------
Step:37, Action:East
State  261
Old Q Values:  [ 586.10065265   26.73544252 3936.78800618  -35.88578819]
New Q values:  [  586.10065265    26.73544252 -2918.99152898   -35.88578819]
Reward: -10001  Episode Reward:  -9977
xxxxx
x   x
x   x
x g.x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2362.04508063 2560.86190721 2436.62921946]
------
Step:1, Action:East
State  208
Old Q Values:  [2047.39983025 2362.04508063 2560.86190721 2436.62921946]
New Q values:  [2047.39983025 2362.04508063 1612.00333505 2436.62921946]
Reward: -301  Episode Reward:  -301
xxxxx
xg .x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2362.04508063 1612.00333505 2436.62921946]
------
Step:2, Action:West
State  208
Old Q Values:  [2047.39983025 2362.04508063 1612.00333505 2436.62921946]
New Q values:  [2047.39983025 2362.04508063 1612.00333505 2942.81396329]
Reward: 9  Episode Reward:  -292
xxxxx
x.g.x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.26548926e+03 6.54254092e+03 2.91043938e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.26548926e+03 6.54254092e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 1.26548926e+03 3.49926056e+03 2.91043938e+03]
Reward: -1  Episode Reward:  -293
xxxxx
x. .x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2362.04508063 1612.00333505 2942.81396329]
------
Step:4, Action:South
State  208
Old Q Values:  [2047.39983025 2362.04508063 1612.00333505 2942.81396329]
New Q values:  [2047.39983025 2917.78105602 1612.00333505 2942.81396329]
Reward: 9  Episode Reward:  -284
xxxxx
x. .x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6558.54341258 -6442.16912869 -8192.20126966  1736.74310161]
------
Step:5, Action:West
State  288
Old Q Values:  [ 6558.54341258 -6442.16912869 -8192.20126966  1736.74310161]
New Q values:  [ 6558.54341258 -6442.16912869 -8192.20126966  2770.53273017]
Reward: 9  Episode Reward:  -275
xxxxx
x. .x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5595.93598914  6901.45163176]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5595.93598914  6901.45163176]
New Q values:  [-2527.46239811 -8521.23367799  5595.93598914 12919.5524927 ]
Reward: 9  Episode Reward:  -266
xxxxx
x. .x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 15557.34758455  5576.40109469   644.94785455]
------
Step:7, Action:South
State  256
Old Q Values:  [33845.23946664 15557.34758455  5576.40109469   644.94785455]
New Q values:  [33845.23946664 10195.91087381  5576.40109469   644.94785455]
Reward: -10301  Episode Reward:  -10567
xxxxx
x. .x
x.  x
xg  x
xxxxx
Episode # 800
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.06941813e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  4.06941813e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.01140363e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   443.21212723]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   443.21212723]
New Q values:  [ -281.736      -1150.91067548   247.18533711   645.83919779]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1543.84782298  406.7066613  -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 1326.8185317   629.31852411 -120.29354603]
New Q values:  [-177.44732869 1569.17906578  629.31852411 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594 1597.07502264 3443.50551032    0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [ 877.23516594 1597.07502264 3443.50551032    0.        ]
New Q values:  [ 877.23516594 1597.07502264 9518.00231782    0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 2.71373337e+04 2.42409341e+03 0.00000000e+00]
------
Step:5, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  4.65122563e+03 -2.50318991e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.13045176e+04 -2.50318991e+03  2.00341972e+02]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 31462.09117069]
------
Step:6, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  5022.97756181]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  2190.42122052]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  586.10065265    26.73544252 -2918.99152898   -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [  586.10065265    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [  368.50953664    26.73544252 -2918.99152898   -35.88578819]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[422.44659346 448.89758527 432.40094143 154.04646645]
------
Step:8, Action:South
State  183
Old Q Values:  [ 877.23516594 1597.07502264 9518.00231782    0.        ]
New Q values:  [ 877.23516594  748.78287005 9518.00231782    0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  368.50953664    26.73544252 -2918.99152898   -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [  368.50953664    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [  281.47309024    26.73544252 -2918.99152898   -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[422.44659346 448.89758527 432.40094143 154.04646645]
------
Step:10, Action:South
State  181
Old Q Values:  [ 462.07147878  356.13872326 2005.90125165  -30.99112081]
New Q values:  [ 462.07147878  226.29741638 2005.90125165  -30.99112081]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  281.47309024    26.73544252 -2918.99152898   -35.88578819]
------
Step:11, Action:North
State  260
Old Q Values:  [ 1869.07142788 -2735.46306511  5076.10720881 -6102.86502307]
New Q values:  [-4604.16691272 -2735.46306511  5076.10720881 -6102.86502307]
Reward: -10001  Episode Reward:  -9961
xxxxx
x   x
xg .x
x  .x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  226.29741638 2005.90125165  -30.99112081]
------
Step:1, Action:East
State  189
Old Q Values:  [422.44659346 448.89758527 432.40094143 154.04646645]
New Q values:  [  422.44659346   448.89758527 14039.27244002   154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 46203.04021149 -2984.46350323  1099.96026581]
------
Step:2, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.26548926e+03 3.49926056e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 4.38746145e+03 3.49926056e+03 2.91043938e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5595.93598914 12919.5524927 ]
------
Step:3, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 31462.09117069]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 12674.67839535]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  281.47309024    26.73544252 -2918.99152898   -35.88578819]
------
Step:4, Action:North
State  261
Old Q Values:  [  281.47309024    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [  713.75961159    26.73544252 -2918.99152898   -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  226.29741638 2005.90125165  -30.99112081]
------
Step:5, Action:East
State  177
Old Q Values:  [24286.95305781 19270.74827372 63420.31360511     0.        ]
New Q values:  [24286.95305781 19270.74827372 39228.43750549     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 46203.04021149 -2984.46350323  1099.96026581]
------
Step:6, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.38746145e+03 3.49926056e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.63025033e+03 3.49926056e+03 2.91043938e+03]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5595.93598914 12919.5524927 ]
------
Step:7, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 12674.67839535]
New Q values:  [ 870.35122762 -168.92307549 4644.37724943 5283.39924162]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  713.75961159    26.73544252 -2918.99152898   -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [  713.75961159    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [ 3140.30453998    26.73544252 -2918.99152898   -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  748.78287005 9518.00231782    0.        ]
------
Step:9, Action:East
State  177
Old Q Values:  [24286.95305781 19270.74827372 39228.43750549     0.        ]
New Q values:  [24286.95305781 19270.74827372 29551.68706564     0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 46203.04021149 -2984.46350323  1099.96026581]
------
Step:10, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.63025033e+03 3.49926056e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 6.12736588e+03 3.49926056e+03 2.91043938e+03]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5595.93598914 12919.5524927 ]
------
Step:11, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 4644.37724943 5283.39924162]
New Q values:  [ 870.35122762 -168.92307549 4644.37724943 3054.85105864]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3140.30453998    26.73544252 -2918.99152898   -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [ 3140.30453998    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [ 4110.92251134    26.73544252 -2918.99152898   -35.88578819]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  748.78287005 9518.00231782    0.        ]
------
Step:13, Action:East
State  183
Old Q Values:  [ 877.23516594  748.78287005 9518.00231782    0.        ]
New Q values:  [ 877.23516594  748.78287005 5431.33110949    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  5.41576727e+03  1.20371620e+03]
------
Step:14, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.12736588e+03 3.49926056e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 6.12736588e+03 2.28794841e+03 2.91043938e+03]
Reward: 9  Episode Reward:  26
xxxxx
x.. x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2917.78105602 1612.00333505 2942.81396329]
------
Step:15, Action:South
State  210
Old Q Values:  [40226.97557565 -1977.69291708  3961.62714931  2663.84270475]
New Q values:  [40226.97557565  1181.88585694  3961.62714931  2663.84270475]
Reward: 9  Episode Reward:  35
xxxxx
x.. x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6558.54341258 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:16, Action:North
State  288
Old Q Values:  [ 6558.54341258 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [14690.91003772 -6442.16912869 -8192.20126966  2770.53273017]
Reward: -1  Episode Reward:  34
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[40226.97557565  1181.88585694  3961.62714931  2663.84270475]
------
Step:17, Action:North
State  210
Old Q Values:  [40226.97557565  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [42935.40207803  1181.88585694  3961.62714931  2663.84270475]
Reward: -1  Episode Reward:  33
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 89484.03949258]
------
Step:18, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 89484.03949258]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 75646.47058867]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32824849e+05]
------
Step:19, Action:West
State  115
Old Q Values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.32824849e+05]
New Q values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
Reward: 100009  Episode Reward:  100051
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.01140363e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.01140363e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.19607905e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   645.83919779]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   645.83919779]
New Q values:  [ -281.736      -1150.91067548   247.18533711   726.89002601]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1543.84782298  406.7066613  -180.6       ]
------
Step:3, Action:East
State  110
Old Q Values:  [-239.29051573 1543.84782298  406.7066613  -180.6       ]
New Q values:  [-239.29051573 1543.84782298  380.14967232 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   726.89002601]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   726.89002601]
New Q values:  [ -281.736      -1150.91067548   247.18533711   753.3103573 ]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1543.84782298  380.14967232 -180.6       ]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558  411.65661758  320.5027625  -252.78192178]
New Q values:  [-252.35169558  411.65661758  353.59421219 -252.78192178]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   753.3103573 ]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   753.3103573 ]
New Q values:  [ -281.736      -1150.91067548   247.18533711   771.47786265]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1569.17906578  629.31852411 -120.29354603]
------
Step:7, Action:South
State  110
Old Q Values:  [-239.29051573 1543.84782298  380.14967232 -180.6       ]
New Q values:  [-239.29051573 1667.66026202  380.14967232 -180.6       ]
Reward: 9  Episode Reward:  23
xxxxx
x   x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3482.40377611     0.        ]
------
Step:8, Action:East
State  183
Old Q Values:  [ 877.23516594  748.78287005 5431.33110949    0.        ]
New Q values:  [ 877.23516594  748.78287005 4121.4356018     0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.13340328e+03  6.47834386e+03  0.00000000e+00]
------
Step:9, Action:East
State  200
Old Q Values:  [  62.8218634  2488.93790922 1830.98954698 1141.49622464]
New Q values:  [  62.8218634  2488.93790922 1367.75973915 1141.49622464]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2099.87973452  1726.22110463 -8896.20691497  1988.16379862]
------
Step:10, Action:North
State  216
Old Q Values:  [ 2099.87973452  1726.22110463 -8896.20691497  1988.16379862]
New Q values:  [ 1590.35842884  1726.22110463 -8896.20691497  1988.16379862]
Reward: -1  Episode Reward:  40
xxxxx
x gax
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2503.35511677 -2383.80019164   121.30671647]
------
Step:11, Action:South
State  136
Old Q Values:  [ -170.77177351  2503.35511677 -2383.80019164   121.30671647]
New Q values:  [ -170.77177351  1597.1911863  -2383.80019164   121.30671647]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1590.35842884  1726.22110463 -8896.20691497  1988.16379862]
------
Step:12, Action:West
State  216
Old Q Values:  [ 1590.35842884  1726.22110463 -8896.20691497  1988.16379862]
New Q values:  [ 1590.35842884  1726.22110463 -8896.20691497  4186.02080004]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.13045176e+04 -2.50318991e+03  2.00341972e+02]
------
Step:13, Action:South
State  203
Old Q Values:  [3.60604218e+00 2.71373337e+04 2.42409341e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.22476467e+04 2.42409341e+03 0.00000000e+00]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 3054.85105864]
------
Step:14, Action:West
State  277
Old Q Values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  6.78888041e+03]
New Q values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  3.95422892e+03]
Reward: 9  Episode Reward:  46
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4110.92251134    26.73544252 -2918.99152898   -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [ 4110.92251134    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [ 5855.55073654    26.73544252 -2918.99152898   -35.88578819]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  422.44659346   448.89758527 14039.27244002   154.04646645]
------
Step:16, Action:East
State  189
Old Q Values:  [  422.44659346   448.89758527 14039.27244002   154.04646645]
New Q values:  [ 422.44659346  448.89758527 5779.20583487  154.04646645]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[  0.         546.98952955   0.         198.38683706]
------
Step:17, Action:South
State  205
Old Q Values:  [  0.         546.98952955   0.         198.38683706]
New Q values:  [   0.         1404.46448665    0.          198.38683706]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  3.95422892e+03]
------
Step:18, Action:West
State  277
Old Q Values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  3.95422892e+03]
New Q values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  3.33775679e+03]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5855.55073654    26.73544252 -2918.99152898   -35.88578819]
------
Step:19, Action:North
State  261
Old Q Values:  [ 5855.55073654    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [ 4075.38204508    26.73544252 -2918.99152898   -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346  448.89758527 5779.20583487  154.04646645]
------
Step:20, Action:South
State  188
Old Q Values:  [-6523.78898263  4860.52504078  3369.21098184     0.        ]
New Q values:  [-6523.78898263  3466.44217895  3369.21098184     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4604.16691272 -2735.46306511  5076.10720881 -6102.86502307]
------
Step:21, Action:East
State  260
Old Q Values:  [-4604.16691272 -2735.46306511  5076.10720881 -6102.86502307]
New Q values:  [-4604.16691272 -2735.46306511  2686.96924968 -6102.86502307]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  2190.42122052]
------
Step:22, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  2190.42122052]
New Q values:  [   16.82637525 -5807.06396197  2053.23004103  1681.65926311]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4604.16691272 -2735.46306511  2686.96924968 -6102.86502307]
------
Step:23, Action:East
State  260
Old Q Values:  [-4604.16691272 -2735.46306511  2686.96924968 -6102.86502307]
New Q values:  [-4604.16691272 -2735.46306511  1690.15671218 -6102.86502307]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1681.65926311]
------
Step:24, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5595.93598914 12919.5524927 ]
New Q values:  [-2527.46239811 -8521.23367799 66651.04740697 12919.5524927 ]
Reward: 100009  Episode Reward:  100046
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14690.91003772 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:1, Action:North
State  288
Old Q Values:  [14690.91003772 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [ 6764.60820408 -6442.16912869 -8192.20126966  2770.53273017]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2917.78105602 1612.00333505 2942.81396329]
------
Step:2, Action:West
State  208
Old Q Values:  [2047.39983025 2917.78105602 1612.00333505 2942.81396329]
New Q values:  [2047.39983025 2917.78105602 1612.00333505 3020.73534902]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.12736588e+03 2.28794841e+03 2.91043938e+03]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.12736588e+03 2.28794841e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.24516606e+04 2.28794841e+03 2.91043938e+03]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 66651.04740697 12919.5524927 ]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 66651.04740697 12919.5524927 ]
New Q values:  [-2527.46239811 -8521.23367799 28689.20142401 12919.5524927 ]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6764.60820408 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:5, Action:North
State  288
Old Q Values:  [ 6764.60820408 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [ 3611.46388634 -6442.16912869 -8192.20126966  2770.53273017]
Reward: -1  Episode Reward:  25
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[2047.39983025 2917.78105602 1612.00333505 3020.73534902]
------
Step:6, Action:West
State  208
Old Q Values:  [2047.39983025 2917.78105602 1612.00333505 3020.73534902]
New Q values:  [ 2047.39983025  2917.78105602  1612.00333505 15068.60620306]
Reward: -1  Episode Reward:  24
xxxxx
x. .x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 46203.04021149 -2984.46350323  1099.96026581]
------
Step:7, Action:South
State  195
Old Q Values:  [   38.85388605  7251.41441546 15831.840814    1101.59744825]
New Q values:  [   38.85388605  4293.27894101 15831.840814    1101.59744825]
Reward: -1  Episode Reward:  23
xxxxx
x. .x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 3054.85105864]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 28689.20142401 12919.5524927 ]
New Q values:  [-2527.46239811 -8521.23367799 28689.20142401 15297.84460744]
Reward: 9  Episode Reward:  32
xxxxx
x. .x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31421.77885639  2256.66526474 33748.74536788  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [31421.77885639  2256.66526474 33748.74536788  1875.31501677]
New Q values:  [21439.61766225  2256.66526474 33748.74536788  1875.31501677]
Reward: 9  Episode Reward:  41
xxxxx
x. .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[24286.95305781 19270.74827372 29551.68706564     0.        ]
------
Step:10, Action:North
State  180
Old Q Values:  [  221.7991334   2162.68172044  2081.18692984 -4966.32149798]
New Q values:  [  270.3916271   2162.68172044  2081.18692984 -4966.32149798]
Reward: 9  Episode Reward:  50
xxxxx
xa .x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        587.5732458    5.16      -180.6      ]
------
Step:11, Action:East
State  103
Old Q Values:  [ 221.30610858 2733.85253197    5.4           0.        ]
New Q values:  [ 221.30610858 2733.85253197  416.70357267    0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x a.x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[-281.736      1383.81190889    0.            0.        ]
------
Step:12, Action:West
State  118
Old Q Values:  [-281.736      1383.81190889    0.            0.        ]
New Q values:  [-281.736      1383.81190889    0.          819.55575959]
Reward: -1  Episode Reward:  48
xxxxx
xa .x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2733.85253197  416.70357267    0.        ]
------
Step:13, Action:South
State  103
Old Q Values:  [ 221.30610858 2733.85253197  416.70357267    0.        ]
New Q values:  [ 221.30610858 1694.71138828  416.70357267    0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x  .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  226.29741638 2005.90125165  -30.99112081]
------
Step:14, Action:North
State  180
Old Q Values:  [  270.3916271   2162.68172044  2081.18692984 -4966.32149798]
New Q values:  [  283.82862458  2162.68172044  2081.18692984 -4966.32149798]
Reward: -1  Episode Reward:  46
xxxxx
xa .x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        587.5732458    5.16      -180.6      ]
------
Step:15, Action:East
State  103
Old Q Values:  [ 221.30610858 1694.71138828  416.70357267    0.        ]
New Q values:  [ 221.30610858 1694.71138828  581.22500173    0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x a.x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[-281.736      1383.81190889    0.          819.55575959]
------
Step:16, Action:West
State  118
Old Q Values:  [-281.736      1383.81190889    0.          819.55575959]
New Q values:  [-281.736      1383.81190889    0.          503.49427758]
Reward: -1  Episode Reward:  44
xxxxx
xa .x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        587.5732458    5.16      -180.6      ]
------
Step:17, Action:East
State  100
Old Q Values:  [ 0.00000000e+00  1.69785211e+03 -6.00000000e-01  0.00000000e+00]
New Q values:  [ 0.00000000e+00  1.69785211e+03 -8.40000000e-01  0.00000000e+00]
Reward: -1  Episode Reward:  43
xxxxx
xga.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[0. 0. 0. 0.]
------
Step:18, Action:North
State  118
Old Q Values:  [-281.736      1383.81190889    0.          503.49427758]
New Q values:  [ 121.84917267 1383.81190889    0.          503.49427758]
Reward: -301  Episode Reward:  -258
xxxxx
x a.x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ 121.84917267 1383.81190889    0.          503.49427758]
------
Step:19, Action:South
State  118
Old Q Values:  [ 121.84917267 1383.81190889    0.          503.49427758]
New Q values:  [ 121.84917267 2496.42792156    0.          503.49427758]
Reward: -1  Episode Reward:  -259
xxxxx
x  .x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -6.13340328e+03  6.47834386e+03  0.00000000e+00]
------
Step:20, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -6.13340328e+03  6.47834386e+03  0.00000000e+00]
New Q values:  [-2.78872080e-01 -6.13340328e+03  1.54713582e+04  0.00000000e+00]
Reward: -1  Episode Reward:  -260
xxxxx
x  .x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[42935.40207803  1181.88585694  3961.62714931  2663.84270475]
------
Step:21, Action:North
State  208
Old Q Values:  [ 2047.39983025  2917.78105602  1612.00333505 15068.60620306]
New Q values:  [83518.3011087   2917.78105602  1612.00333505 15068.60620306]
Reward: 100009  Episode Reward:  99749
xxxxx
x  ax
x g x
x   x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 28689.20142401 15297.84460744]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 28689.20142401 15297.84460744]
New Q values:  [-2527.46239811 -8521.23367799 12564.51973551 15297.84460744]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3611.46388634 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:2, Action:North
State  288
Old Q Values:  [ 3611.46388634 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [26505.47588714 -6442.16912869 -8192.20126966  2770.53273017]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[83518.3011087   2917.78105602  1612.00333505 15068.60620306]
------
Step:3, Action:North
State  208
Old Q Values:  [83518.3011087   2917.78105602  1612.00333505 15068.60620306]
New Q values:  [56106.66162008  2917.78105602  1612.00333505 15068.60620306]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 75646.47058867]
------
Step:4, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.19607905e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  2.56379869e+02]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   332.8133399    410.45568955]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   332.8133399    410.45568955]
New Q values:  [ -253.44886264 -1902.20915811   332.8133399    293.07926109]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x . x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  411.65661758  353.59421219 -252.78192178]
------
Step:6, Action:South
State  107
Old Q Values:  [-252.35169558  411.65661758  353.59421219 -252.78192178]
New Q values:  [-252.35169558  765.83302253  353.59421219 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa.gx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  226.29741638 2005.90125165  -30.99112081]
------
Step:7, Action:East
State  185
Old Q Values:  [ 476.02888691    0.         1322.13341882 -178.98      ]
New Q values:  [ 476.02888691    0.         3925.60864811 -178.98      ]
Reward: 9  Episode Reward:  53
xxxxx
x  gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.13045176e+04 -2.50318991e+03  2.00341972e+02]
------
Step:8, Action:South
State  200
Old Q Values:  [  62.8218634  2488.93790922 1367.75973915 1141.49622464]
New Q values:  [  62.8218634  5584.32854592 1367.75973915 1141.49622464]
Reward: -1  Episode Reward:  52
xxxxx
x g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 12564.51973551 15297.84460744]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12564.51973551 15297.84460744]
New Q values:  [-2527.46239811 -8521.23367799 12564.51973551 76278.10968297]
Reward: 100009  Episode Reward:  100061
xxxxx
xg  x
x   x
xa  x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[26505.47588714 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:1, Action:North
State  288
Old Q Values:  [26505.47588714 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [21439.58884088 -6442.16912869 -8192.20126966  2770.53273017]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4075.38204508    26.73544252 -2918.99152898   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [ 4075.38204508    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [ 2871.98349857    26.73544252 -2918.99152898   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  748.78287005 4121.4356018     0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  3482.40377611     0.        ]
New Q values:  [    0.         -5969.29177534  3017.09169281     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  5.41576727e+03  1.20371620e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.24516606e+04 2.28794841e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.24516606e+04 1.77525779e+04 2.91043938e+03]
Reward: 9  Episode Reward:  17
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[56106.66162008  2917.78105602  1612.00333505 15068.60620306]
------
Step:4, Action:North
State  210
Old Q Values:  [42935.40207803  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [39873.50200781  1181.88585694  3961.62714931  2663.84270475]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 75646.47058867]
------
Step:5, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 75646.47058867]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 66386.60719816]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:6, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   332.8133399    293.07926109]
New Q values:  [ -253.44886264 -1902.20915811   332.8133399    593.38542417]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1569.17906578  629.31852411 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 1569.17906578  629.31852411 -120.29354603]
New Q values:  [-177.44732869  864.71293249  629.31852411 -120.29354603]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 792.13768726 321.41835844   0.        ]
------
Step:8, Action:South
State  191
Old Q Values:  [  3.06655861 792.13768726 321.41835844   0.        ]
New Q values:  [   3.06655861 1177.85012448  321.41835844    0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x   x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2871.98349857    26.73544252 -2918.99152898   -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [ 2871.98349857    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [ 1501.54843677    26.73544252 -2918.99152898   -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[   3.06655861 1177.85012448  321.41835844    0.        ]
------
Step:10, Action:South
State  190
Old Q Values:  [ 1.04129094e+00 -5.70379540e+03  1.79487036e+03  0.00000000e+00]
New Q values:  [ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
Reward: -10001  Episode Reward:  -9960
xxxxx
x   x
x   x
xg..x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  226.29741638 2005.90125165  -30.99112081]
------
Step:1, Action:East
State  180
Old Q Values:  [  283.82862458  2162.68172044  2081.18692984 -4966.32149798]
New Q values:  [  283.82862458  2162.68172044  2009.3049168  -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3904.76714955  2474.1699074    239.04887894]
------
Step:2, Action:South
State  196
Old Q Values:  [-2469.90645144  3904.76714955  2474.1699074    239.04887894]
New Q values:  [-2469.90645144  2183.27587213  2474.1699074    239.04887894]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  2053.23004103  1681.65926311]
------
Step:3, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  2053.23004103  1681.65926311]
New Q values:  [   16.82637525 -5807.06396197  7258.56866868  1681.65926311]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x  .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21439.58884088 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:4, Action:North
State  288
Old Q Values:  [21439.58884088 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [25413.23402238 -6442.16912869 -8192.20126966  2770.53273017]
Reward: 9  Episode Reward:  36
xxxxx
x. .x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[56106.66162008  2917.78105602  1612.00333505 15068.60620306]
------
Step:5, Action:North
State  210
Old Q Values:  [39873.50200781  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [35870.78296257  1181.88585694  3961.62714931  2663.84270475]
Reward: 9  Episode Reward:  45
xxxxx
x. ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 66386.60719816]
------
Step:6, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 66386.60719816]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 62676.66184196]
Reward: -1  Episode Reward:  44
xxxxx
x.a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   771.47786265]
New Q values:  [ -281.736      -1150.91067548   247.18533711   543.74105182]
Reward: 9  Episode Reward:  53
xxxxx
xa  x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253  353.59421219 -252.78192178]
------
Step:8, Action:South
State  105
Old Q Values:  [-180.6        1626.4878578    65.14560537    0.        ]
New Q values:  [-180.6        1827.67773756   65.14560537    0.        ]
Reward: -1  Episode Reward:  52
xxxxx
x g x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 476.02888691    0.         3925.60864811 -178.98      ]
------
Step:9, Action:East
State  185
Old Q Values:  [ 476.02888691    0.         3925.60864811 -178.98      ]
New Q values:  [  476.02888691     0.         -2755.05797698  -178.98      ]
Reward: -10001  Episode Reward:  -9949
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4604.16691272 -2735.46306511  1690.15671218 -6102.86502307]
------
Step:1, Action:East
State  261
Old Q Values:  [ 1501.54843677    26.73544252 -2918.99152898   -35.88578819]
New Q values:  [1501.54843677   26.73544252 1015.37398901  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  7258.56866868  1681.65926311]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12564.51973551 76278.10968297]
New Q values:  [-2527.46239811 -8521.23367799 12655.17810092 76278.10968297]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[25413.23402238 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:3, Action:North
State  288
Old Q Values:  [25413.23402238 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [20931.92849772 -6442.16912869 -8192.20126966  2770.53273017]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[35870.78296257  1181.88585694  3961.62714931  2663.84270475]
------
Step:4, Action:North
State  210
Old Q Values:  [35870.78296257  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [14430.62714565  1181.88585694  3961.62714931  2663.84270475]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  2.56379869e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  2.56379869e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  2.71074263e+02]
Reward: 9  Episode Reward:  45
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   543.74105182]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   543.74105182]
New Q values:  [ -281.736      -1150.91067548   247.18533711   332.61309899]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   385.72226087  -180.6       ]
------
Step:7, Action:East
State  107
Old Q Values:  [-252.35169558  765.83302253  353.59421219 -252.78192178]
New Q values:  [-252.35169558  765.83302253  240.62161457 -252.78192178]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   332.61309899]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   332.61309899]
New Q values:  [ -281.736      -1150.91067548   247.18533711   248.16191785]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -8952.15415062   385.72226087  -180.6       ]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558  765.83302253  240.62161457 -252.78192178]
New Q values:  [-252.35169558  765.83302253  170.09722119 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   248.16191785]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   248.16191785]
New Q values:  [ -281.736      -1150.91067548   247.18533711   328.4146739 ]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253  170.09722119 -252.78192178]
------
Step:11, Action:South
State  109
Old Q Values:  [-241.10880094  657.32663284   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  870.10102863   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  49
xxxxx
x g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  226.29741638 2005.90125165  -30.99112081]
------
Step:12, Action:East
State  176
Old Q Values:  [76485.61294353  1621.55095326 79434.4219283      0.        ]
New Q values:  [76485.61294353  1621.55095326 98514.66694343     0.        ]
Reward: 100009  Episode Reward:  100058
xxxxx
xg  x
x a x
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   332.8133399    593.38542417]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   332.8133399    593.38542417]
New Q values:  [ -253.44886264 -1902.20915811   332.8133399    472.50407643]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253  170.09722119 -252.78192178]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  864.71293249  629.31852411 -120.29354603]
New Q values:  [-177.44732869  953.05554849  629.31852411 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  226.29741638 2005.90125165  -30.99112081]
------
Step:3, Action:East
State  189
Old Q Values:  [ 422.44659346  448.89758527 5779.20583487  154.04646645]
New Q values:  [ 422.44659346  448.89758527 5708.43761454  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.13045176e+04 -2.50318991e+03  2.00341972e+02]
------
Step:4, Action:South
State  197
Old Q Values:  [ 1.32443385e-01  1.36149628e+03 -4.51080211e+03  4.03062559e+02]
New Q values:  [ 1.32443385e-01  1.55132555e+03 -4.51080211e+03  4.03062559e+02]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  3.33775679e+03]
------
Step:5, Action:West
State  277
Old Q Values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  3.33775679e+03]
New Q values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  1.79096725e+03]
Reward: 9  Episode Reward:  45
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1501.54843677   26.73544252 1015.37398901  -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [1501.54843677   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1201.7897502    26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  226.29741638 2005.90125165  -30.99112081]
------
Step:7, Action:East
State  189
Old Q Values:  [ 422.44659346  448.89758527 5708.43761454  154.04646645]
New Q values:  [ 422.44659346  448.89758527 2704.11439181  154.04646645]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1404.46448665    0.          198.38683706]
------
Step:8, Action:South
State  196
Old Q Values:  [-2469.90645144  2183.27587213  2474.1699074    239.04887894]
New Q values:  [-2469.90645144  3050.28094946  2474.1699074    239.04887894]
Reward: -1  Episode Reward:  42
xxxxx
x g.x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  7258.56866868  1681.65926311]
------
Step:9, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  7258.56866868  1681.65926311]
New Q values:  [   16.82637525 -5807.06396197  9188.40601679  1681.65926311]
Reward: 9  Episode Reward:  51
xxxxx
x  .x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20931.92849772 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:10, Action:North
State  288
Old Q Values:  [20931.92849772 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [12701.35954278 -6442.16912869 -8192.20126966  2770.53273017]
Reward: -1  Episode Reward:  50
xxxxx
x  .x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14430.62714565  1181.88585694  3961.62714931  2663.84270475]
------
Step:11, Action:North
State  210
Old Q Values:  [14430.62714565  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [84580.64941085  1181.88585694  3961.62714931  2663.84270475]
Reward: 100009  Episode Reward:  100059
xxxxx
x  ax
x   x
xg  x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  870.10102863   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  108
Old Q Values:  [-8463.16477134  2865.30357707   724.20787443     0.        ]
New Q values:  [-8463.16477134  1800.32594696   724.20787443     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  283.82862458  2162.68172044  2009.3049168  -4966.32149798]
------
Step:2, Action:South
State  181
Old Q Values:  [ 462.07147878  226.29741638 2005.90125165  -30.99112081]
New Q values:  [ 462.07147878  456.45589161 2005.90125165  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1201.7897502    26.73544252 1015.37398901  -35.88578819]
------
Step:3, Action:North
State  260
Old Q Values:  [-4604.16691272 -2735.46306511  1690.15671218 -6102.86502307]
New Q values:  [-1193.46224895 -2735.46306511  1690.15671218 -6102.86502307]
Reward: -1  Episode Reward:  17
xxxxx
xg .x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  283.82862458  2162.68172044  2009.3049168  -4966.32149798]
------
Step:4, Action:South
State  180
Old Q Values:  [  283.82862458  2162.68172044  2009.3049168  -4966.32149798]
New Q values:  [  283.82862458  1371.51970183  2009.3049168  -4966.32149798]
Reward: -1  Episode Reward:  16
xxxxx
x  .x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1193.46224895 -2735.46306511  1690.15671218 -6102.86502307]
------
Step:5, Action:East
State  260
Old Q Values:  [-1193.46224895 -2735.46306511  1690.15671218 -6102.86502307]
New Q values:  [-1193.46224895 -2735.46306511  3437.98448991 -6102.86502307]
Reward: 9  Episode Reward:  25
xxxxx
x  .x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  9188.40601679  1681.65926311]
------
Step:6, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  9188.40601679  1681.65926311]
New Q values:  [   16.82637525 -5807.06396197  7491.17026955  1681.65926311]
Reward: 9  Episode Reward:  34
xxxxx
x  .x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12701.35954278 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:7, Action:North
State  288
Old Q Values:  [12701.35954278 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [30460.13864037 -6442.16912869 -8192.20126966  2770.53273017]
Reward: 9  Episode Reward:  43
xxxxx
x  .x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[84580.64941085  1181.88585694  3961.62714931  2663.84270475]
------
Step:8, Action:North
State  208
Old Q Values:  [56106.66162008  2917.78105602  1612.00333505 15068.60620306]
New Q values:  [22529.38692694  2917.78105602  1612.00333505 15068.60620306]
Reward: 9  Episode Reward:  52
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  2.71074263e+02]
------
Step:9, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 62676.66184196]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 61192.68369948]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:10, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1571.84492898   330.59086467]
New Q values:  [-9594.56523706 -8069.05606225  1571.84492898   392.66665446]
Reward: -1  Episode Reward:  50
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  870.10102863   -8.57207238 -180.6       ]
------
Step:11, Action:South
State  108
Old Q Values:  [-8463.16477134  1800.32594696   724.20787443     0.        ]
New Q values:  [-8463.16477134  1322.32185382   724.20787443     0.        ]
Reward: -1  Episode Reward:  49
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  283.82862458  1371.51970183  2009.3049168  -4966.32149798]
------
Step:12, Action:East
State  177
Old Q Values:  [24286.95305781 19270.74827372 29551.68706564     0.        ]
New Q values:  [24286.95305781 19270.74827372 78561.57299837     0.        ]
Reward: 100009  Episode Reward:  100058
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  456.45589161 2005.90125165  -30.99112081]
------
Step:1, Action:East
State  181
Old Q Values:  [ 462.07147878  456.45589161 2005.90125165  -30.99112081]
New Q values:  [ 462.07147878  456.45589161 1543.25867277  -30.99112081]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g x
x...x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  870.10102863   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869  953.05554849  629.31852411 -120.29354603]
New Q values:  [-177.44732869  849.59982123  629.31852411 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  456.45589161 1543.25867277  -30.99112081]
------
Step:2, Action:East
State  181
Old Q Values:  [ 462.07147878  456.45589161 1543.25867277  -30.99112081]
New Q values:  [ 462.07147878  456.45589161 4014.0587497   -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.13045176e+04 -2.50318991e+03  2.00341972e+02]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  3050.28094946  2474.1699074    239.04887894]
New Q values:  [-2469.90645144  3472.86346065  2474.1699074    239.04887894]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  7491.17026955  1681.65926311]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12655.17810092 76278.10968297]
New Q values:  [-2527.46239811 -8521.23367799 14205.51283248 76278.10968297]
Reward: 9  Episode Reward:  36
xxxxx
x . x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[30460.13864037 -6442.16912869 -8192.20126966  2770.53273017]
------
Step:5, Action:North
State  288
Old Q Values:  [30460.13864037 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [13445.26169616 -6442.16912869 -8192.20126966  2770.53273017]
Reward: 9  Episode Reward:  45
xxxxx
x g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1590.35842884  1726.22110463 -8896.20691497  4186.02080004]
------
Step:6, Action:West
State  208
Old Q Values:  [22529.38692694  2917.78105602  1612.00333505 15068.60620306]
New Q values:  [22529.38692694  2917.78105602  1612.00333505  6762.34065333]
Reward: -10001  Episode Reward:  -9956
xxxxx
x . x
x g x
x.  x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  748.78287005 4121.4356018     0.        ]
------
Step:1, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  3017.09169281     0.        ]
New Q values:  [    0.         -5969.29177534  2836.96685949     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  5.41576727e+03  1.20371620e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  5.41576727e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.92643098e+03  2.75459017e+04  1.20371620e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[84580.64941085  1181.88585694  3961.62714931  2663.84270475]
------
Step:3, Action:North
State  208
Old Q Values:  [22529.38692694  2917.78105602  1612.00333505  6762.34065333]
New Q values:  [27374.95988062  2917.78105602  1612.00333505  6762.34065333]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 61192.68369948]
------
Step:4, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  2.71074263e+02]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.62364487e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:5, Action:West
State  126
Old Q Values:  [   0.          331.64678262 2228.77882944 1055.33555892]
New Q values:  [   0.          331.64678262 2228.77882944  927.83230218]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1667.66026202  380.14967232 -180.6       ]
------
Step:6, Action:South
State  106
Old Q Values:  [ -180.6        -8952.15415062   385.72226087  -180.6       ]
New Q values:  [ -180.6        -5363.03361968   385.72226087  -180.6       ]
Reward: -10001  Episode Reward:  -9956
xxxxx
x   x
xg  x
x. .x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   328.4146739 ]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   328.4146739 ]
New Q values:  [ -281.736      -1150.91067548   247.18533711   637.06394817]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1667.66026202  380.14967232 -180.6       ]
------
Step:2, Action:East
State  111
Old Q Values:  [-177.44732869  849.59982123  629.31852411 -120.29354603]
New Q values:  [-177.44732869  849.59982123  442.24659409 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   247.18533711   637.06394817]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   637.06394817]
New Q values:  [ -281.736      -1150.91067548   247.18533711   483.97548602]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253  170.09722119 -252.78192178]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869  849.59982123  442.24659409 -120.29354603]
New Q values:  [-177.44732869 1581.67060903  442.24659409 -120.29354603]
Reward: 9  Episode Reward:  16
xxxxx
x  .x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 877.23516594  748.78287005 4121.4356018     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 877.23516594  748.78287005 4121.4356018     0.        ]
New Q values:  [ 877.23516594  748.78287005 2290.44630843    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x  .x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194 1982.97289372 2141.57355904 1915.70494401]
------
Step:6, Action:East
State  201
Old Q Values:  [ 2.33354578e+00  1.13045176e+04 -2.50318991e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.13045176e+04 -5.74006972e+03  2.00341972e+02]
Reward: -9991  Episode Reward:  -9976
xxxxx
x  .x
x  gx
x...x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  870.10102863   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  108
Old Q Values:  [-8463.16477134  1322.32185382   724.20787443     0.        ]
New Q values:  [-8463.16477134  1137.12021657   724.20787443     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  283.82862458  1371.51970183  2009.3049168  -4966.32149798]
------
Step:2, Action:East
State  180
Old Q Values:  [  283.82862458  1371.51970183  2009.3049168  -4966.32149798]
New Q values:  [  283.82862458  1371.51970183  1850.98100491 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3472.86346065  2474.1699074    239.04887894]
------
Step:3, Action:South
State  198
Old Q Values:  [-2.78872080e-01 -6.13340328e+03  1.54713582e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  7491.17026955  1681.65926311]
------
Step:4, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  7491.17026955  1681.65926311]
New Q values:  [   16.82637525 -5807.06396197  7035.44661667  1681.65926311]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg .x
x. ax
xxxxx
Step:5, Action:West
State  288
Old Q Values:  [13445.26169616 -6442.16912869 -8192.20126966  2770.53273017]
New Q values:  [13445.26169616 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  7035.44661667  1681.65926311]
------
Step:6, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  7035.44661667  1681.65926311]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  1681.65926311]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13445.26169616 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:7, Action:North
State  288
Old Q Values:  [13445.26169616 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [13595.99264265 -6442.16912869 -8192.20126966  3218.24707707]
Reward: 9  Episode Reward:  43
xxxxx
x  .x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27374.95988062  2917.78105602  1612.00333505  6762.34065333]
------
Step:8, Action:North
State  216
Old Q Values:  [ 1590.35842884  1726.22110463 -8896.20691497  4186.02080004]
New Q values:  [11512.47797191  1726.22110463 -8896.20691497  4186.02080004]
Reward: 9  Episode Reward:  52
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.62364487e+04]
------
Step:9, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 61192.68369948]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 60599.09244248]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   483.97548602]
New Q values:  [ -281.736      -1150.91067548   247.18533711   308.70687267]
Reward: -1  Episode Reward:  50
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -5363.03361968   385.72226087  -180.6       ]
------
Step:11, Action:East
State  104
Old Q Values:  [-8652.84        6868.26603854   628.27198205 -8652.84      ]
New Q values:  [-8652.84        6868.26603854   722.26227152 -8652.84      ]
Reward: -1  Episode Reward:  49
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1571.84492898   392.66665446]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   247.18533711   308.70687267]
New Q values:  [ -281.736      -1150.91067548 10969.20873522   308.70687267]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.62364487e+04]
------
Step:13, Action:West
State  136
Old Q Values:  [ -170.77177351  1597.1911863  -2383.80019164   121.30671647]
New Q values:  [ -170.77177351  1597.1911863  -2383.80019164   519.47616528]
Reward: -1  Episode Reward:  47
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1571.84492898   392.66665446]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 10969.20873522   308.70687267]
New Q values:  [ -281.736      -1150.91067548 15258.01809446   308.70687267]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.62364487e+04]
------
Step:15, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  3.62364487e+04]
New Q values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  1.90713849e+04]
Reward: -1  Episode Reward:  45
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 15258.01809446   308.70687267]
------
Step:16, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1571.84492898   392.66665446]
New Q values:  [-9594.56523706 -8069.05606225  1107.29532748   392.66665446]
Reward: -1  Episode Reward:  44
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1597.1911863  -2383.80019164   519.47616528]
------
Step:17, Action:South
State  138
Old Q Values:  [ 7.64171987e+01 -6.43132946e+02 -3.22965309e-01  1.90713849e+04]
New Q values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  1.90713849e+04]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[11512.47797191  1726.22110463 -8896.20691497  4186.02080004]
------
Step:18, Action:North
State  218
Old Q Values:  [1118.7863503  3567.55356402    0.          386.1281519 ]
New Q values:  [6168.33000877 3567.55356402    0.          386.1281519 ]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  1.90713849e+04]
------
Step:19, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  1.90713849e+04]
New Q values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  1.22053594e+04]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 15258.01809446   308.70687267]
------
Step:20, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 15258.01809446   308.70687267]
New Q values:  [ -281.736      -1150.91067548  9764.21505374   308.70687267]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  1.22053594e+04]
------
Step:21, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 60599.09244248]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 60361.65593969]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:22, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  9764.21505374   308.70687267]
New Q values:  [ -281.736      -1150.91067548  9764.21505374   238.59942733]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -5363.03361968   385.72226087  -180.6       ]
------
Step:23, Action:East
State  104
Old Q Values:  [-8652.84        6868.26603854   722.26227152 -8652.84      ]
New Q values:  [-8652.84        6868.26603854   620.49350685 -8652.84      ]
Reward: -1  Episode Reward:  37
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1107.29532748   392.66665446]
------
Step:24, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  9764.21505374   238.59942733]
New Q values:  [ -281.736      -1150.91067548  7566.69383746   238.59942733]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  1.22053594e+04]
------
Step:25, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  1.22053594e+04]
New Q values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  7.15155191e+03]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  7566.69383746   238.59942733]
------
Step:26, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  7566.69383746   238.59942733]
New Q values:  [ -281.736      -1150.91067548  5171.54310674   238.59942733]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  7.15155191e+03]
------
Step:27, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 60361.65593969]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 60266.68133857]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:28, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  5171.54310674   238.59942733]
New Q values:  [ -281.736      -1150.91067548  5171.54310674   210.55644919]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -5363.03361968   385.72226087  -180.6       ]
------
Step:29, Action:East
State  107
Old Q Values:  [-252.35169558  765.83302253  170.09722119 -252.78192178]
New Q values:  [-252.35169558  765.83302253 1618.9018205  -252.78192178]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  5171.54310674   210.55644919]
------
Step:30, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  5171.54310674   210.55644919]
New Q values:  [ -281.736      -1150.91067548  4213.48281445   210.55644919]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  7.15155191e+03]
------
Step:31, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 60266.68133857]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 60228.69149812]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:32, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  4213.48281445   210.55644919]
New Q values:  [ -281.736      -1150.91067548  4213.48281445   199.33925794]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -5363.03361968   385.72226087  -180.6       ]
------
Step:33, Action:East
State  104
Old Q Values:  [-8652.84        6868.26603854   620.49350685 -8652.84      ]
New Q values:  [-8652.84        6868.26603854   579.78600099 -8652.84      ]
Reward: -1  Episode Reward:  27
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1107.29532748   392.66665446]
------
Step:34, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4213.48281445   199.33925794]
New Q values:  [ -281.736      -1150.91067548  3830.25869754   199.33925794]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  7.15155191e+03]
------
Step:35, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 60228.69149812]
New Q values:  [46177.80406237 24312.8833164   -180.00807518 60213.49556194]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   28498.32154925 120408.72987564]
------
Step:36, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   28498.32154925 120408.72987564]
New Q values:  [ -180.6         3557.6642036  28498.32154925 69109.34753192]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SW
[    0.         43483.63171141 69821.51860553     0.        ]
------
Step:37, Action:East
State  107
Old Q Values:  [-252.35169558  765.83302253 1618.9018205  -252.78192178]
New Q values:  [-252.35169558  765.83302253 1796.03833746 -252.78192178]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3830.25869754   199.33925794]
------
Step:38, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   332.8133399    472.50407643]
New Q values:  [ -253.44886264 -1902.20915811  2277.99090771   472.50407643]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  7.15155191e+03]
------
Step:39, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  7.15155191e+03]
New Q values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  3.54341803e+03]
Reward: -1  Episode Reward:  21
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2277.99090771   472.50407643]
------
Step:40, Action:East
State  121
Old Q Values:  [    0.             0.         -8868.36952603   231.43606375]
New Q values:  [    0.             0.         -9068.79045452   231.43606375]
Reward: -10001  Episode Reward:  -9980
xxxxx
x  gx
x   x
x.  x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1581.67060903  442.24659409 -120.29354603]
------
Step:1, Action:South
State  109
Old Q Values:  [-241.10880094  870.10102863   -8.57207238 -180.6       ]
New Q values:  [-241.10880094 1557.65803636   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x .gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  456.45589161 4014.0587497   -30.99112081]
------
Step:2, Action:East
State  189
Old Q Values:  [ 422.44659346  448.89758527 2704.11439181  154.04646645]
New Q values:  [ 422.44659346  448.89758527 2762.3443205   154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  5584.32854592 1367.75973915 1141.49622464]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.24516606e+04 1.77525779e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.18694971e+04 1.77525779e+04 2.91043938e+03]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 14205.51283248 76278.10968297]
------
Step:4, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  1681.65926311]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  1709.45905222]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1193.46224895 -2735.46306511  3437.98448991 -6102.86502307]
------
Step:5, Action:East
State  260
Old Q Values:  [-1193.46224895 -2735.46306511  3437.98448991 -6102.86502307]
New Q values:  [-1193.46224895 -2735.46306511  3428.74094262 -6102.86502307]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  6847.15715551  1709.45905222]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 14205.51283248 76278.10968297]
New Q values:  [-2527.46239811 -8521.23367799  9766.40292579 76278.10968297]
Reward: 9  Episode Reward:  44
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13595.99264265 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:7, Action:North
State  288
Old Q Values:  [13595.99264265 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [13650.28502125 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27374.95988062  2917.78105602  1612.00333505  6762.34065333]
------
Step:8, Action:North
State  210
Old Q Values:  [84580.64941085  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [51901.70843292  1181.88585694  3961.62714931  2663.84270475]
Reward: 9  Episode Reward:  52
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 24312.8833164   -180.00807518 60213.49556194]
------
Step:9, Action:West
State  130
Old Q Values:  [46177.80406237 24312.8833164   -180.00807518 60213.49556194]
New Q values:  [ 46177.80406237  24312.8833164    -180.00807518 104823.60248435]
Reward: 100009  Episode Reward:  100061
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  3.54341803e+03]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  3.54341803e+03]
New Q values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  2.57184482e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3830.25869754   199.33925794]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3830.25869754   199.33925794]
New Q values:  [ -281.736      -1150.91067548  2490.27054298   199.33925794]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  2.57184482e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.19589021e+03 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  1.68542686e+04 -3.22965309e-01  2.57184482e+03]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[51901.70843292  1181.88585694  3961.62714931  2663.84270475]
------
Step:4, Action:North
State  210
Old Q Values:  [51901.70843292  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [25816.36395772  1181.88585694  3961.62714931  2663.84270475]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.68542686e+04 -3.22965309e-01  2.57184482e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.68542686e+04 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  8.95359541e+03 -3.22965309e-01  2.57184482e+03]
Reward: -10001  Episode Reward:  -9985
xxxxx
x.  x
x..gx
x.. x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25816.36395772  1181.88585694  3961.62714931  2663.84270475]
------
Step:1, Action:North
State  210
Old Q Values:  [25816.36395772  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [13018.02420616  1181.88585694  3961.62714931  2663.84270475]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  8.95359541e+03 -3.22965309e-01  2.57184482e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  8.95359541e+03 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  7.48624543e+03 -3.22965309e-01  2.57184482e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13018.02420616  1181.88585694  3961.62714931  2663.84270475]
------
Step:3, Action:North
State  210
Old Q Values:  [13018.02420616  1181.88585694  3961.62714931  2663.84270475]
New Q values:  [7452.48331025 1181.88585694 3961.62714931 2663.84270475]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.48624543e+03 -3.22965309e-01  2.57184482e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  7.48624543e+03 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  5.22964316e+03 -3.22965309e-01  2.57184482e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7452.48331025 1181.88585694 3961.62714931 2663.84270475]
------
Step:5, Action:North
State  210
Old Q Values:  [7452.48331025 1181.88585694 3961.62714931 2663.84270475]
New Q values:  [4549.28627314 1181.88585694 3961.62714931 2663.84270475]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.22964316e+03 -3.22965309e-01  2.57184482e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.22964316e+03 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  1.03037452e+04 -3.22965309e-01  2.57184482e+03]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27374.95988062  2917.78105602  1612.00333505  6762.34065333]
------
Step:7, Action:North
State  208
Old Q Values:  [27374.95988062  2917.78105602  1612.00333505  6762.34065333]
New Q values:  [14040.50752112  2917.78105602  1612.00333505  6762.34065333]
Reward: -1  Episode Reward:  3
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.03037452e+04 -3.22965309e-01  2.57184482e+03]
------
Step:8, Action:South
State  136
Old Q Values:  [ -170.77177351  1597.1911863  -2383.80019164   519.47616528]
New Q values:  [ -170.77177351  4850.42873085 -2383.80019164   519.47616528]
Reward: -1  Episode Reward:  2
xxxxx
xg. x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14040.50752112  2917.78105602  1612.00333505  6762.34065333]
------
Step:9, Action:North
State  208
Old Q Values:  [14040.50752112  2917.78105602  1612.00333505  6762.34065333]
New Q values:  [8706.72657732 2917.78105602 1612.00333505 6762.34065333]
Reward: -1  Episode Reward:  1
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.03037452e+04 -3.22965309e-01  2.57184482e+03]
------
Step:10, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.03037452e+04 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  5.48568397e+03 -3.22965309e-01  2.57184482e+03]
Reward: -1  Episode Reward:  0
xxxxx
x.. x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4549.28627314 1181.88585694 3961.62714931 2663.84270475]
------
Step:11, Action:North
State  210
Old Q Values:  [4549.28627314 1181.88585694 3961.62714931 2663.84270475]
New Q values:  [3464.81970138 1181.88585694 3961.62714931 2663.84270475]
Reward: -1  Episode Reward:  -1
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.48568397e+03 -3.22965309e-01  2.57184482e+03]
------
Step:12, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.48568397e+03 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  4.80569156e+03 -3.22965309e-01  2.57184482e+03]
Reward: -1  Episode Reward:  -2
xxxxx
x.. x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[8706.72657732 2917.78105602 1612.00333505 6762.34065333]
------
Step:13, Action:North
State  210
Old Q Values:  [3464.81970138 1181.88585694 3961.62714931 2663.84270475]
New Q values:  [2827.03534936 1181.88585694 3961.62714931 2663.84270475]
Reward: -1  Episode Reward:  -3
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.80569156e+03 -3.22965309e-01  2.57184482e+03]
------
Step:14, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.80569156e+03 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  3.11016477e+03 -3.22965309e-01  2.57184482e+03]
Reward: -1  Episode Reward:  -4
xxxxx
x.. x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2827.03534936 1181.88585694 3961.62714931 2663.84270475]
------
Step:15, Action:East
State  210
Old Q Values:  [2827.03534936 1181.88585694 3961.62714931 2663.84270475]
New Q values:  [2827.03534936 1181.88585694 2592.53900452 2663.84270475]
Reward: -301  Episode Reward:  -305
xxxxx
x.. x
x..ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2827.03534936 1181.88585694 2592.53900452 2663.84270475]
------
Step:16, Action:North
State  210
Old Q Values:  [2827.03534936 1181.88585694 2592.53900452 2663.84270475]
New Q values:  [2063.26357071 1181.88585694 2592.53900452 2663.84270475]
Reward: -1  Episode Reward:  -306
xxxxx
x..ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.11016477e+03 -3.22965309e-01  2.57184482e+03]
------
Step:17, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.11016477e+03 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  2.04261872e+03 -3.22965309e-01  2.57184482e+03]
Reward: -1  Episode Reward:  -307
xxxxx
x.. x
x..ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1181.88585694 2592.53900452 2663.84270475]
------
Step:18, Action:West
State  210
Old Q Values:  [2063.26357071 1181.88585694 2592.53900452 2663.84270475]
New Q values:  [2063.26357071 1181.88585694 2592.53900452 5820.4893261 ]
Reward: 9  Episode Reward:  -298
xxxxx
x.. x
x.a x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  4293.27894101 15831.840814    1101.59744825]
------
Step:19, Action:East
State  193
Old Q Values:  [-5922.26708831 46203.04021149 -2984.46350323  1099.96026581]
New Q values:  [-5922.26708831 46203.04021149 -4582.3674281   1099.96026581]
Reward: -10001  Episode Reward:  -10299
xxxxx
x.. x
x. gx
x ..x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9766.40292579 76278.10968297]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9766.40292579 76278.10968297]
New Q values:  [-2527.46239811 -8521.23367799  8007.04667669 76278.10968297]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13650.28502125 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:2, Action:North
State  288
Old Q Values:  [13650.28502125 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 7211.66080633 -6442.16912869 -8192.20126966  3218.24707707]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1181.88585694 2592.53900452 5820.4893261 ]
------
Step:3, Action:West
State  208
Old Q Values:  [8706.72657732 2917.78105602 1612.00333505 6762.34065333]
New Q values:  [8706.72657732 2917.78105602 1612.00333505 6271.18540164]
Reward: -9991  Episode Reward:  -9973
xxxxx
x...x
x.g x
x   x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1181.88585694 2592.53900452 5820.4893261 ]
------
Step:1, Action:West
State  208
Old Q Values:  [8706.72657732 2917.78105602 1612.00333505 6271.18540164]
New Q values:  [ 8706.72657732  2917.78105602  1612.00333505 16374.7862241 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 46203.04021149 -4582.3674281   1099.96026581]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 46203.04021149 -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831 19879.92925942 -4582.3674281   1099.96026581]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 3054.85105864]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8007.04667669 76278.10968297]
New Q values:  [-2527.46239811 -8521.23367799  5365.71691257 76278.10968297]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7211.66080633 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:4, Action:North
State  288
Old Q Values:  [ 7211.66080633 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 7796.50018976 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8706.72657732  2917.78105602  1612.00333505 16374.7862241 ]
------
Step:5, Action:West
State  208
Old Q Values:  [ 8706.72657732  2917.78105602  1612.00333505 16374.7862241 ]
New Q values:  [ 8706.72657732  2917.78105602  1612.00333505 16110.16362995]
Reward: -1  Episode Reward:  15
xxxxx
x.g.x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.18694971e+04 1.77525779e+04 2.91043938e+03]
------
Step:6, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.18694971e+04 1.77525779e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.56306318e+04 1.77525779e+04 2.91043938e+03]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5365.71691257 76278.10968297]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5365.71691257 76278.10968297]
New Q values:  [-2527.46239811 -8521.23367799  5365.71691257 40641.26748355]
Reward: 9  Episode Reward:  23
xxxxx
x...x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[21439.61766225  2256.66526474 33748.74536788  1875.31501677]
------
Step:8, Action:North
State  261
Old Q Values:  [1201.7897502    26.73544252 1015.37398901  -35.88578819]
New Q values:  [1690.33352499   26.73544252 1015.37398901  -35.88578819]
Reward: 9  Episode Reward:  32
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  456.45589161 4014.0587497   -30.99112081]
------
Step:9, Action:North
State  183
Old Q Values:  [ 877.23516594  748.78287005 2290.44630843    0.        ]
New Q values:  [ 864.70748286  748.78287005 2290.44630843    0.        ]
Reward: 9  Episode Reward:  41
xxxxx
xa..x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1694.71138828  581.22500173    0.        ]
------
Step:10, Action:South
State  102
Old Q Values:  [-180.6        587.5732458    5.16      -180.6      ]
New Q values:  [-180.6        1085.51935617    5.16       -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  2836.96685949     0.        ]
------
Step:11, Action:East
State  183
Old Q Values:  [ 864.70748286  748.78287005 2290.44630843    0.        ]
New Q values:  [ 864.70748286  748.78287005 9179.34904329    0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  2.75459017e+04  1.20371620e+03]
------
Step:12, Action:East
State  195
Old Q Values:  [   38.85388605  4293.27894101 15831.840814    1101.59744825]
New Q values:  [  38.85388605 4293.27894101 8078.28312343 1101.59744825]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1181.88585694 2592.53900452 5820.4893261 ]
------
Step:13, Action:West
State  210
Old Q Values:  [2063.26357071 1181.88585694 2592.53900452 5820.4893261 ]
New Q values:  [ 2063.26357071  1181.88585694  2592.53900452 10591.36625036]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.92643098e+03  2.75459017e+04  1.20371620e+03]
------
Step:14, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.56306318e+04 1.77525779e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.56306318e+04 1.19334802e+04 2.91043938e+03]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8706.72657732  2917.78105602  1612.00333505 16110.16362995]
------
Step:15, Action:North
State  208
Old Q Values:  [ 8706.72657732  2917.78105602  1612.00333505 16110.16362995]
New Q values:  [16816.66070358  2917.78105602  1612.00333505 16110.16362995]
Reward: 9  Episode Reward:  45
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 22352.84815254 -8652.84       44428.56690885]
------
Step:16, Action:South
State  128
Old Q Values:  [11374.93691792 22352.84815254 -8652.84       44428.56690885]
New Q values:  [11374.93691792 13985.53747209 -8652.84       44428.56690885]
Reward: -1  Episode Reward:  44
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16816.66070358  2917.78105602  1612.00333505 16110.16362995]
------
Step:17, Action:North
State  208
Old Q Values:  [16816.66070358  2917.78105602  1612.00333505 16110.16362995]
New Q values:  [20054.63435409  2917.78105602  1612.00333505 16110.16362995]
Reward: -1  Episode Reward:  43
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 13985.53747209 -8652.84       44428.56690885]
------
Step:18, Action:South
State  128
Old Q Values:  [11374.93691792 13985.53747209 -8652.84       44428.56690885]
New Q values:  [11374.93691792 11610.00529506 -8652.84       44428.56690885]
Reward: -1  Episode Reward:  42
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20054.63435409  2917.78105602  1612.00333505 16110.16362995]
------
Step:19, Action:North
State  208
Old Q Values:  [20054.63435409  2917.78105602  1612.00333505 16110.16362995]
New Q values:  [21349.82381429  2917.78105602  1612.00333505 16110.16362995]
Reward: -1  Episode Reward:  41
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 11610.00529506 -8652.84       44428.56690885]
------
Step:20, Action:South
State  130
Old Q Values:  [ 46177.80406237  24312.8833164    -180.00807518 104823.60248435]
New Q values:  [ 46177.80406237  16129.50047085   -180.00807518 104823.60248435]
Reward: -1  Episode Reward:  40
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21349.82381429  2917.78105602  1612.00333505 16110.16362995]
------
Step:21, Action:North
State  208
Old Q Values:  [21349.82381429  2917.78105602  1612.00333505 16110.16362995]
New Q values:  [21867.89959837  2917.78105602  1612.00333505 16110.16362995]
Reward: -1  Episode Reward:  39
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 11610.00529506 -8652.84       44428.56690885]
------
Step:22, Action:South
State  128
Old Q Values:  [11374.93691792 11610.00529506 -8652.84       44428.56690885]
New Q values:  [11374.93691792 11203.77199754 -8652.84       44428.56690885]
Reward: -1  Episode Reward:  38
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21867.89959837  2917.78105602  1612.00333505 16110.16362995]
------
Step:23, Action:West
State  208
Old Q Values:  [21867.89959837  2917.78105602  1612.00333505 16110.16362995]
New Q values:  [21867.89959837  2917.78105602  1612.00333505 17132.65497957]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.56306318e+04 1.19334802e+04 2.91043938e+03]
------
Step:24, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.56306318e+04 1.19334802e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.64440329e+04 1.19334802e+04 2.91043938e+03]
Reward: -1  Episode Reward:  36
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5365.71691257 40641.26748355]
------
Step:25, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5365.71691257 40641.26748355]
New Q values:  [-2527.46239811 -8521.23367799  5365.71691257 17284.52927621]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1193.46224895 -2735.46306511  3428.74094262 -6102.86502307]
------
Step:26, Action:East
State  260
Old Q Values:  [-1193.46224895 -2735.46306511  3428.74094262 -6102.86502307]
New Q values:  [-1193.46224895 -2735.46306511  6556.25515991 -6102.86502307]
Reward: -1  Episode Reward:  34
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5365.71691257 17284.52927621]
------
Step:27, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5365.71691257 17284.52927621]
New Q values:  [-2527.46239811 -8521.23367799  5365.71691257  8880.08825846]
Reward: -1  Episode Reward:  33
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1193.46224895 -2735.46306511  6556.25515991 -6102.86502307]
------
Step:28, Action:East
State  260
Old Q Values:  [-1193.46224895 -2735.46306511  6556.25515991 -6102.86502307]
New Q values:  [-1193.46224895 -2735.46306511  5285.9285415  -6102.86502307]
Reward: -1  Episode Reward:  32
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5365.71691257  8880.08825846]
------
Step:29, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5365.71691257  8880.08825846]
New Q values:  [-2527.46239811 -8521.23367799  5365.71691257  5137.21386583]
Reward: -1  Episode Reward:  31
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1193.46224895 -2735.46306511  5285.9285415  -6102.86502307]
------
Step:30, Action:East
State  260
Old Q Values:  [-1193.46224895 -2735.46306511  5285.9285415  -6102.86502307]
New Q values:  [-1193.46224895 -2735.46306511  3723.48649037 -6102.86502307]
Reward: -1  Episode Reward:  30
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5365.71691257  5137.21386583]
------
Step:31, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5365.71691257  5137.21386583]
New Q values:  [-2527.46239811 -8521.23367799  4484.63682196  5137.21386583]
Reward: -1  Episode Reward:  29
xxxxx
xg. x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7796.50018976 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:32, Action:North
State  288
Old Q Values:  [ 7796.50018976 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 9678.36995542 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  28
xxxxx
x g x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21867.89959837  2917.78105602  1612.00333505 17132.65497957]
------
Step:33, Action:North
State  208
Old Q Values:  [21867.89959837  2917.78105602  1612.00333505 17132.65497957]
New Q values:  [22075.129912    2917.78105602  1612.00333505 17132.65497957]
Reward: -1  Episode Reward:  27
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 11203.77199754 -8652.84       44428.56690885]
------
Step:34, Action:West
State  130
Old Q Values:  [ 46177.80406237  16129.50047085   -180.00807518 104823.60248435]
New Q values:  [ 46177.80406237  16129.50047085   -180.00807518 122667.64525332]
Reward: 100009  Episode Reward:  100036
xxxxx
x a x
xg  x
x   x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1557.65803636   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1581.67060903  442.24659409 -120.29354603]
New Q values:  [-177.44732869 1842.28586852  442.24659409 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  456.45589161 4014.0587497   -30.99112081]
------
Step:2, Action:East
State  183
Old Q Values:  [ 864.70748286  748.78287005 9179.34904329    0.        ]
New Q values:  [ 864.70748286  748.78287005 7351.43361524    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 1.22476467e+04 2.42409341e+03 0.00000000e+00]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 19879.92925942 -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831  9350.6848786  -4582.3674281   1099.96026581]
Reward: 9  Episode Reward:  27
xxxxx
x . x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 3054.85105864]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4484.63682196  5137.21386583]
New Q values:  [-2527.46239811 -8521.23367799  4702.76571541  5137.21386583]
Reward: 9  Episode Reward:  36
xxxxx
x . x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9678.36995542 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:5, Action:North
State  288
Old Q Values:  [ 9678.36995542 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 4499.28695577 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -9991  Episode Reward:  -9955
xxxxx
x . x
x  gx
x.  x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.04261872e+03 -3.22965309e-01  2.57184482e+03]
------
Step:1, Action:West
State  136
Old Q Values:  [ -170.77177351  4850.42873085 -2383.80019164   519.47616528]
New Q values:  [ -170.77177351  4850.42873085 -2383.80019164   282.62128524]
Reward: 9  Episode Reward:  9
xxxxx
x.agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -9068.79045452   231.43606375]
------
Step:2, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1107.29532748   392.66665446]
New Q values:  [-9594.56523706 -8069.05606225  1107.29532748   629.76407269]
Reward: 9  Episode Reward:  18
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1557.65803636   -8.57207238 -180.6       ]
------
Step:3, Action:South
State  108
Old Q Values:  [-8463.16477134  1137.12021657   724.20787443     0.        ]
New Q values:  [-8463.16477134  1500.18074031   724.20787443     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  3466.44217895  3369.21098184     0.        ]
------
Step:4, Action:South
State  189
Old Q Values:  [ 422.44659346  448.89758527 2762.3443205   154.04646645]
New Q values:  [ 422.44659346  692.05909161 2762.3443205   154.04646645]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1690.33352499   26.73544252 1015.37398901  -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [1690.33352499   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1504.23670615   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346  692.05909161 2762.3443205   154.04646645]
------
Step:6, Action:South
State  189
Old Q Values:  [ 422.44659346  692.05909161 2762.3443205   154.04646645]
New Q values:  [ 422.44659346  727.49464849 2762.3443205   154.04646645]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1504.23670615   26.73544252 1015.37398901  -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [1504.23670615   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1429.79797861   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346  727.49464849 2762.3443205   154.04646645]
------
Step:8, Action:East
State  189
Old Q Values:  [ 422.44659346  727.49464849 2762.3443205   154.04646645]
New Q values:  [ 422.44659346  727.49464849 4501.69300879  154.04646645]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.13045176e+04 -5.74006972e+03  2.00341972e+02]
------
Step:9, Action:South
State  205
Old Q Values:  [   0.         1404.46448665    0.          198.38683706]
New Q values:  [   0.         1104.47596846    0.          198.38683706]
Reward: 9  Episode Reward:  51
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  1.79096725e+03]
------
Step:10, Action:West
State  277
Old Q Values:  [ 1.64433000e+00  0.00000000e+00 -2.97744407e+01  1.79096725e+03]
New Q values:  [   1.64433       0.          -29.77444073 1144.72629198]
Reward: -1  Episode Reward:  50
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1429.79797861   26.73544252 1015.37398901  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [1429.79797861   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1921.82709408   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  49
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346  727.49464849 4501.69300879  154.04646645]
------
Step:12, Action:East
State  188
Old Q Values:  [-6523.78898263  3466.44217895  3369.21098184     0.        ]
New Q values:  [-6523.78898263  3466.44217895  2014.57249182     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
xg  x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         1387.82535955 2224.9603303   441.58769553]
------
Step:13, Action:East
State  204
Old Q Values:  [   0.         1387.82535955 2224.9603303   441.58769553]
New Q values:  [   0.         1387.82535955 4343.12752369  441.58769553]
Reward: -1  Episode Reward:  47
xxxxx
x g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[11512.47797191  1726.22110463 -8896.20691497  4186.02080004]
------
Step:14, Action:North
State  216
Old Q Values:  [11512.47797191  1726.22110463 -8896.20691497  4186.02080004]
New Q values:  [   59.51980802  1726.22110463 -8896.20691497  4186.02080004]
Reward: -10001  Episode Reward:  -9954
xxxxx
x  gx
x   x
x  .x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22075.129912    2917.78105602  1612.00333505 17132.65497957]
------
Step:1, Action:North
State  216
Old Q Values:  [   59.51980802  1726.22110463 -8896.20691497  4186.02080004]
New Q values:  [ 1484.33654246  1726.22110463 -8896.20691497  4186.02080004]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4850.42873085 -2383.80019164   282.62128524]
------
Step:2, Action:South
State  136
Old Q Values:  [ -170.77177351  4850.42873085 -2383.80019164   282.62128524]
New Q values:  [ -170.77177351  8562.11046594 -2383.80019164   282.62128524]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22075.129912    2917.78105602  1612.00333505 17132.65497957]
------
Step:3, Action:North
State  216
Old Q Values:  [ 1484.33654246  1726.22110463 -8896.20691497  4186.02080004]
New Q values:  [ 1364.68806392  1726.22110463 -8896.20691497  4186.02080004]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.04261872e+03 -3.22965309e-01  2.57184482e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.04261872e+03 -3.22965309e-01  2.57184482e+03]
New Q values:  [ 7.64171987e+01  2.04261872e+03 -3.22965309e-01  1.71153520e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2277.99090771   472.50407643]
------
Step:5, Action:East
State  121
Old Q Values:  [    0.             0.         -9068.79045452   231.43606375]
New Q values:  [    0.             0.         -7059.48304203   231.43606375]
Reward: -10001  Episode Reward:  -9995
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.04261872e+03 -3.22965309e-01  1.71153520e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.04261872e+03 -3.22965309e-01  1.71153520e+03]
New Q values:  [ 7.64171987e+01  1.44498646e+03 -3.22965309e-01  1.71153520e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x..gx
x.. x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4499.28695577 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:1, Action:North
State  288
Old Q Values:  [ 4499.28695577 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 8427.65375591 -6442.16912869 -8192.20126966  3218.24707707]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22075.129912    2917.78105602  1612.00333505 17132.65497957]
------
Step:2, Action:North
State  208
Old Q Values:  [22075.129912    2917.78105602  1612.00333505 17132.65497957]
New Q values:  [11404.08510458  2917.78105602  1612.00333505 17132.65497957]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  8562.11046594 -2383.80019164   282.62128524]
------
Step:3, Action:South
State  130
Old Q Values:  [ 46177.80406237  16129.50047085   -180.00807518 122667.64525332]
New Q values:  [ 46177.80406237  11590.99668221   -180.00807518 122667.64525332]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11404.08510458  2917.78105602  1612.00333505 17132.65497957]
------
Step:4, Action:West
State  208
Old Q Values:  [11404.08510458  2917.78105602  1612.00333505 17132.65497957]
New Q values:  [11404.08510458  2917.78105602  1612.00333505  8785.67187639]
Reward: -10001  Episode Reward:  -9984
xxxxx
x.. x
x.g x
x.. x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.44498646e+03 -3.22965309e-01  1.71153520e+03]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  1.44498646e+03 -3.22965309e-01  1.71153520e+03]
New Q values:  [ 7.64171987e+01  1.44498646e+03 -3.22965309e-01  1.43709524e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2490.27054298   199.33925794]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2490.27054298   199.33925794]
New Q values:  [ -281.736      -1150.91067548  1429.0041556    199.33925794]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.44498646e+03 -3.22965309e-01  1.43709524e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.44498646e+03 -3.22965309e-01  1.43709524e+03]
New Q values:  [ 7.64171987e+01  1.83920082e+03 -3.22965309e-01  1.43709524e+03]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  1726.22110463 -8896.20691497  4186.02080004]
------
Step:4, Action:South
State  208
Old Q Values:  [11404.08510458  2917.78105602  1612.00333505  8785.67187639]
New Q values:  [11404.08510458  3700.80854918  1612.00333505  8785.67187639]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8427.65375591 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:5, Action:North
State  288
Old Q Values:  [ 8427.65375591 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 6547.87137747 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2063.26357071  1181.88585694  2592.53900452 10591.36625036]
------
Step:6, Action:West
State  216
Old Q Values:  [ 1364.68806392  1726.22110463 -8896.20691497  4186.02080004]
New Q values:  [ 1364.68806392  1726.22110463 -8896.20691497  3355.10688379]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  5584.32854592 1367.75973915 1141.49622464]
------
Step:7, Action:South
State  194
Old Q Values:  [-6.00000000e-01  2.92643098e+03  2.75459017e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.71713655e+03  2.75459017e+04  1.20371620e+03]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4702.76571541  5137.21386583]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4702.76571541  5137.21386583]
New Q values:  [-2527.46239811 -8521.23367799  3844.8676994   5137.21386583]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6547.87137747 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:9, Action:North
State  288
Old Q Values:  [ 6547.87137747 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 6039.77408236 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  41
xxxxx
x.  x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11404.08510458  3700.80854918  1612.00333505  8785.67187639]
------
Step:10, Action:North
State  208
Old Q Values:  [11404.08510458  3700.80854918  1612.00333505  8785.67187639]
New Q values:  [41361.32761783  3700.80854918  1612.00333505  8785.67187639]
Reward: -1  Episode Reward:  40
xxxxx
x. ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237  11590.99668221   -180.00807518 122667.64525332]
------
Step:11, Action:West
State  130
Old Q Values:  [ 46177.80406237  11590.99668221   -180.00807518 122667.64525332]
New Q values:  [46177.80406237 11590.99668221  -180.00807518 88067.23168404]
Reward: -1  Episode Reward:  39
xxxxx
x.a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1429.0041556    199.33925794]
New Q values:  [ -281.736      -1150.91067548  1429.0041556    623.94720441]
Reward: 9  Episode Reward:  48
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253 1796.03833746 -252.78192178]
------
Step:13, Action:East
State  107
Old Q Values:  [-252.35169558  765.83302253 1796.03833746 -252.78192178]
New Q values:  [-252.35169558  765.83302253 1401.2126073  -252.78192178]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2277.99090771   472.50407643]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1429.0041556    623.94720441]
New Q values:  [ -281.736      -1150.91067548  1122.7619096    623.94720441]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.83920082e+03 -3.22965309e-01  1.43709524e+03]
------
Step:15, Action:South
State  130
Old Q Values:  [46177.80406237 11590.99668221  -180.00807518 88067.23168404]
New Q values:  [46177.80406237  7813.20854799  -180.00807518 88067.23168404]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2063.26357071  1181.88585694  2592.53900452 10591.36625036]
------
Step:16, Action:West
State  218
Old Q Values:  [6168.33000877 3567.55356402    0.          386.1281519 ]
New Q values:  [6168.33000877 3567.55356402    0.          852.63459632]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577  2329.27778519     0.        ]
------
Step:17, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.22476467e+04 2.42409341e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.22476467e+04 2.81953637e+03 0.00000000e+00]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[6168.33000877 3567.55356402    0.          852.63459632]
------
Step:18, Action:North
State  218
Old Q Values:  [6168.33000877 3567.55356402    0.          852.63459632]
New Q values:  [3018.49225087 3567.55356402    0.          852.63459632]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.83920082e+03 -3.22965309e-01  1.43709524e+03]
------
Step:19, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.83920082e+03 -3.22965309e-01  1.43709524e+03]
New Q values:  [ 7.64171987e+01  1.74161239e+03 -3.22965309e-01  1.43709524e+03]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  1726.22110463 -8896.20691497  3355.10688379]
------
Step:20, Action:South
State  216
Old Q Values:  [ 1364.68806392  1726.22110463 -8896.20691497  3355.10688379]
New Q values:  [ 1364.68806392  2501.82066656 -8896.20691497  3355.10688379]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6039.77408236 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:21, Action:North
State  288
Old Q Values:  [ 6039.77408236 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 3421.84169808 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  3355.10688379]
------
Step:22, Action:West
State  216
Old Q Values:  [ 1364.68806392  2501.82066656 -8896.20691497  3355.10688379]
New Q values:  [ 1364.68806392  2501.82066656 -8896.20691497  3016.74131729]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  5584.32854592 1367.75973915 1141.49622464]
------
Step:23, Action:South
State  200
Old Q Values:  [  62.8218634  5584.32854592 1367.75973915 1141.49622464]
New Q values:  [  62.8218634  3774.29557812 1367.75973915 1141.49622464]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3844.8676994   5137.21386583]
------
Step:24, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3844.8676994   5137.21386583]
New Q values:  [-2527.46239811 -8521.23367799  3844.8676994  72213.85738633]
Reward: 100009  Episode Reward:  100046
xxxxx
x   x
xg  x
xa  x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.64440329e+04 1.19334802e+04 2.91043938e+03]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.64440329e+04 1.19334802e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.22471704e+04 1.19334802e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3844.8676994  72213.85738633]
------
Step:2, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  1709.45905222]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  1265.73174911]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1921.82709408   26.73544252 1015.37398901  -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [1921.82709408   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1978.34846254   26.73544252 1015.37398901  -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 462.07147878  456.45589161 4014.0587497   -30.99112081]
------
Step:4, Action:North
State  181
Old Q Values:  [ 462.07147878  456.45589161 4014.0587497   -30.99112081]
New Q values:  [ 692.642008    456.45589161 4014.0587497   -30.99112081]
Reward: -1  Episode Reward:  26
xxxxx
xa..x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1694.71138828  581.22500173    0.        ]
------
Step:5, Action:South
State  103
Old Q Values:  [ 221.30610858 1694.71138828  581.22500173    0.        ]
New Q values:  [ 221.30610858 2882.71463989  581.22500173    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 864.70748286  748.78287005 7351.43361524    0.        ]
------
Step:6, Action:East
State  181
Old Q Values:  [ 692.642008    456.45589161 4014.0587497   -30.99112081]
New Q values:  [ 692.642008    456.45589161 4410.22896346  -30.99112081]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9350.6848786  -4582.3674281   1099.96026581]
------
Step:7, Action:South
State  199
Old Q Values:  [  14.86214194 1982.97289372 2141.57355904 1915.70494401]
New Q values:  [  14.86214194 1136.00704508 2141.57355904 1915.70494401]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          -29.77444073 1144.72629198]
------
Step:8, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  1265.73174911]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  1099.19723841]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1978.34846254   26.73544252 1015.37398901  -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [1978.34846254   26.73544252 1015.37398901  -35.88578819]
New Q values:  [2996.16946959   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 864.70748286  748.78287005 7351.43361524    0.        ]
------
Step:10, Action:East
State  181
Old Q Values:  [ 692.642008    456.45589161 4410.22896346  -30.99112081]
New Q values:  [ 692.642008    456.45589161 4568.69704896  -30.99112081]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9350.6848786  -4582.3674281   1099.96026581]
------
Step:11, Action:South
State  193
Old Q Values:  [-5922.26708831  9350.6848786  -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831  4083.09183903 -4582.3674281   1099.96026581]
Reward: -1  Episode Reward:  19
xxxxx
x .gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          -29.77444073 1144.72629198]
------
Step:12, Action:West
State  277
Old Q Values:  [   1.64433       0.          -29.77444073 1144.72629198]
New Q values:  [   1.64433       0.          -29.77444073 1356.14135767]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2996.16946959   26.73544252 1015.37398901  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [2996.16946959   26.73544252 1015.37398901  -35.88578819]
New Q values:  [2568.47690252   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 692.642008    456.45589161 4568.69704896  -30.99112081]
------
Step:14, Action:North
State  183
Old Q Values:  [ 864.70748286  748.78287005 7351.43361524    0.        ]
New Q values:  [1210.09738511  748.78287005 7351.43361524    0.        ]
Reward: -1  Episode Reward:  16
xxxxx
xa..x
x  .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2882.71463989  581.22500173    0.        ]
------
Step:15, Action:South
State  103
Old Q Values:  [ 221.30610858 2882.71463989  581.22500173    0.        ]
New Q values:  [ 221.30610858 3357.91594053  581.22500173    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1210.09738511  748.78287005 7351.43361524    0.        ]
------
Step:16, Action:East
State  183
Old Q Values:  [1210.09738511  748.78287005 7351.43361524    0.        ]
New Q values:  [1210.09738511  748.78287005 7581.38089632    0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:17, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.22471704e+04 1.19334802e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.22471704e+04 1.71871904e+04 2.91043938e+03]
Reward: 9  Episode Reward:  23
xxxxx
x ..x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[41361.32761783  3700.80854918  1612.00333505  8785.67187639]
------
Step:18, Action:North
State  208
Old Q Values:  [41361.32761783  3700.80854918  1612.00333505  8785.67187639]
New Q values:  [42970.10055234  3700.80854918  1612.00333505  8785.67187639]
Reward: 9  Episode Reward:  32
xxxxx
x .ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237  7813.20854799  -180.00807518 88067.23168404]
------
Step:19, Action:West
State  136
Old Q Values:  [ -170.77177351  8562.11046594 -2383.80019164   282.62128524]
New Q values:  [ -170.77177351  8562.11046594 -2383.80019164   118.77295748]
Reward: 9  Episode Reward:  41
xxxxx
x agx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         1.08147795]
------
Step:20, Action:West
State  127
Old Q Values:  [   0.            1.67014986  384.73306724 1279.83057755]
New Q values:  [   0.            1.67014986  384.73306724 1064.01799158]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1842.28586852  442.24659409 -120.29354603]
------
Step:21, Action:South
State  99
Old Q Values:  [    0.         34378.61763314 56224.12962286     0.        ]
New Q values:  [    0.         24340.91651165 56224.12962286     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         16101.90751562 35300.23152799     0.        ]
------
Step:22, Action:East
State  189
Old Q Values:  [ 422.44659346  727.49464849 4501.69300879  154.04646645]
New Q values:  [ 422.44659346  727.49464849 2131.41999405  154.04646645]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1104.47596846    0.          198.38683706]
------
Step:23, Action:South
State  195
Old Q Values:  [  38.85388605 4293.27894101 8078.28312343 1101.59744825]
New Q values:  [  38.85388605 3110.02475123 8078.28312343 1101.59744825]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 3054.85105864]
------
Step:24, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  1099.19723841]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  1209.62196612]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2568.47690252   26.73544252 1015.37398901  -35.88578819]
------
Step:25, Action:North
State  261
Old Q Values:  [2568.47690252   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1666.21675923   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346  727.49464849 2131.41999405  154.04646645]
------
Step:26, Action:South
State  191
Old Q Values:  [   3.06655861 1177.85012448  321.41835844    0.        ]
New Q values:  [  3.06655861 970.40507756 321.41835844   0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1666.21675923   26.73544252 1015.37398901  -35.88578819]
------
Step:27, Action:North
State  257
Old Q Values:  [21439.61766225  2256.66526474 33748.74536788  1875.31501677]
New Q values:  [19165.3165233   2256.66526474 33748.74536788  1875.31501677]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         16101.90751562 35300.23152799     0.        ]
------
Step:28, Action:East
State  191
Old Q Values:  [  3.06655861 970.40507756 321.41835844   0.        ]
New Q values:  [  3.06655861 970.40507756 307.92473414   0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[    0.         -1406.21014518   599.85796923     0.        ]
------
Step:29, Action:East
State  204
Old Q Values:  [   0.         1387.82535955 4343.12752369  441.58769553]
New Q values:  [   0.         1387.82535955 2641.67340466  441.58769553]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  3016.74131729]
------
Step:30, Action:South
State  208
Old Q Values:  [42970.10055234  3700.80854918  1612.00333505  8785.67187639]
New Q values:  [42970.10055234 62512.2759291   1612.00333505  8785.67187639]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  2.75459017e+04  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.22471704e+04 1.71871904e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.22471704e+04 2.56339589e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[42970.10055234 62512.2759291   1612.00333505  8785.67187639]
------
Step:2, Action:South
State  210
Old Q Values:  [ 2063.26357071  1181.88585694  2592.53900452 10591.36625036]
New Q values:  [ 2063.26357071  1504.7068522   2592.53900452 10591.36625036]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3421.84169808 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:3, Action:North
State  288
Old Q Values:  [ 3421.84169808 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 4545.54655434 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2063.26357071  1504.7068522   2592.53900452 10591.36625036]
------
Step:4, Action:West
State  210
Old Q Values:  [ 2063.26357071  1504.7068522   2592.53900452 10591.36625036]
New Q values:  [ 2063.26357071  1504.7068522   2592.53900452 12499.71702007]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  2.75459017e+04  1.20371620e+03]
------
Step:5, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.71713655e+03  2.75459017e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.71713655e+03  1.47676758e+04  1.20371620e+03]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2063.26357071  1504.7068522   2592.53900452 12499.71702007]
------
Step:6, Action:West
State  210
Old Q Values:  [ 2063.26357071  1504.7068522   2592.53900452 12499.71702007]
New Q values:  [2063.26357071 1504.7068522  2592.53900452 9429.5895478 ]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  1.47676758e+04  1.20371620e+03]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.71713655e+03  1.47676758e+04  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.71713655e+03  8.73534718e+03  1.20371620e+03]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1504.7068522  2592.53900452 9429.5895478 ]
------
Step:8, Action:West
State  210
Old Q Values:  [2063.26357071 1504.7068522  2592.53900452 9429.5895478 ]
New Q values:  [2063.26357071 1504.7068522  2592.53900452 6391.83997433]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  8.73534718e+03  1.20371620e+03]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.71713655e+03  8.73534718e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.71713655e+03  5.41109087e+03  1.20371620e+03]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1504.7068522  2592.53900452 6391.83997433]
------
Step:10, Action:West
State  208
Old Q Values:  [42970.10055234 62512.2759291   1612.00333505  8785.67187639]
New Q values:  [42970.10055234 62512.2759291   1612.00333505 13187.81986915]
Reward: -1  Episode Reward:  10
xxxxx
x...x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.22471704e+04 2.56339589e+04 2.91043938e+03]
------
Step:11, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.22471704e+04 2.56339589e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.45624254e+04 2.56339589e+04 2.91043938e+03]
Reward: -1  Episode Reward:  9
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3844.8676994  72213.85738633]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3844.8676994  72213.85738633]
New Q values:  [-2527.46239811 -8521.23367799  3844.8676994  30007.98890164]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1193.46224895 -2735.46306511  3723.48649037 -6102.86502307]
------
Step:13, Action:East
State  257
Old Q Values:  [19165.3165233   2256.66526474 33748.74536788  1875.31501677]
New Q values:  [19165.3165233   2256.66526474 22501.29481765  1875.31501677]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3844.8676994  30007.98890164]
------
Step:14, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 4644.37724943 3054.85105864]
New Q values:  [ 870.35122762 -168.92307549 4644.37724943 7971.72886875]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[19165.3165233   2256.66526474 22501.29481765  1875.31501677]
------
Step:15, Action:East
State  257
Old Q Values:  [19165.3165233   2256.66526474 22501.29481765  1875.31501677]
New Q values:  [19165.3165233   2256.66526474 18002.31459755  1875.31501677]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3844.8676994  30007.98890164]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3844.8676994  30007.98890164]
New Q values:  [-2527.46239811 -8521.23367799  3844.8676994  17752.19051765]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[19165.3165233   2256.66526474 18002.31459755  1875.31501677]
------
Step:17, Action:North
State  260
Old Q Values:  [-1193.46224895 -2735.46306511  3723.48649037 -6102.86502307]
New Q values:  [  379.10515826 -2735.46306511  3723.48649037 -6102.86502307]
Reward: 9  Episode Reward:  23
xxxxx
x...x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  2836.96685949     0.        ]
------
Step:18, Action:East
State  180
Old Q Values:  [  283.82862458  1371.51970183  1850.98100491 -4966.32149798]
New Q values:  [  283.82862458  1371.51970183 11108.52001417 -4966.32149798]
Reward: -1  Episode Reward:  22
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.45624254e+04 2.56339589e+04 2.91043938e+03]
------
Step:19, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.45624254e+04 2.56339589e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.91500273e+04 2.56339589e+04 2.91043938e+03]
Reward: -1  Episode Reward:  21
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3844.8676994  17752.19051765]
------
Step:20, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3844.8676994  17752.19051765]
New Q values:  [-2527.46239811 -8521.23367799  3844.8676994  12849.87116405]
Reward: -1  Episode Reward:  20
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[19165.3165233   2256.66526474 18002.31459755  1875.31501677]
------
Step:21, Action:North
State  257
Old Q Values:  [19165.3165233   2256.66526474 18002.31459755  1875.31501677]
New Q values:  [31233.99850883  2256.66526474 18002.31459755  1875.31501677]
Reward: -1  Episode Reward:  19
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[24286.95305781 19270.74827372 78561.57299837     0.        ]
------
Step:22, Action:East
State  177
Old Q Values:  [24286.95305781 19270.74827372 78561.57299837     0.        ]
New Q values:  [24286.95305781 19270.74827372 39114.21687821     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x.g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.91500273e+04 2.56339589e+04 2.91043938e+03]
------
Step:23, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.91500273e+04 2.56339589e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.91500273e+04 2.90066664e+04 2.91043938e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[42970.10055234 62512.2759291   1612.00333505 13187.81986915]
------
Step:24, Action:South
State  210
Old Q Values:  [2063.26357071 1504.7068522  2592.53900452 6391.83997433]
New Q values:  [2063.26357071 1964.94670718 2592.53900452 6391.83997433]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4545.54655434 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:25, Action:North
State  288
Old Q Values:  [ 4545.54655434 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [ 3735.17061404 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1964.94670718 2592.53900452 6391.83997433]
------
Step:26, Action:West
State  210
Old Q Values:  [2063.26357071 1964.94670718 2592.53900452 6391.83997433]
New Q values:  [2063.26357071 1964.94670718 2592.53900452 4179.46324951]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  5.41109087e+03  1.20371620e+03]
------
Step:27, Action:East
State  195
Old Q Values:  [  38.85388605 3110.02475123 8078.28312343 1101.59744825]
New Q values:  [  38.85388605 3110.02475123 4484.55222422 1101.59744825]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1964.94670718 2592.53900452 4179.46324951]
------
Step:28, Action:West
State  208
Old Q Values:  [42970.10055234 62512.2759291   1612.00333505 13187.81986915]
New Q values:  [42970.10055234 62512.2759291   1612.00333505  6499.45549937]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4083.09183903 -4582.3674281   1099.96026581]
------
Step:29, Action:South
State  195
Old Q Values:  [  38.85388605 3110.02475123 4484.55222422 1101.59744825]
New Q values:  [  38.85388605 3634.92856112 4484.55222422 1101.59744825]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 7971.72886875]
------
Step:30, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3844.8676994  12849.87116405]
New Q values:  [-2527.46239811 -8521.23367799  3844.8676994   5639.21349339]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1666.21675923   26.73544252 1015.37398901  -35.88578819]
------
Step:31, Action:North
State  261
Old Q Values:  [1666.21675923   26.73544252 1015.37398901  -35.88578819]
New Q values:  [2036.49581838   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  9
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 692.642008    456.45589161 4568.69704896  -30.99112081]
------
Step:32, Action:North
State  181
Old Q Values:  [ 692.642008    456.45589161 4568.69704896  -30.99112081]
New Q values:  [ 695.43396647  456.45589161 4568.69704896  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         1376.59054423    0.            0.        ]
------
Step:33, Action:South
State  101
Old Q Values:  [   0.         1376.59054423    0.            0.        ]
New Q values:  [   0.         1920.64533238    0.            0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 695.43396647  456.45589161 4568.69704896  -30.99112081]
------
Step:34, Action:East
State  181
Old Q Values:  [ 695.43396647  456.45589161 4568.69704896  -30.99112081]
New Q values:  [ 695.43396647  456.45589161 2868.73785778  -30.99112081]
Reward: -1  Episode Reward:  16
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3472.86346065  2474.1699074    239.04887894]
------
Step:35, Action:South
State  193
Old Q Values:  [-5922.26708831  4083.09183903 -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831  4024.15539624 -4582.3674281   1099.96026581]
Reward: -1  Episode Reward:  15
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 7971.72886875]
------
Step:36, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  1209.62196612]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  1094.19753196]
Reward: -1  Episode Reward:  14
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2036.49581838   26.73544252 1015.37398901  -35.88578819]
------
Step:37, Action:North
State  261
Old Q Values:  [2036.49581838   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1674.61968469   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 695.43396647  456.45589161 2868.73785778  -30.99112081]
------
Step:38, Action:North
State  181
Old Q Values:  [ 695.43396647  456.45589161 2868.73785778  -30.99112081]
New Q values:  [1284.94836875  456.45589161 2868.73785778  -30.99112081]
Reward: -1  Episode Reward:  12
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 3357.91594053  581.22500173    0.        ]
------
Step:39, Action:South
State  103
Old Q Values:  [ 221.30610858 3357.91594053  581.22500173    0.        ]
New Q values:  [ 221.30610858 3616.98064511  581.22500173    0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1210.09738511  748.78287005 7581.38089632    0.        ]
------
Step:40, Action:East
State  183
Old Q Values:  [1210.09738511  748.78287005 7581.38089632    0.        ]
New Q values:  [1210.09738511  748.78287005 4655.2796183     0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  5.41109087e+03  1.20371620e+03]
------
Step:41, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.71713655e+03  5.41109087e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.71713655e+03  3.41767532e+03  1.20371620e+03]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1964.94670718 2592.53900452 4179.46324951]
------
Step:42, Action:West
State  210
Old Q Values:  [2063.26357071 1964.94670718 2592.53900452 4179.46324951]
New Q values:  [2063.26357071 1964.94670718 2592.53900452 2696.48789617]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  3.41767532e+03  1.20371620e+03]
------
Step:43, Action:East
State  195
Old Q Values:  [  38.85388605 3634.92856112 4484.55222422 1101.59744825]
New Q values:  [  38.85388605 3634.92856112 2602.16725854 1101.59744825]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1964.94670718 2592.53900452 2696.48789617]
------
Step:44, Action:West
State  208
Old Q Values:  [42970.10055234 62512.2759291   1612.00333505  6499.45549937]
New Q values:  [42970.10055234 62512.2759291   1612.00333505  3806.42881862]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4024.15539624 -4582.3674281   1099.96026581]
------
Step:45, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.91500273e+04 2.90066664e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 9.35117497e+03 2.90066664e+04 2.91043938e+03]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
x g x
x a x
xxxxx
Step:46, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3844.8676994   5639.21349339]
New Q values:  [-2527.46239811 -8521.23367799  3844.8676994   2757.47130276]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1674.61968469   26.73544252 1015.37398901  -35.88578819]
------
Step:47, Action:North
State  261
Old Q Values:  [1674.61968469   26.73544252 1015.37398901  -35.88578819]
New Q values:  [2065.83175937   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  3
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1210.09738511  748.78287005 4655.2796183     0.        ]
------
Step:48, Action:East
State  183
Old Q Values:  [1210.09738511  748.78287005 4655.2796183     0.        ]
New Q values:  [1210.09738511  748.78287005 2886.81444369    0.        ]
Reward: -1  Episode Reward:  2
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  3.41767532e+03  1.20371620e+03]
------
Step:49, Action:East
State  195
Old Q Values:  [  38.85388605 3634.92856112 2602.16725854 1101.59744825]
New Q values:  [  38.85388605 3634.92856112 1849.21327227 1101.59744825]
Reward: -1  Episode Reward:  1
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1964.94670718 2592.53900452 2696.48789617]
------
Step:50, Action:West
State  210
Old Q Values:  [2063.26357071 1964.94670718 2592.53900452 2696.48789617]
New Q values:  [2063.26357071 1964.94670718 2592.53900452 2103.29775483]
Reward: -1  Episode Reward:  0
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  3.41767532e+03  1.20371620e+03]
------
Step:51, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.71713655e+03  3.41767532e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  2.71713655e+03  2.14423183e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -1
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1964.94670718 2592.53900452 2103.29775483]
------
Step:52, Action:East
State  210
Old Q Values:  [2063.26357071 1964.94670718 2592.53900452 2103.29775483]
New Q values:  [2063.26357071 1964.94670718 1634.17730316 2103.29775483]
Reward: -301  Episode Reward:  -302
xxxxx
x ..x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1964.94670718 1634.17730316 2103.29775483]
------
Step:53, Action:West
State  210
Old Q Values:  [2063.26357071 1964.94670718 1634.17730316 2103.29775483]
New Q values:  [2063.26357071 1964.94670718 1634.17730316 1655.86006693]
Reward: -1  Episode Reward:  -303
xxxxx
x ..x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  2.14423183e+03  1.20371620e+03]
------
Step:54, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.35117497e+03 2.90066664e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.89333030e+03 2.90066664e+04 2.91043938e+03]
Reward: -1  Episode Reward:  -304
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3844.8676994   2757.47130276]
------
Step:55, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3844.8676994   2757.47130276]
New Q values:  [-2527.46239811 -8521.23367799  2657.89826397  2757.47130276]
Reward: -1  Episode Reward:  -305
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3735.17061404 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:56, Action:North
State  288
Old Q Values:  [ 3735.17061404 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [20247.15102434 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  -306
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[42970.10055234 62512.2759291   1612.00333505  3806.42881862]
------
Step:57, Action:South
State  208
Old Q Values:  [42970.10055234 62512.2759291   1612.00333505  3806.42881862]
New Q values:  [42970.10055234 31078.45567894  1612.00333505  3806.42881862]
Reward: -1  Episode Reward:  -307
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20247.15102434 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:58, Action:North
State  288
Old Q Values:  [20247.15102434 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [20989.29057544 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  -308
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[42970.10055234 31078.45567894  1612.00333505  3806.42881862]
------
Step:59, Action:North
State  208
Old Q Values:  [42970.10055234 31078.45567894  1612.00333505  3806.42881862]
New Q values:  [43613.60972615 31078.45567894  1612.00333505  3806.42881862]
Reward: 9  Episode Reward:  -299
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237  7813.20854799  -180.00807518 88067.23168404]
------
Step:60, Action:West
State  130
Old Q Values:  [46177.80406237  7813.20854799  -180.00807518 88067.23168404]
New Q values:  [ 46177.80406237   7813.20854799   -180.00807518 115965.09693319]
Reward: 100009  Episode Reward:  99710
xxxxx
x a x
x   x
x g x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.89333030e+03 2.90066664e+04 2.91043938e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  3774.29557812 1367.75973915 1141.49622464]
New Q values:  [  62.8218634  3774.29557812 1457.52629085 1141.49622464]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  3016.74131729]
------
Step:2, Action:West
State  208
Old Q Values:  [43613.60972615 31078.45567894  1612.00333505  3806.42881862]
New Q values:  [43613.60972615 31078.45567894  1612.00333505  4223.97143261]
Reward: -10001  Episode Reward:  -9992
xxxxx
x ..x
x.g x
x...x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2657.89826397  2757.47130276]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  1094.19753196]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  1560.1249599 ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  379.10515826 -2735.46306511  3723.48649037 -6102.86502307]
------
Step:2, Action:East
State  260
Old Q Values:  [  379.10515826 -2735.46306511  3723.48649037 -6102.86502307]
New Q values:  [  379.10515826 -2735.46306511  2316.03598698 -6102.86502307]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2657.89826397  2757.47130276]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2657.89826397  2757.47130276]
New Q values:  [-2527.46239811 -8521.23367799  7365.34647822  2757.47130276]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20989.29057544 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:4, Action:North
State  288
Old Q Values:  [20989.29057544 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [21485.19914802 -6442.16912869 -8192.20126966  3218.24707707]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[43613.60972615 31078.45567894  1612.00333505  4223.97143261]
------
Step:5, Action:North
State  208
Old Q Values:  [43613.60972615 31078.45567894  1612.00333505  4223.97143261]
New Q values:  [52240.37297042 31078.45567894  1612.00333505  4223.97143261]
Reward: 9  Episode Reward:  35
xxxxx
x .ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237   7813.20854799   -180.00807518 115965.09693319]
------
Step:6, Action:West
State  136
Old Q Values:  [ -170.77177351  8562.11046594 -2383.80019164   118.77295748]
New Q values:  [ -170.77177351  8562.11046594 -2383.80019164   385.09778124]
Reward: 9  Episode Reward:  44
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1107.29532748   629.76407269]
------
Step:7, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1107.29532748   629.76407269]
New Q values:  [-9594.56523706 -8069.05606225  3010.95127078   629.76407269]
Reward: -1  Episode Reward:  43
xxxxx
x gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  8562.11046594 -2383.80019164   385.09778124]
------
Step:8, Action:South
State  136
Old Q Values:  [ -170.77177351  8562.11046594 -2383.80019164   385.09778124]
New Q values:  [ -170.77177351 19096.3560775  -2383.80019164   385.09778124]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[52240.37297042 31078.45567894  1612.00333505  4223.97143261]
------
Step:9, Action:South
State  208
Old Q Values:  [52240.37297042 31078.45567894  1612.00333505  4223.97143261]
New Q values:  [52240.37297042 18876.34201598  1612.00333505  4223.97143261]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21485.19914802 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:10, Action:North
State  288
Old Q Values:  [21485.19914802 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [24265.59155033 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[52240.37297042 18876.34201598  1612.00333505  4223.97143261]
------
Step:11, Action:South
State  208
Old Q Values:  [52240.37297042 18876.34201598  1612.00333505  4223.97143261]
New Q values:  [52240.37297042 14829.61427149  1612.00333505  4223.97143261]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24265.59155033 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:12, Action:North
State  288
Old Q Values:  [24265.59155033 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [25377.74851126 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[52240.37297042 14829.61427149  1612.00333505  4223.97143261]
------
Step:13, Action:South
State  208
Old Q Values:  [52240.37297042 14829.61427149  1612.00333505  4223.97143261]
New Q values:  [52240.37297042 13544.57026197  1612.00333505  4223.97143261]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[25377.74851126 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:14, Action:North
State  288
Old Q Values:  [25377.74851126 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [25822.61129563 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[52240.37297042 13544.57026197  1612.00333505  4223.97143261]
------
Step:15, Action:North
State  208
Old Q Values:  [52240.37297042 13544.57026197  1612.00333505  4223.97143261]
New Q values:  [21418.03290665 13544.57026197  1612.00333505  4223.97143261]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.74161239e+03 -3.22965309e-01  1.43709524e+03]
------
Step:16, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.74161239e+03 -3.22965309e-01  1.43709524e+03]
New Q values:  [ 7.64171987e+01  7.12145483e+03 -3.22965309e-01  1.43709524e+03]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21418.03290665 13544.57026197  1612.00333505  4223.97143261]
------
Step:17, Action:North
State  208
Old Q Values:  [21418.03290665 13544.57026197  1612.00333505  4223.97143261]
New Q values:  [10703.04961166 13544.57026197  1612.00333505  4223.97143261]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.12145483e+03 -3.22965309e-01  1.43709524e+03]
------
Step:18, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  7.12145483e+03 -3.22965309e-01  1.43709524e+03]
New Q values:  [ 7.64171987e+01  6.91135301e+03 -3.22965309e-01  1.43709524e+03]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10703.04961166 13544.57026197  1612.00333505  4223.97143261]
------
Step:19, Action:South
State  208
Old Q Values:  [10703.04961166 13544.57026197  1612.00333505  4223.97143261]
New Q values:  [10703.04961166 13164.01149348  1612.00333505  4223.97143261]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[25822.61129563 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:20, Action:North
State  288
Old Q Values:  [25822.61129563 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [14277.64796629 -6442.16912869 -8192.20126966  3218.24707707]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10703.04961166 13164.01149348  1612.00333505  4223.97143261]
------
Step:21, Action:South
State  208
Old Q Values:  [10703.04961166 13164.01149348  1612.00333505  4223.97143261]
New Q values:  [10703.04961166  9548.29898728  1612.00333505  4223.97143261]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14277.64796629 -6442.16912869 -8192.20126966  3218.24707707]
------
Step:22, Action:West
State  288
Old Q Values:  [14277.64796629 -6442.16912869 -8192.20126966  3218.24707707]
New Q values:  [14277.64796629 -6442.16912869 -8192.20126966  3678.21749145]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x.. x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 4644.37724943 7971.72886875]
------
Step:23, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 4644.37724943 7971.72886875]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 12558.29110015]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31233.99850883  2256.66526474 18002.31459755  1875.31501677]
------
Step:24, Action:North
State  261
Old Q Values:  [2065.83175937   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1692.35406108   26.73544252 1015.37398901  -35.88578819]
Reward: 9  Episode Reward:  36
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1284.94836875  456.45589161 2868.73785778  -30.99112081]
------
Step:25, Action:East
State  177
Old Q Values:  [24286.95305781 19270.74827372 39114.21687821     0.        ]
New Q values:  [24286.95305781 19270.74827372 76858.33337016     0.        ]
Reward: 100009  Episode Reward:  100045
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1692.35406108   26.73544252 1015.37398901  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [1692.35406108   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1548.38595754   26.73544252 1015.37398901  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1210.09738511  748.78287005 2886.81444369    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [1284.94836875  456.45589161 2868.73785778  -30.99112081]
New Q values:  [1284.94836875  456.45589161 3854.89504828  -30.99112081]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
x g.x
x . x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  6.91135301e+03 -3.22965309e-01  1.43709524e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  6.91135301e+03 -3.22965309e-01  1.43709524e+03]
New Q values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  1.43709524e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x. gx
x...x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10703.04961166  9548.29898728  1612.00333505  4223.97143261]
------
Step:1, Action:North
State  208
Old Q Values:  [10703.04961166  9548.29898728  1612.00333505  4223.97143261]
New Q values:  [4015.52666791 9548.29898728 1612.00333505 4223.97143261]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1842.28586852  442.24659409 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3616.98064511  581.22500173    0.        ]
New Q values:  [ 221.30610858 2318.23659115  581.22500173    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1210.09738511  748.78287005 2886.81444369    0.        ]
------
Step:2, Action:East
State  181
Old Q Values:  [1284.94836875  456.45589161 3854.89504828  -30.99112081]
New Q values:  [1284.94836875  456.45589161 2754.60463818  -30.99112081]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4024.15539624 -4582.3674281   1099.96026581]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  3472.86346065  2474.1699074    239.04887894]
New Q values:  [-2469.90645144  3442.69253091  2474.1699074    239.04887894]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  6847.15715551  1560.1249599 ]
------
Step:4, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7365.34647822  2757.47130276]
New Q values:  [-2527.46239811 -8521.23367799  7234.83298118  2757.47130276]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14277.64796629 -6442.16912869 -8192.20126966  3678.21749145]
------
Step:5, Action:North
State  288
Old Q Values:  [14277.64796629 -6442.16912869 -8192.20126966  3678.21749145]
New Q values:  [ 6335.43825773 -6442.16912869 -8192.20126966  3678.21749145]
Reward: 9  Episode Reward:  35
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2063.26357071 1964.94670718 1634.17730316 1655.86006693]
------
Step:6, Action:North
State  210
Old Q Values:  [2063.26357071 1964.94670718 1634.17730316 1655.86006693]
New Q values:  [35620.23450824  1964.94670718  1634.17730316  1655.86006693]
Reward: 9  Episode Reward:  44
xxxxx
x .ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237   7813.20854799   -180.00807518 115965.09693319]
------
Step:7, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  1.43709524e+03]
New Q values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  9.17066670e+02]
Reward: 9  Episode Reward:  53
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1122.7619096    623.94720441]
------
Step:8, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2277.99090771   472.50407643]
New Q values:  [ -253.44886264 -1902.20915811  1185.71636417   472.50407643]
Reward: -1  Episode Reward:  52
xxxxx
x  ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  9.17066670e+02]
------
Step:9, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  9.17066670e+02]
New Q values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  7.21941577e+02]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1185.71636417   472.50407643]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1122.7619096    623.94720441]
New Q values:  [ -281.736      -1150.91067548   665.08723705   623.94720441]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  7.21941577e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  7.21941577e+02]
New Q values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  6.43891540e+02]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1185.71636417   472.50407643]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   665.08723705   623.94720441]
New Q values:  [ -281.736      -1150.91067548   458.60235688   623.94720441]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  6.43891540e+02]
------
Step:13, Action:West
State  136
Old Q Values:  [ -170.77177351 19096.3560775  -2383.80019164   385.09778124]
New Q values:  [ -170.77177351 19096.3560775  -2383.80019164 -4943.27550627]
Reward: -10001  Episode Reward:  -9953
xxxxx
x g x
x   x
x.  x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
Step:1, Action:West
State  181
Old Q Values:  [1284.94836875  456.45589161 2754.60463818  -30.99112081]
New Q values:  [1284.94836875  456.45589161 2754.60463818  633.38494313]
Reward: -301  Episode Reward:  -301
xxxxx
x...x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1284.94836875  456.45589161 2754.60463818  633.38494313]
------
Step:2, Action:East
State  181
Old Q Values:  [1284.94836875  456.45589161 2754.60463818  633.38494313]
New Q values:  [1284.94836875  456.45589161 3809.24176044  633.38494313]
Reward: -9991  Episode Reward:  -10292
xxxxx
x...x
x g.x
x.. x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1210.09738511  748.78287005 2886.81444369    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [1210.09738511  748.78287005 2886.81444369    0.        ]
New Q values:  [1210.09738511  748.78287005 1975.26674247    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  2.14423183e+03  1.20371620e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.89333030e+03 2.90066664e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 4.89333030e+03 1.44725562e+04 2.91043938e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[4015.52666791 9548.29898728 1612.00333505 4223.97143261]
------
Step:3, Action:South
State  208
Old Q Values:  [4015.52666791 9548.29898728 1612.00333505 4223.97143261]
New Q values:  [4015.52666791 5719.35107223 1612.00333505 4223.97143261]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6335.43825773 -6442.16912869 -8192.20126966  3678.21749145]
------
Step:4, Action:North
State  288
Old Q Values:  [ 6335.43825773 -6442.16912869 -8192.20126966  3678.21749145]
New Q values:  [ 4249.38062476 -6442.16912869 -8192.20126966  3678.21749145]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[4015.52666791 5719.35107223 1612.00333505 4223.97143261]
------
Step:5, Action:South
State  208
Old Q Values:  [4015.52666791 5719.35107223 1612.00333505 4223.97143261]
New Q values:  [4015.52666791 3561.95461632 1612.00333505 4223.97143261]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4249.38062476 -6442.16912869 -8192.20126966  3678.21749145]
------
Step:6, Action:West
State  288
Old Q Values:  [ 4249.38062476 -6442.16912869 -8192.20126966  3678.21749145]
New Q values:  [ 4249.38062476 -6442.16912869 -8192.20126966  3647.13689093]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  7234.83298118  2757.47130276]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  7234.83298118  2757.47130276]
New Q values:  [-2527.46239811 -8521.23367799  4168.1473799   2757.47130276]
Reward: -1  Episode Reward:  23
xxxxx
x.g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4249.38062476 -6442.16912869 -8192.20126966  3647.13689093]
------
Step:8, Action:North
State  288
Old Q Values:  [ 4249.38062476 -6442.16912869 -8192.20126966  3647.13689093]
New Q values:  [ 2966.34367969 -6442.16912869 -8192.20126966  3647.13689093]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[4015.52666791 3561.95461632 1612.00333505 4223.97143261]
------
Step:9, Action:West
State  208
Old Q Values:  [4015.52666791 3561.95461632 1612.00333505 4223.97143261]
New Q values:  [4015.52666791 3561.95461632 1612.00333505 2896.23519192]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4024.15539624 -4582.3674281   1099.96026581]
------
Step:10, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.89333030e+03 1.44725562e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 3.20717633e+03 1.44725562e+04 2.91043938e+03]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4168.1473799   2757.47130276]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4168.1473799   2757.47130276]
New Q values:  [-2527.46239811 -8521.23367799  2760.80001924  2757.47130276]
Reward: -1  Episode Reward:  19
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2966.34367969 -6442.16912869 -8192.20126966  3647.13689093]
------
Step:12, Action:West
State  288
Old Q Values:  [ 2966.34367969 -6442.16912869 -8192.20126966  3647.13689093]
New Q values:  [ 2966.34367969 -6442.16912869 -8192.20126966  2286.49476215]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2760.80001924  2757.47130276]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2760.80001924  2757.47130276]
New Q values:  [-2527.46239811 -8521.23367799  1993.6231116   2757.47130276]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2966.34367969 -6442.16912869 -8192.20126966  2286.49476215]
------
Step:14, Action:North
State  288
Old Q Values:  [ 2966.34367969 -6442.16912869 -8192.20126966  2286.49476215]
New Q values:  [ 2390.59547225 -6442.16912869 -8192.20126966  2286.49476215]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[4015.52666791 3561.95461632 1612.00333505 2896.23519192]
------
Step:15, Action:North
State  210
Old Q Values:  [35620.23450824  1964.94670718  1634.17730316  1655.86006693]
New Q values:  [49043.02288325  1964.94670718  1634.17730316  1655.86006693]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 46177.80406237   7813.20854799   -180.00807518 115965.09693319]
------
Step:16, Action:West
State  130
Old Q Values:  [ 46177.80406237   7813.20854799   -180.00807518 115965.09693319]
New Q values:  [46177.80406237  7813.20854799  -180.00807518 67124.24303285]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  28498.32154925 69109.34753192]
------
Step:17, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3010.95127078   629.76407269]
New Q values:  [-9594.56523706 -8069.05606225  3010.95127078   805.60895034]
Reward: 9  Episode Reward:  43
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1827.67773756   65.14560537    0.        ]
------
Step:18, Action:South
State  104
Old Q Values:  [-8652.84        6868.26603854   579.78600099 -8652.84      ]
New Q values:  [-8652.84        6965.13445598   579.78600099 -8652.84      ]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[  162.71210215     0.         14061.4268019      0.        ]
------
Step:19, Action:East
State  185
Old Q Values:  [  476.02888691     0.         -2755.05797698  -178.98      ]
New Q values:  [ 476.02888691    0.           29.66548264 -178.98      ]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3774.29557812 1457.52629085 1141.49622464]
------
Step:20, Action:South
State  200
Old Q Values:  [  62.8218634  3774.29557812 1457.52629085 1141.49622464]
New Q values:  [  62.8218634  2336.35962208 1457.52629085 1141.49622464]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1993.6231116   2757.47130276]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1993.6231116   2757.47130276]
New Q values:  [-2527.46239811 -8521.23367799  1993.6231116  71261.9603611 ]
Reward: 100009  Episode Reward:  100049
xxxxx
xg  x
x   x
xa  x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  379.10515826 -2735.46306511  2316.03598698 -6102.86502307]
------
Step:1, Action:East
State  260
Old Q Values:  [  379.10515826 -2735.46306511  2316.03598698 -6102.86502307]
New Q values:  [  379.10515826 -2735.46306511  2985.96154145 -6102.86502307]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  6847.15715551  1560.1249599 ]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1993.6231116  71261.9603611 ]
New Q values:  [-2527.46239811 -8521.23367799  1520.02788632 71261.9603611 ]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2390.59547225 -6442.16912869 -8192.20126966  2286.49476215]
------
Step:3, Action:North
State  288
Old Q Values:  [ 2390.59547225 -6442.16912869 -8192.20126966  2286.49476215]
New Q values:  [ 2166.29618927 -6442.16912869 -8192.20126966  2286.49476215]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[4015.52666791 3561.95461632 1612.00333505 2896.23519192]
------
Step:4, Action:North
State  208
Old Q Values:  [4015.52666791 3561.95461632 1612.00333505 2896.23519192]
New Q values:  [7340.51749042 3561.95461632 1612.00333505 2896.23519192]
Reward: 9  Episode Reward:  36
xxxxx
xg.ax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 19096.3560775  -2383.80019164 -4943.27550627]
------
Step:5, Action:South
State  130
Old Q Values:  [46177.80406237  7813.20854799  -180.00807518 67124.24303285]
New Q values:  [46177.80406237  5326.83866632  -180.00807518 67124.24303285]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[7340.51749042 3561.95461632 1612.00333505 2896.23519192]
------
Step:6, Action:North
State  208
Old Q Values:  [7340.51749042 3561.95461632 1612.00333505 2896.23519192]
New Q values:  [23072.87990602  3561.95461632  1612.00333505  2896.23519192]
Reward: -1  Episode Reward:  34
xxxxx
x .ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237  5326.83866632  -180.00807518 67124.24303285]
------
Step:7, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  6.43891540e+02]
New Q values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  4.50140777e+02]
Reward: 9  Episode Reward:  43
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   458.60235688   623.94720441]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   458.60235688   623.94720441]
New Q values:  [ -281.736      -1150.91067548   458.60235688   669.34266395]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253 1401.2126073  -252.78192178]
------
Step:9, Action:East
State  105
Old Q Values:  [-180.6        1827.67773756   65.14560537    0.        ]
New Q values:  [ -180.6         1827.67773756 -5071.25637662     0.        ]
Reward: -10001  Episode Reward:  -9959
xxxxx
x g x
x.. x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4024.15539624 -4582.3674281   1099.96026581]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.20717633e+03 1.44725562e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 2.26668586e+04 1.44725562e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1520.02788632 71261.9603611 ]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1520.02788632 71261.9603611 ]
New Q values:  [-2527.46239811 -8521.23367799  1520.02788632 29405.97260687]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  379.10515826 -2735.46306511  2985.96154145 -6102.86502307]
------
Step:3, Action:East
State  257
Old Q Values:  [31233.99850883  2256.66526474 18002.31459755  1875.31501677]
New Q values:  [31233.99850883  2256.66526474 16022.11762108  1875.31501677]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1520.02788632 29405.97260687]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1520.02788632 29405.97260687]
New Q values:  [-2527.46239811 -8521.23367799  1520.02788632 12226.30483001]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1548.38595754   26.73544252 1015.37398901  -35.88578819]
------
Step:5, Action:North
State  260
Old Q Values:  [  379.10515826 -2735.46306511  2985.96154145 -6102.86502307]
New Q values:  [-2510.40193244 -2735.46306511  2985.96154145 -6102.86502307]
Reward: -9991  Episode Reward:  -9975
xxxxx
x.. x
xg .x
x  .x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  4.50140777e+02]
------
Step:1, Action:West
State  136
Old Q Values:  [ -170.77177351 19096.3560775  -2383.80019164 -4943.27550627]
New Q values:  [ -170.77177351 19096.3560775  -2383.80019164 -1068.62482128]
Reward: 9  Episode Reward:  9
xxxxx
xga x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3010.95127078   805.60895034]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3010.95127078   805.60895034]
New Q values:  [-9594.56523706 -8069.05606225  6932.68733156   805.60895034]
Reward: -1  Episode Reward:  8
xxxxx
x.gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 19096.3560775  -2383.80019164 -1068.62482128]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01 -1.91439123e+01 -3.22965309e-01  4.50140777e+02]
New Q values:  [ 7.64171987e+01  9.02764830e+02 -3.22965309e-01  4.50140777e+02]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  3016.74131729]
------
Step:4, Action:South
State  210
Old Q Values:  [49043.02288325  1964.94670718  1634.17730316  1655.86006693]
New Q values:  [49043.02288325  1477.32711152  1634.17730316  1655.86006693]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2166.29618927 -6442.16912869 -8192.20126966  2286.49476215]
------
Step:5, Action:North
State  288
Old Q Values:  [ 2166.29618927 -6442.16912869 -8192.20126966  2286.49476215]
New Q values:  [ 1770.9408709  -6442.16912869 -8192.20126966  2286.49476215]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  3016.74131729]
------
Step:6, Action:South
State  210
Old Q Values:  [49043.02288325  1477.32711152  1634.17730316  1655.86006693]
New Q values:  [49043.02288325  1276.27927325  1634.17730316  1655.86006693]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1770.9408709  -6442.16912869 -8192.20126966  2286.49476215]
------
Step:7, Action:North
State  288
Old Q Values:  [ 1770.9408709  -6442.16912869 -8192.20126966  2286.49476215]
New Q values:  [ 1612.79874355 -6442.16912869 -8192.20126966  2286.49476215]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  3016.74131729]
------
Step:8, Action:South
State  208
Old Q Values:  [23072.87990602  3561.95461632  1612.00333505  2896.23519192]
New Q values:  [23072.87990602  2110.13027517  1612.00333505  2896.23519192]
Reward: -1  Episode Reward:  22
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1612.79874355 -6442.16912869 -8192.20126966  2286.49476215]
------
Step:9, Action:West
State  288
Old Q Values:  [ 1612.79874355 -6442.16912869 -8192.20126966  2286.49476215]
New Q values:  [ 1612.79874355 -6442.16912869 -8192.20126966  4687.4852349 ]
Reward: 9  Episode Reward:  31
xxxxx
x. gx
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 12558.29110015]
------
Step:10, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 12558.29110015]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 14398.91599271]
Reward: 9  Episode Reward:  40
xxxxx
x.  x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31233.99850883  2256.66526474 16022.11762108  1875.31501677]
------
Step:11, Action:North
State  257
Old Q Values:  [31233.99850883  2256.66526474 16022.11762108  1875.31501677]
New Q values:  [23083.06886193  2256.66526474 16022.11762108  1875.31501677]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         16101.90751562 35300.23152799     0.        ]
------
Step:12, Action:East
State  177
Old Q Values:  [24286.95305781 19270.74827372 76858.33337016     0.        ]
New Q values:  [24286.95305781 19270.74827372 31955.97996693     0.        ]
Reward: 9  Episode Reward:  48
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4024.15539624 -4582.3674281   1099.96026581]
------
Step:13, Action:South
State  195
Old Q Values:  [  38.85388605 3634.92856112 1849.21327227 1101.59744825]
New Q values:  [  38.85388605 5773.04622226 1849.21327227 1101.59744825]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 14398.91599271]
------
Step:14, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 14398.91599271]
New Q values:  [  870.35122762  -168.92307549  4644.37724943 12683.88705566]
Reward: -1  Episode Reward:  46
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[23083.06886193  2256.66526474 16022.11762108  1875.31501677]
------
Step:15, Action:North
State  257
Old Q Values:  [23083.06886193  2256.66526474 16022.11762108  1875.31501677]
New Q values:  [18819.42153485  2256.66526474 16022.11762108  1875.31501677]
Reward: -1  Episode Reward:  45
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[24286.95305781 19270.74827372 31955.97996693     0.        ]
------
Step:16, Action:North
State  177
Old Q Values:  [24286.95305781 19270.74827372 31955.97996693     0.        ]
New Q values:  [80726.48618428 19270.74827372 31955.97996693     0.        ]
Reward: 100009  Episode Reward:  100054
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.02764830e+02 -3.22965309e-01  4.50140777e+02]
------
Step:1, Action:South
State  136
Old Q Values:  [ -170.77177351 19096.3560775  -2383.80019164 -1068.62482128]
New Q values:  [ -170.77177351  8548.96482619 -2383.80019164 -1068.62482128]
Reward: 9  Episode Reward:  9
xxxxx
xg. x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  3016.74131729]
------
Step:2, Action:West
State  216
Old Q Values:  [ 1364.68806392  2501.82066656 -8896.20691497  3016.74131729]
New Q values:  [ 1364.68806392  2501.82066656 -8896.20691497  8012.15411939]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.26668586e+04 1.44725562e+04 2.91043938e+03]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.26668586e+04 1.44725562e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.27400349e+04 1.44725562e+04 2.91043938e+03]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1520.02788632 12226.30483001]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1520.02788632 12226.30483001]
New Q values:  [-2527.46239811 -8521.23367799  1520.02788632  5360.43771927]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1548.38595754   26.73544252 1015.37398901  -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [1548.38595754   26.73544252 1015.37398901  -35.88578819]
New Q values:  [24836.7002383     26.73544252  1015.37398901   -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x.. x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[80726.48618428 19270.74827372 31955.97996693     0.        ]
------
Step:6, Action:North
State  181
Old Q Values:  [1284.94836875  456.45589161 3809.24176044  633.38494313]
New Q values:  [1214.85032484  456.45589161 3809.24176044  633.38494313]
Reward: 9  Episode Reward:  44
xxxxx
xa. x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2318.23659115  581.22500173    0.        ]
------
Step:7, Action:South
State  103
Old Q Values:  [ 221.30610858 2318.23659115  581.22500173    0.        ]
New Q values:  [ 221.30610858 2069.46716459  581.22500173    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x . x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1214.85032484  456.45589161 3809.24176044  633.38494313]
------
Step:8, Action:North
State  180
Old Q Values:  [  283.82862458  1371.51970183 11108.52001417 -4966.32149798]
New Q values:  [  438.58725668  1371.51970183 11108.52001417 -4966.32149798]
Reward: -1  Episode Reward:  42
xxxxx
xa. x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        1085.51935617    5.16       -180.6       ]
------
Step:9, Action:East
State  110
Old Q Values:  [-239.29051573 1667.66026202  380.14967232 -180.6       ]
New Q values:  [-239.29051573 1667.66026202  826.09351776 -180.6       ]
Reward: 9  Episode Reward:  51
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 2228.77882944  927.83230218]
------
Step:10, Action:East
State  126
Old Q Values:  [   0.          331.64678262 2228.77882944  927.83230218]
New Q values:  [   0.          331.64678262 1161.74098086  927.83230218]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.02764830e+02 -3.22965309e-01  4.50140777e+02]
------
Step:11, Action:South
State  130
Old Q Values:  [46177.80406237  5326.83866632  -180.00807518 67124.24303285]
New Q values:  [46177.80406237 16843.0423315   -180.00807518 67124.24303285]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[49043.02288325  1276.27927325  1634.17730316  1655.86006693]
------
Step:12, Action:North
State  218
Old Q Values:  [3018.49225087 3567.55356402    0.          852.63459632]
New Q values:  [1477.62634943 3567.55356402    0.          852.63459632]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.02764830e+02 -3.22965309e-01  4.50140777e+02]
------
Step:13, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  9.02764830e+02 -3.22965309e-01  4.50140777e+02]
New Q values:  [ 7.64171987e+01  2.76415217e+03 -3.22965309e-01  4.50140777e+02]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  8012.15411939]
------
Step:14, Action:South
State  208
Old Q Values:  [23072.87990602  2110.13027517  1612.00333505  2896.23519192]
New Q values:  [23072.87990602 62255.69768054  1612.00333505  2896.23519192]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.76415217e+03 -3.22965309e-01  4.50140777e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.76415217e+03 -3.22965309e-01  4.50140777e+02]
New Q values:  [ 7.64171987e+01  2.76415217e+03 -3.22965309e-01  3.86259110e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   458.60235688   669.34266395]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1185.71636417   472.50407643]
New Q values:  [ -253.44886264 -1902.20915811  1185.71636417   747.08739113]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1842.28586852  442.24659409 -120.29354603]
------
Step:3, Action:South
State  109
Old Q Values:  [-241.10880094 1557.65803636   -8.57207238 -180.6       ]
New Q values:  [-241.10880094 1267.88921276   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346  727.49464849 2131.41999405  154.04646645]
------
Step:4, Action:East
State  189
Old Q Values:  [ 422.44659346  727.49464849 2131.41999405  154.04646645]
New Q values:  [ 422.44659346  727.49464849 1558.87588424  154.04646645]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2336.35962208 1457.52629085 1141.49622464]
------
Step:5, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.13045176e+04 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  8.33237316e+03 -5.74006972e+03  2.00341972e+02]
Reward: 9  Episode Reward:  45
xxxxx
x  gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549  4644.37724943 12683.88705566]
------
Step:6, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  1560.1249599 ]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  8080.46005545]
Reward: 9  Episode Reward:  54
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[24836.7002383     26.73544252  1015.37398901   -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [24836.7002383     26.73544252  1015.37398901   -35.88578819]
New Q values:  [10401.74286059    26.73544252  1015.37398901   -35.88578819]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346  727.49464849 1558.87588424  154.04646645]
------
Step:8, Action:South
State  189
Old Q Values:  [ 422.44659346  727.49464849 1558.87588424  154.04646645]
New Q values:  [ 422.44659346 3410.92071757 1558.87588424  154.04646645]
Reward: -1  Episode Reward:  52
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[10401.74286059    26.73544252  1015.37398901   -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [10401.74286059    26.73544252  1015.37398901   -35.88578819]
New Q values:  [5183.37335951   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  51
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 3410.92071757 1558.87588424  154.04646645]
------
Step:10, Action:South
State  189
Old Q Values:  [ 422.44659346 3410.92071757 1558.87588424  154.04646645]
New Q values:  [ 422.44659346 2918.78029488 1558.87588424  154.04646645]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5183.37335951   26.73544252 1015.37398901  -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [5183.37335951   26.73544252 1015.37398901  -35.88578819]
New Q values:  [2948.38343227   26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 2918.78029488 1558.87588424  154.04646645]
------
Step:12, Action:South
State  189
Old Q Values:  [ 422.44659346 2918.78029488 1558.87588424  154.04646645]
New Q values:  [ 422.44659346 2051.42714763 1558.87588424  154.04646645]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2948.38343227   26.73544252 1015.37398901  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [2948.38343227   26.73544252 1015.37398901  -35.88578819]
New Q values:  [1794.1815172    26.73544252 1015.37398901  -35.88578819]
Reward: -1  Episode Reward:  47
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 2051.42714763 1558.87588424  154.04646645]
------
Step:14, Action:South
State  188
Old Q Values:  [-6523.78898263  3466.44217895  2014.57249182     0.        ]
New Q values:  [-6523.78898263  2281.76533402  2014.57249182     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
xg  x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2510.40193244 -2735.46306511  2985.96154145 -6102.86502307]
------
Step:15, Action:East
State  260
Old Q Values:  [-2510.40193244 -2735.46306511  2985.96154145 -6102.86502307]
New Q values:  [-2510.40193244 -2735.46306511  3617.92263321 -6102.86502307]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  6847.15715551  8080.46005545]
------
Step:16, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  8080.46005545]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  4316.96081214]
Reward: -1  Episode Reward:  44
xxxxx
xg  x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2510.40193244 -2735.46306511  3617.92263321 -6102.86502307]
------
Step:17, Action:East
State  261
Old Q Values:  [1794.1815172    26.73544252 1015.37398901  -35.88578819]
New Q values:  [1794.1815172    26.73544252 2459.69674226  -35.88578819]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  6847.15715551  4316.96081214]
------
Step:18, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549  4644.37724943 12683.88705566]
New Q values:  [  870.35122762  -168.92307549 63269.39647024 12683.88705566]
Reward: 100009  Episode Reward:  100052
xxxxx
x  gx
x   x
x  ax
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1210.09738511  748.78287005 1975.26674247    0.        ]
------
Step:1, Action:East
State  181
Old Q Values:  [1214.85032484  456.45589161 3809.24176044  633.38494313]
New Q values:  [1214.85032484  456.45589161 2736.34332305  633.38494313]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4024.15539624 -4582.3674281   1099.96026581]
------
Step:2, Action:South
State  195
Old Q Values:  [  38.85388605 5773.04622226 1849.21327227 1101.59744825]
New Q values:  [   38.85388605 21295.43742998  1849.21327227  1101.59744825]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 63269.39647024 12683.88705566]
------
Step:3, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 63269.39647024 12683.88705566]
New Q values:  [  870.35122762  -168.92307549 63269.39647024  5816.86384494]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1794.1815172    26.73544252 2459.69674226  -35.88578819]
------
Step:4, Action:East
State  261
Old Q Values:  [1794.1815172    26.73544252 2459.69674226  -35.88578819]
New Q values:  [1794.1815172    26.73544252 2591.41001268  -35.88578819]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1520.02788632  5360.43771927]
------
Step:5, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 63269.39647024  5816.86384494]
New Q values:  [  870.35122762  -168.92307549 63269.39647024  3103.56854178]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1794.1815172    26.73544252 2591.41001268  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [1794.1815172    26.73544252 2591.41001268  -35.88578819]
New Q values:  [1794.1815172    26.73544252 2644.09532085  -35.88578819]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1520.02788632  5360.43771927]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1520.02788632  5360.43771927]
New Q values:  [-2527.46239811 -8521.23367799  1520.02788632  2936.80368396]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1794.1815172    26.73544252 2644.09532085  -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [1794.1815172    26.73544252 2644.09532085  -35.88578819]
New Q values:  [1537.97560379   26.73544252 2644.09532085  -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1214.85032484  456.45589161 2736.34332305  633.38494313]
------
Step:9, Action:North
State  181
Old Q Values:  [1214.85032484  456.45589161 2736.34332305  633.38494313]
New Q values:  [1112.18027931  456.45589161 2736.34332305  633.38494313]
Reward: 9  Episode Reward:  31
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2069.46716459  581.22500173    0.        ]
------
Step:10, Action:South
State  103
Old Q Values:  [ 221.30610858 2069.46716459  581.22500173    0.        ]
New Q values:  [ 221.30610858 1648.08986275  581.22500173    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1112.18027931  456.45589161 2736.34332305  633.38494313]
------
Step:11, Action:North
State  183
Old Q Values:  [1210.09738511  748.78287005 1975.26674247    0.        ]
New Q values:  [ 977.86591287  748.78287005 1975.26674247    0.        ]
Reward: -1  Episode Reward:  29
xxxxx
xa..x
x  .x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1648.08986275  581.22500173    0.        ]
------
Step:12, Action:South
State  102
Old Q Values:  [-180.6        1085.51935617    5.16       -180.6       ]
New Q values:  [-180.6        1284.69780031    5.16       -180.6       ]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
xa .x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  2836.96685949     0.        ]
------
Step:13, Action:East
State  183
Old Q Values:  [ 977.86591287  748.78287005 1975.26674247    0.        ]
New Q values:  [ 977.86591287  748.78287005 5430.91414721    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:14, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.27400349e+04 1.44725562e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.27400349e+04 2.44711318e+04 2.91043938e+03]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23072.87990602 62255.69768054  1612.00333505  2896.23519192]
------
Step:15, Action:South
State  208
Old Q Values:  [23072.87990602 62255.69768054  1612.00333505  2896.23519192]
New Q values:  [23072.87990602 26307.92464269  1612.00333505  2896.23519192]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1612.79874355 -6442.16912869 -8192.20126966  4687.4852349 ]
------
Step:16, Action:West
State  288
Old Q Values:  [ 1612.79874355 -6442.16912869 -8192.20126966  4687.4852349 ]
New Q values:  [ 1612.79874355 -6442.16912869 -8192.20126966  2755.43519915]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1520.02788632  2936.80368396]
------
Step:17, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  4316.96081214]
New Q values:  [   16.82637525 -5807.06396197  6847.15715551  2519.41292111]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1537.97560379   26.73544252 2644.09532085  -35.88578819]
------
Step:18, Action:East
State  261
Old Q Values:  [1537.97560379   26.73544252 2644.09532085  -35.88578819]
New Q values:  [1537.97560379   26.73544252 1938.07923353  -35.88578819]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1520.02788632  2936.80368396]
------
Step:19, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1520.02788632  2936.80368396]
New Q values:  [-2527.46239811 -8521.23367799  1520.02788632  1755.54524364]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1537.97560379   26.73544252 1938.07923353  -35.88578819]
------
Step:20, Action:North
State  261
Old Q Values:  [1537.97560379   26.73544252 1938.07923353  -35.88578819]
New Q values:  [1435.49323843   26.73544252 1938.07923353  -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1112.18027931  456.45589161 2736.34332305  633.38494313]
------
Step:21, Action:North
State  181
Old Q Values:  [1112.18027931  456.45589161 2736.34332305  633.38494313]
New Q values:  [1020.46571144  456.45589161 2736.34332305  633.38494313]
Reward: -1  Episode Reward:  29
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         1920.64533238    0.            0.        ]
------
Step:22, Action:South
State  100
Old Q Values:  [ 0.00000000e+00  1.69785211e+03 -8.40000000e-01  0.00000000e+00]
New Q values:  [ 0.00000000e+00  4.01109685e+03 -8.40000000e-01  0.00000000e+00]
Reward: -1  Episode Reward:  28
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  438.58725668  1371.51970183 11108.52001417 -4966.32149798]
------
Step:23, Action:East
State  181
Old Q Values:  [1020.46571144  456.45589161 2736.34332305  633.38494313]
New Q values:  [1020.46571144  456.45589161 2126.74508849  633.38494313]
Reward: -1  Episode Reward:  27
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3442.69253091  2474.1699074    239.04887894]
------
Step:24, Action:South
State  193
Old Q Values:  [-5922.26708831  4024.15539624 -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831 20589.88109957 -4582.3674281   1099.96026581]
Reward: -1  Episode Reward:  26
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 63269.39647024  3103.56854178]
------
Step:25, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549 63269.39647024  3103.56854178]
New Q values:  [  870.35122762  -168.92307549 26133.78914784  3103.56854178]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1612.79874355 -6442.16912869 -8192.20126966  2755.43519915]
------
Step:26, Action:West
State  288
Old Q Values:  [ 1612.79874355 -6442.16912869 -8192.20126966  2755.43519915]
New Q values:  [ 1612.79874355 -6442.16912869 -8192.20126966  8941.71082401]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 26133.78914784  3103.56854178]
------
Step:27, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1520.02788632  1755.54524364]
New Q values:  [-2527.46239811 -8521.23367799  1520.02788632  1283.04186752]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1435.49323843   26.73544252 1938.07923353  -35.88578819]
------
Step:28, Action:North
State  261
Old Q Values:  [1435.49323843   26.73544252 1938.07923353  -35.88578819]
New Q values:  [2202.87153954   26.73544252 1938.07923353  -35.88578819]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 977.86591287  748.78287005 5430.91414721    0.        ]
------
Step:29, Action:East
State  183
Old Q Values:  [ 977.86591287  748.78287005 5430.91414721    0.        ]
New Q values:  [ 977.86591287  748.78287005 2986.90662389    0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  2.14423183e+03  1.20371620e+03]
------
Step:30, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.27400349e+04 2.44711318e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.27400349e+04 1.76802301e+04 2.91043938e+03]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23072.87990602 26307.92464269  1612.00333505  2896.23519192]
------
Step:31, Action:South
State  208
Old Q Values:  [23072.87990602 26307.92464269  1612.00333505  2896.23519192]
New Q values:  [23072.87990602 13205.08310428  1612.00333505  2896.23519192]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1612.79874355 -6442.16912869 -8192.20126966  8941.71082401]
------
Step:32, Action:West
State  288
Old Q Values:  [ 1612.79874355 -6442.16912869 -8192.20126966  8941.71082401]
New Q values:  [ 1612.79874355 -6442.16912869 -8192.20126966  4032.0926955 ]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1520.02788632  1283.04186752]
------
Step:33, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1520.02788632  1283.04186752]
New Q values:  [-2527.46239811 -8521.23367799  1817.03896318  1283.04186752]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1612.79874355 -6442.16912869 -8192.20126966  4032.0926955 ]
------
Step:34, Action:North
State  288
Old Q Values:  [ 1612.79874355 -6442.16912869 -8192.20126966  4032.0926955 ]
New Q values:  [ 7566.38346923 -6442.16912869 -8192.20126966  4032.0926955 ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23072.87990602 13205.08310428  1612.00333505  2896.23519192]
------
Step:35, Action:North
State  210
Old Q Values:  [49043.02288325  1276.27927325  1634.17730316  1655.86006693]
New Q values:  [39759.88206316  1276.27927325  1634.17730316  1655.86006693]
Reward: 9  Episode Reward:  25
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 16843.0423315   -180.00807518 67124.24303285]
------
Step:36, Action:West
State  130
Old Q Values:  [46177.80406237 16843.0423315   -180.00807518 67124.24303285]
New Q values:  [ 46177.80406237  16843.0423315    -180.00807518 107587.90147272]
Reward: 100009  Episode Reward:  100034
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1842.28586852  442.24659409 -120.29354603]
------
Step:1, Action:South
State  109
Old Q Values:  [-241.10880094 1267.88921276   -8.57207238 -180.6       ]
New Q values:  [-241.10880094 1150.57921165   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1020.46571144  456.45589161 2126.74508849  633.38494313]
------
Step:2, Action:East
State  181
Old Q Values:  [1020.46571144  456.45589161 2126.74508849  633.38494313]
New Q values:  [ 1020.46571144   456.45589161 -4117.09420533   633.38494313]
Reward: -10001  Episode Reward:  -9992
xxxxx
x ..x
x g.x
x...x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.27400349e+04 1.76802301e+04 2.91043938e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  2336.35962208 1457.52629085 1141.49622464]
New Q values:  [  62.8218634  2336.35962208 2992.05675215 1141.49622464]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  8012.15411939]
------
Step:2, Action:West
State  208
Old Q Values:  [23072.87990602 13205.08310428  1612.00333505  2896.23519192]
New Q values:  [23072.87990602 13205.08310428  1612.00333505  6461.96311046]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.27400349e+04 1.76802301e+04 2.91043938e+03]
------
Step:3, Action:East
State  200
Old Q Values:  [  62.8218634  2336.35962208 2992.05675215 1141.49622464]
New Q values:  [  62.8218634  2336.35962208 3599.86893668 1141.49622464]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1364.68806392  2501.82066656 -8896.20691497  8012.15411939]
------
Step:4, Action:West
State  208
Old Q Values:  [23072.87990602 13205.08310428  1612.00333505  6461.96311046]
New Q values:  [23072.87990602 13205.08310428  1612.00333505  1888.25427787]
Reward: -10001  Episode Reward:  -9994
xxxxx
x ..x
x.g x
x...x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[39759.88206316  1276.27927325  1634.17730316  1655.86006693]
------
Step:1, Action:North
State  210
Old Q Values:  [39759.88206316  1276.27927325  1634.17730316  1655.86006693]
New Q values:  [16738.59847564  1276.27927325  1634.17730316  1655.86006693]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.76415217e+03 -3.22965309e-01  3.86259110e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.76415217e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  2.02692484e+03 -3.22965309e-01  3.86259110e+02]
Reward: -10001  Episode Reward:  -9992
xxxxx
x.. x
x..gx
x. .x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1842.28586852  442.24659409 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 1648.08986275  581.22500173    0.        ]
New Q values:  [221.30610858 970.77565853 581.22500173   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1020.46571144   456.45589161 -4117.09420533   633.38494313]
------
Step:2, Action:North
State  183
Old Q Values:  [ 977.86591287  748.78287005 2986.90662389    0.        ]
New Q values:  [ 681.77906271  748.78287005 2986.90662389    0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x  .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[221.30610858 970.77565853 581.22500173   0.        ]
------
Step:3, Action:South
State  103
Old Q Values:  [221.30610858 970.77565853 581.22500173   0.        ]
New Q values:  [221.30610858 693.84997684 581.22500173   0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1020.46571144   456.45589161 -4117.09420533   633.38494313]
------
Step:4, Action:North
State  181
Old Q Values:  [ 1020.46571144   456.45589161 -4117.09420533   633.38494313]
New Q values:  [  752.76004807   456.45589161 -4117.09420533   633.38494313]
Reward: -1  Episode Reward:  6
xxxxx
xa.gx
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094 1150.57921165   -8.57207238 -180.6       ]
------
Step:5, Action:South
State  109
Old Q Values:  [-241.10880094 1150.57921165   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  685.45969908   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
x g.x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  752.76004807   456.45589161 -4117.09420533   633.38494313]
------
Step:6, Action:North
State  180
Old Q Values:  [  438.58725668  1371.51970183 11108.52001417 -4966.32149798]
New Q values:  [-5375.11087523  1371.51970183 11108.52001417 -4966.32149798]
Reward: -10001  Episode Reward:  -9996
xxxxx
xg..x
x  .x
x...x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1842.28586852  442.24659409 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [221.30610858 693.84997684 581.22500173   0.        ]
New Q values:  [ 221.30610858 1179.0119779   581.22500173    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 681.77906271  748.78287005 2986.90662389    0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  2836.96685949     0.        ]
New Q values:  [    0.         -5969.29177534  5781.59419402     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:3, Action:East
State  200
Old Q Values:  [  62.8218634  2336.35962208 3599.86893668 1141.49622464]
New Q values:  [  62.8218634  2336.35962208 8367.21154648 1141.49622464]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23072.87990602 13205.08310428  1612.00333505  1888.25427787]
------
Step:4, Action:North
State  216
Old Q Values:  [ 1364.68806392  2501.82066656 -8896.20691497  8012.15411939]
New Q values:  [ 3115.96467343  2501.82066656 -8896.20691497  8012.15411939]
Reward: 9  Episode Reward:  36
xxxxx
xg.ax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  8548.96482619 -2383.80019164 -1068.62482128]
------
Step:5, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.02692484e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  3.21381617e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3115.96467343  2501.82066656 -8896.20691497  8012.15411939]
------
Step:6, Action:West
State  216
Old Q Values:  [ 3115.96467343  2501.82066656 -8896.20691497  8012.15411939]
New Q values:  [ 3115.96467343  2501.82066656 -8896.20691497  -285.5748883 ]
Reward: -10001  Episode Reward:  -9966
xxxxx
x . x
x g x
x.. x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2202.87153954   26.73544252 1938.07923353  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [2202.87153954   26.73544252 1938.07923353  -35.88578819]
New Q values:  [1782.62060298   26.73544252 1938.07923353  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 681.77906271  748.78287005 2986.90662389    0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  5781.59419402     0.        ]
New Q values:  [    0.         -5969.29177534  3127.17864261     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.71713655e+03  2.14423183e+03  1.20371620e+03]
------
Step:3, Action:South
State  194
Old Q Values:  [-6.00000000e-01  2.71713655e+03  2.14423183e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -2.85359823e+03  2.14423183e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9983
xxxxx
x...x
x  .x
x g.x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16738.59847564  1276.27927325  1634.17730316  1655.86006693]
------
Step:1, Action:North
State  208
Old Q Values:  [23072.87990602 13205.08310428  1612.00333505  1888.25427787]
New Q values:  [10198.69681383 13205.08310428  1612.00333505  1888.25427787]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.21381617e+03 -3.22965309e-01  3.86259110e+02]
------
Step:2, Action:West
State  136
Old Q Values:  [ -170.77177351  8548.96482619 -2383.80019164 -1068.62482128]
New Q values:  [ -170.77177351  8548.96482619 -2383.80019164  -352.61910939]
Reward: 9  Episode Reward:  18
xxxxx
x.agx
x.. x
x.. x
xxxxx
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  6932.68733156   805.60895034]
New Q values:  [-9594.56523706 -8069.05606225 -1147.71886791   805.60895034]
Reward: -10001  Episode Reward:  -9983
xxxxx
x.g x
x.. x
x.. x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.85359823e+03  2.14423183e+03  1.20371620e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.27400349e+04 1.76802301e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 1.27400349e+04 1.10390170e+04 2.91043938e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10198.69681383 13205.08310428  1612.00333505  1888.25427787]
------
Step:2, Action:South
State  208
Old Q Values:  [10198.69681383 13205.08310428  1612.00333505  1888.25427787]
New Q values:  [10198.69681383  7557.34828248  1612.00333505  1888.25427787]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x.  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7566.38346923 -6442.16912869 -8192.20126966  4032.0926955 ]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7566.38346923 -6442.16912869 -8192.20126966  4032.0926955 ]
New Q values:  [ 6085.56243184 -6442.16912869 -8192.20126966  4032.0926955 ]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x. ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10198.69681383  7557.34828248  1612.00333505  1888.25427787]
------
Step:4, Action:North
State  208
Old Q Values:  [10198.69681383  7557.34828248  1612.00333505  1888.25427787]
New Q values:  [11413.44879819  7557.34828248  1612.00333505  1888.25427787]
Reward: -9991  Episode Reward:  -9974
xxxxx
x..gx
x.  x
x . x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   458.60235688   669.34266395]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   458.60235688   669.34266395]
New Q values:  [ -281.736      -1150.91067548   458.60235688   773.43514419]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1667.66026202  826.09351776 -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134  1500.18074031   724.20787443     0.        ]
New Q values:  [-8463.16477134  1500.18074031   530.76583487     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xga.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225 -1147.71886791   805.60895034]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225 -1147.71886791   805.60895034]
New Q values:  [-9594.56523706 -8069.05606225  2111.00190069   805.60895034]
Reward: 9  Episode Reward:  17
xxxxx
x gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  8548.96482619 -2383.80019164  -352.61910939]
------
Step:4, Action:South
State  136
Old Q Values:  [ -170.77177351  8548.96482619 -2383.80019164  -352.61910939]
New Q values:  [ -170.77177351  6849.02056993 -2383.80019164  -352.61910939]
Reward: 9  Episode Reward:  26
xxxxx
x  gx
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11413.44879819  7557.34828248  1612.00333505  1888.25427787]
------
Step:5, Action:South
State  208
Old Q Values:  [11413.44879819  7557.34828248  1612.00333505  1888.25427787]
New Q values:  [11413.44879819  4854.00804254  1612.00333505  1888.25427787]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x..gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6085.56243184 -6442.16912869 -8192.20126966  4032.0926955 ]
------
Step:6, Action:West
State  288
Old Q Values:  [ 6085.56243184 -6442.16912869 -8192.20126966  4032.0926955 ]
New Q values:  [ 6085.56243184 -6442.16912869 -8192.20126966  9458.37382255]
Reward: 9  Episode Reward:  44
xxxxx
x  gx
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 26133.78914784  3103.56854178]
------
Step:7, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549 26133.78914784  3103.56854178]
New Q values:  [  870.35122762  -168.92307549 13290.4278059   3103.56854178]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6085.56243184 -6442.16912869 -8192.20126966  9458.37382255]
------
Step:8, Action:West
State  288
Old Q Values:  [ 6085.56243184 -6442.16912869 -8192.20126966  9458.37382255]
New Q values:  [ 6085.56243184 -6442.16912869 -8192.20126966  7769.87787079]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 13290.4278059   3103.56854178]
------
Step:9, Action:East
State  273
Old Q Values:  [  870.35122762  -168.92307549 13290.4278059   3103.56854178]
New Q values:  [ 870.35122762 -168.92307549 7646.5344836  3103.56854178]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6085.56243184 -6442.16912869 -8192.20126966  7769.87787079]
------
Step:10, Action:West
State  288
Old Q Values:  [ 6085.56243184 -6442.16912869 -8192.20126966  7769.87787079]
New Q values:  [ 6085.56243184 -6442.16912869 -8192.20126966  5401.3114934 ]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 7646.5344836  3103.56854178]
------
Step:11, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1817.03896318  1283.04186752]
New Q values:  [-2527.46239811 -8521.23367799  2551.88431482  1283.04186752]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6085.56243184 -6442.16912869 -8192.20126966  5401.3114934 ]
------
Step:12, Action:North
State  288
Old Q Values:  [ 6085.56243184 -6442.16912869 -8192.20126966  5401.3114934 ]
New Q values:  [ 5857.65961219 -6442.16912869 -8192.20126966  5401.3114934 ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11413.44879819  4854.00804254  1612.00333505  1888.25427787]
------
Step:13, Action:North
State  208
Old Q Values:  [11413.44879819  4854.00804254  1612.00333505  1888.25427787]
New Q values:  [5528.9243707  4854.00804254 1612.00333505 1888.25427787]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.21381617e+03 -3.22965309e-01  3.86259110e+02]
------
Step:14, Action:West
State  136
Old Q Values:  [ -170.77177351  6849.02056993 -2383.80019164  -352.61910939]
New Q values:  [ -170.77177351  6849.02056993 -2383.80019164   -72.21682463]
Reward: -1  Episode Reward:  36
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   231.43606375]
------
Step:15, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2111.00190069   805.60895034]
New Q values:  [-9594.56523706 -8069.05606225  2111.00190069   869.9469014 ]
Reward: -1  Episode Reward:  35
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6         1827.67773756 -5071.25637662     0.        ]
------
Step:16, Action:South
State  108
Old Q Values:  [-8463.16477134  1500.18074031   530.76583487     0.        ]
New Q values:  [-8463.16477134  3938.02830038   530.76583487     0.        ]
Reward: 9  Episode Reward:  44
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5375.11087523  1371.51970183 11108.52001417 -4966.32149798]
------
Step:17, Action:East
State  176
Old Q Values:  [76485.61294353  1621.55095326 98514.66694343     0.        ]
New Q values:  [ 76485.61294353   1621.55095326 103233.27724906      0.        ]
Reward: 100009  Episode Reward:  100053
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.27400349e+04 1.10390170e+04 2.91043938e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144  3442.69253091  2474.1699074    239.04887894]
New Q values:  [-2469.90645144  3436.62415902  2474.1699074    239.04887894]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  6847.15715551  2519.41292111]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2551.88431482  1283.04186752]
New Q values:  [-2527.46239811 -8521.23367799  2783.45160959  1283.04186752]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5857.65961219 -6442.16912869 -8192.20126966  5401.3114934 ]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5857.65961219 -6442.16912869 -8192.20126966  5401.3114934 ]
New Q values:  [ 4007.14115609 -6442.16912869 -8192.20126966  5401.3114934 ]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[5528.9243707  4854.00804254 1612.00333505 1888.25427787]
------
Step:4, Action:North
State  208
Old Q Values:  [5528.9243707  4854.00804254 1612.00333505 1888.25427787]
New Q values:  [-1728.32408074  4854.00804254  1612.00333505  1888.25427787]
Reward: -9991  Episode Reward:  -9964
xxxxx
x .gx
x.  x
x.  x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4007.14115609 -6442.16912869 -8192.20126966  5401.3114934 ]
------
Step:1, Action:West
State  288
Old Q Values:  [ 4007.14115609 -6442.16912869 -8192.20126966  5401.3114934 ]
New Q values:  [ 4007.14115609 -6442.16912869 -8192.20126966  3000.96008023]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2783.45160959  1283.04186752]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2783.45160959  1283.04186752]
New Q values:  [-2527.46239811 -8521.23367799  2314.92299066  1283.04186752]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4007.14115609 -6442.16912869 -8192.20126966  3000.96008023]
------
Step:3, Action:North
State  288
Old Q Values:  [ 4007.14115609 -6442.16912869 -8192.20126966  3000.96008023]
New Q values:  [ 3064.4588752  -6442.16912869 -8192.20126966  3000.96008023]
Reward: 9  Episode Reward:  17
xxxxx
x...x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  4854.00804254  1612.00333505  1888.25427787]
------
Step:4, Action:South
State  210
Old Q Values:  [16738.59847564  1276.27927325  1634.17730316  1655.86006693]
New Q values:  [16738.59847564  1429.24937186  1634.17730316  1655.86006693]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3064.4588752  -6442.16912869 -8192.20126966  3000.96008023]
------
Step:5, Action:North
State  288
Old Q Values:  [ 3064.4588752  -6442.16912869 -8192.20126966  3000.96008023]
New Q values:  [ 2681.38596284 -6442.16912869 -8192.20126966  3000.96008023]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  4854.00804254  1612.00333505  1888.25427787]
------
Step:6, Action:South
State  208
Old Q Values:  [-1728.32408074  4854.00804254  1612.00333505  1888.25427787]
New Q values:  [-1728.32408074  2841.29124109  1612.00333505  1888.25427787]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2681.38596284 -6442.16912869 -8192.20126966  3000.96008023]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2681.38596284 -6442.16912869 -8192.20126966  3000.96008023]
New Q values:  [ 2681.38596284 -6442.16912869 -8192.20126966  3493.74437717]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 7646.5344836  3103.56854178]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2314.92299066  1283.04186752]
New Q values:  [-2527.46239811 -8521.23367799  1973.49250942  1283.04186752]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2681.38596284 -6442.16912869 -8192.20126966  3493.74437717]
------
Step:9, Action:West
State  288
Old Q Values:  [ 2681.38596284 -6442.16912869 -8192.20126966  3493.74437717]
New Q values:  [ 2681.38596284 -6442.16912869 -8192.20126966 -4011.05449631]
Reward: -10001  Episode Reward:  -9989
xxxxx
x...x
x.  x
x.g x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  2841.29124109  1612.00333505  1888.25427787]
------
Step:1, Action:South
State  208
Old Q Values:  [-1728.32408074  2841.29124109  1612.00333505  1888.25427787]
New Q values:  [-1728.32408074  1946.33228529  1612.00333505  1888.25427787]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2681.38596284 -6442.16912869 -8192.20126966 -4011.05449631]
------
Step:2, Action:North
State  288
Old Q Values:  [ 2681.38596284 -6442.16912869 -8192.20126966 -4011.05449631]
New Q values:  [ 1655.85407072 -6442.16912869 -8192.20126966 -4011.05449631]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  1946.33228529  1612.00333505  1888.25427787]
------
Step:3, Action:South
State  210
Old Q Values:  [16738.59847564  1429.24937186  1634.17730316  1655.86006693]
New Q values:  [16738.59847564  1067.85596996  1634.17730316  1655.86006693]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x.. x
x.gax
xxxxx
Step:4, Action:North
State  288
Old Q Values:  [ 1655.85407072 -6442.16912869 -8192.20126966 -4011.05449631]
New Q values:  [ 1245.64131388 -6442.16912869 -8192.20126966 -4011.05449631]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  1946.33228529  1612.00333505  1888.25427787]
------
Step:5, Action:South
State  208
Old Q Values:  [-1728.32408074  1946.33228529  1612.00333505  1888.25427787]
New Q values:  [-1728.32408074  1151.62530828  1612.00333505  1888.25427787]
Reward: -1  Episode Reward:  5
xxxxx
x g.x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1245.64131388 -6442.16912869 -8192.20126966 -4011.05449631]
------
Step:6, Action:North
State  288
Old Q Values:  [ 1245.64131388 -6442.16912869 -8192.20126966 -4011.05449631]
New Q values:  [ 1064.13280891 -6442.16912869 -8192.20126966 -4011.05449631]
Reward: -1  Episode Reward:  4
xxxxx
x .gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  1151.62530828  1612.00333505  1888.25427787]
------
Step:7, Action:West
State  208
Old Q Values:  [-1728.32408074  1151.62530828  1612.00333505  1888.25427787]
New Q values:  [-1728.32408074  1151.62530828  1612.00333505  6937.66604102]
Reward: 9  Episode Reward:  13
xxxxx
x ..x
x.agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 20589.88109957 -4582.3674281   1099.96026581]
------
Step:8, Action:South
State  195
Old Q Values:  [   38.85388605 21295.43742998  1849.21327227  1101.59744825]
New Q values:  [   38.85388605 10817.53531707  1849.21327227  1101.59744825]
Reward: 9  Episode Reward:  22
xxxxx
x ..x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 7646.5344836  3103.56854178]
------
Step:9, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 7646.5344836  3103.56854178]
New Q values:  [ 870.35122762 -168.92307549 7646.5344836  1828.25118677]
Reward: 9  Episode Reward:  31
xxxxx
x ..x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1782.62060298   26.73544252 1938.07923353  -35.88578819]
------
Step:10, Action:East
State  261
Old Q Values:  [1782.62060298   26.73544252 1938.07923353  -35.88578819]
New Q values:  [1782.62060298   26.73544252 3068.59203849  -35.88578819]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x.  x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 7646.5344836  1828.25118677]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1973.49250942  1283.04186752]
New Q values:  [-2527.46239811 -8521.23367799  1973.49250942  1433.19435855]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1782.62060298   26.73544252 3068.59203849  -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [1782.62060298   26.73544252 3068.59203849  -35.88578819]
New Q values:  [1614.52022836   26.73544252 3068.59203849  -35.88578819]
Reward: 9  Episode Reward:  38
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 681.77906271  748.78287005 2986.90662389    0.        ]
------
Step:13, Action:East
State  181
Old Q Values:  [  752.76004807   456.45589161 -4117.09420533   633.38494313]
New Q values:  [ 752.76004807  456.45589161 4529.52664774  633.38494313]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 20589.88109957 -4582.3674281   1099.96026581]
------
Step:14, Action:South
State  193
Old Q Values:  [-5922.26708831 20589.88109957 -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831 10529.31278491 -4582.3674281   1099.96026581]
Reward: -1  Episode Reward:  36
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 7646.5344836  1828.25118677]
------
Step:15, Action:East
State  273
Old Q Values:  [ 870.35122762 -168.92307549 7646.5344836  1828.25118677]
New Q values:  [ 870.35122762 -168.92307549 3377.25363611 1828.25118677]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1064.13280891 -6442.16912869 -8192.20126966 -4011.05449631]
------
Step:16, Action:West
State  288
Old Q Values:  [ 1064.13280891 -6442.16912869 -8192.20126966 -4011.05449631]
New Q values:  [ 1064.13280891 -6442.16912869 -8192.20126966  -591.84570769]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 3377.25363611 1828.25118677]
------
Step:17, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  6847.15715551  2519.41292111]
New Q values:  [   16.82637525 -5807.06396197  3057.50270488  2519.41292111]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1064.13280891 -6442.16912869 -8192.20126966  -591.84570769]
------
Step:18, Action:North
State  288
Old Q Values:  [ 1064.13280891 -6442.16912869 -8192.20126966  -591.84570769]
New Q values:  [ 2506.35293587 -6442.16912869 -8192.20126966  -591.84570769]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  1151.62530828  1612.00333505  6937.66604102]
------
Step:19, Action:West
State  208
Old Q Values:  [-1728.32408074  1151.62530828  1612.00333505  6937.66604102]
New Q values:  [-1728.32408074  1151.62530828  1612.00333505  3805.45366411]
Reward: -1  Episode Reward:  31
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3436.62415902  2474.1699074    239.04887894]
------
Step:20, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.27400349e+04 1.10390170e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 5.68746172e+03 1.10390170e+04 2.91043938e+03]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1973.49250942  1433.19435855]
------
Step:21, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1973.49250942  1433.19435855]
New Q values:  [-2527.46239811 -8521.23367799  1540.70288453  1433.19435855]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2506.35293587 -6442.16912869 -8192.20126966  -591.84570769]
------
Step:22, Action:North
State  288
Old Q Values:  [ 2506.35293587 -6442.16912869 -8192.20126966  -591.84570769]
New Q values:  [ 2143.57727358 -6442.16912869 -8192.20126966  -591.84570769]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  1151.62530828  1612.00333505  3805.45366411]
------
Step:23, Action:East
State  208
Old Q Values:  [-1728.32408074  1151.62530828  1612.00333505  3805.45366411]
New Q values:  [-1728.32408074  1151.62530828  1605.83743325  3805.45366411]
Reward: -301  Episode Reward:  -273
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  1151.62530828  1605.83743325  3805.45366411]
------
Step:24, Action:West
State  208
Old Q Values:  [-1728.32408074  1151.62530828  1605.83743325  3805.45366411]
New Q values:  [-1728.32408074  1151.62530828  1605.83743325  4833.28655851]
Reward: -1  Episode Reward:  -274
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.68746172e+03 1.10390170e+04 2.91043938e+03]
------
Step:25, Action:East
State  196
Old Q Values:  [-2469.90645144  3436.62415902  2474.1699074    239.04887894]
New Q values:  [-2469.90645144  3436.62415902  2439.05393051   239.04887894]
Reward: -1  Episode Reward:  -275
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  1151.62530828  1605.83743325  4833.28655851]
------
Step:26, Action:West
State  208
Old Q Values:  [-1728.32408074  1151.62530828  1605.83743325  4833.28655851]
New Q values:  [-1728.32408074  1151.62530828  1605.83743325  -755.58028374]
Reward: -10001  Episode Reward:  -10276
xxxxx
x ..x
x g x
x   x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2510.40193244 -2735.46306511  3617.92263321 -6102.86502307]
------
Step:1, Action:East
State  261
Old Q Values:  [1614.52022836   26.73544252 3068.59203849  -35.88578819]
New Q values:  [1614.52022836   26.73544252 2150.08762686  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  3057.50270488  2519.41292111]
------
Step:2, Action:East
State  273
Old Q Values:  [ 870.35122762 -168.92307549 3377.25363611 1828.25118677]
New Q values:  [ 870.35122762 -168.92307549 1999.37463652 1828.25118677]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2143.57727358 -6442.16912869 -8192.20126966  -591.84570769]
------
Step:3, Action:North
State  288
Old Q Values:  [ 2143.57727358 -6442.16912869 -8192.20126966  -591.84570769]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966  -591.84570769]
Reward: -9991  Episode Reward:  -9973
xxxxx
x ..x
x..gx
x   x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-1728.32408074  1151.62530828  1605.83743325  -755.58028374]
------
Step:1, Action:East
State  210
Old Q Values:  [16738.59847564  1067.85596996  1634.17730316  1655.86006693]
New Q values:  [16738.59847564  1067.85596996  5494.65046396  1655.86006693]
Reward: -301  Episode Reward:  -301
xxxxx
x...x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16738.59847564  1067.85596996  5494.65046396  1655.86006693]
------
Step:2, Action:North
State  208
Old Q Values:  [-1728.32408074  1151.62530828  1605.83743325  -755.58028374]
New Q values:  [ 278.21521913 1151.62530828 1605.83743325 -755.58028374]
Reward: 9  Episode Reward:  -292
xxxxx
x..ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.21381617e+03 -3.22965309e-01  3.86259110e+02]
------
Step:3, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.21381617e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  6.30650601e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -293
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16738.59847564  1067.85596996  5494.65046396  1655.86006693]
------
Step:4, Action:North
State  208
Old Q Values:  [ 278.21521913 1151.62530828 1605.83743325 -755.58028374]
New Q values:  [2002.63789103 1151.62530828 1605.83743325 -755.58028374]
Reward: -1  Episode Reward:  -294
xxxxx
x..ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  6.30650601e+03 -3.22965309e-01  3.86259110e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  6.30650601e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  3.45679181e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -295
xxxxx
x.. x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3115.96467343  2501.82066656 -8896.20691497  -285.5748883 ]
------
Step:6, Action:North
State  210
Old Q Values:  [16738.59847564  1067.85596996  5494.65046396  1655.86006693]
New Q values:  [7731.87693222 1067.85596996 5494.65046396 1655.86006693]
Reward: -1  Episode Reward:  -296
xxxxx
x..ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.45679181e+03 -3.22965309e-01  3.86259110e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.45679181e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  3.70167980e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -297
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7731.87693222 1067.85596996 5494.65046396 1655.86006693]
------
Step:8, Action:North
State  208
Old Q Values:  [2002.63789103 1151.62530828 1605.83743325 -755.58028374]
New Q values:  [1910.95909709 1151.62530828 1605.83743325 -755.58028374]
Reward: -1  Episode Reward:  -298
xxxxx
x..ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.70167980e+03 -3.22965309e-01  3.86259110e+02]
------
Step:9, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.70167980e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  2.41486132e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -299
xxxxx
x.. x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3115.96467343  2501.82066656 -8896.20691497  -285.5748883 ]
------
Step:10, Action:North
State  216
Old Q Values:  [ 3115.96467343  2501.82066656 -8896.20691497  -285.5748883 ]
New Q values:  [ 3300.49204035  2501.82066656 -8896.20691497  -285.5748883 ]
Reward: -1  Episode Reward:  -300
xxxxx
xg.ax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6849.02056993 -2383.80019164   -72.21682463]
------
Step:11, Action:South
State  136
Old Q Values:  [ -170.77177351  6849.02056993 -2383.80019164   -72.21682463]
New Q values:  [ -170.77177351  3729.15584008 -2383.80019164   -72.21682463]
Reward: -1  Episode Reward:  -301
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3300.49204035  2501.82066656 -8896.20691497  -285.5748883 ]
------
Step:12, Action:North
State  208
Old Q Values:  [1910.95909709 1151.62530828 1605.83743325 -755.58028374]
New Q values:  [1488.24203572 1151.62530828 1605.83743325 -755.58028374]
Reward: -1  Episode Reward:  -302
xxxxx
x..ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.41486132e+03 -3.22965309e-01  3.86259110e+02]
------
Step:13, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.41486132e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  1.95549214e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -303
xxxxx
x.. x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3300.49204035  2501.82066656 -8896.20691497  -285.5748883 ]
------
Step:14, Action:North
State  208
Old Q Values:  [1488.24203572 1151.62530828 1605.83743325 -755.58028374]
New Q values:  [1181.34445667 1151.62530828 1605.83743325 -755.58028374]
Reward: -1  Episode Reward:  -304
xxxxx
x..ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.95549214e+03 -3.22965309e-01  3.86259110e+02]
------
Step:15, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.95549214e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  1.77174447e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -305
xxxxx
x.. x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3300.49204035  2501.82066656 -8896.20691497  -285.5748883 ]
------
Step:16, Action:North
State  210
Old Q Values:  [7731.87693222 1067.85596996 5494.65046396 1655.86006693]
New Q values:  [3623.67411347 1067.85596996 5494.65046396 1655.86006693]
Reward: -1  Episode Reward:  -306
xxxxx
x..ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.77174447e+03 -3.22965309e-01  3.86259110e+02]
------
Step:17, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.77174447e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  2.35649293e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -307
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3623.67411347 1067.85596996 5494.65046396 1655.86006693]
------
Step:18, Action:East
State  210
Old Q Values:  [3623.67411347 1067.85596996 5494.65046396 1655.86006693]
New Q values:  [3623.67411347 1067.85596996 3665.65532477 1655.86006693]
Reward: -301  Episode Reward:  -608
xxxxx
x.. x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3623.67411347 1067.85596996 3665.65532477 1655.86006693]
------
Step:19, Action:East
State  210
Old Q Values:  [3623.67411347 1067.85596996 3665.65532477 1655.86006693]
New Q values:  [3623.67411347 1067.85596996 2385.35872734 1655.86006693]
Reward: -301  Episode Reward:  -909
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3623.67411347 1067.85596996 2385.35872734 1655.86006693]
------
Step:20, Action:North
State  210
Old Q Values:  [3623.67411347 1067.85596996 2385.35872734 1655.86006693]
New Q values:  [2155.81752338 1067.85596996 2385.35872734 1655.86006693]
Reward: -1  Episode Reward:  -910
xxxxx
x..ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.35649293e+03 -3.22965309e-01  3.86259110e+02]
------
Step:21, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.35649293e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  1.65760479e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -911
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2155.81752338 1067.85596996 2385.35872734 1655.86006693]
------
Step:22, Action:East
State  210
Old Q Values:  [2155.81752338 1067.85596996 2385.35872734 1655.86006693]
New Q values:  [2155.81752338 1067.85596996 1489.15110914 1655.86006693]
Reward: -301  Episode Reward:  -1212
xxxxx
x.. x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2155.81752338 1067.85596996 1489.15110914 1655.86006693]
------
Step:23, Action:North
State  210
Old Q Values:  [2155.81752338 1067.85596996 1489.15110914 1655.86006693]
New Q values:  [1359.00844601 1067.85596996 1489.15110914 1655.86006693]
Reward: -1  Episode Reward:  -1213
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.65760479e+03 -3.22965309e-01  3.86259110e+02]
------
Step:24, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.65760479e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01  1.14419315e+03 -3.22965309e-01  3.86259110e+02]
Reward: -1  Episode Reward:  -1214
xxxxx
x.. x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[1181.34445667 1151.62530828 1605.83743325 -755.58028374]
------
Step:25, Action:East
State  210
Old Q Values:  [1359.00844601 1067.85596996 1489.15110914 1655.86006693]
New Q values:  [1359.00844601 1067.85596996  911.81846373 1655.86006693]
Reward: -301  Episode Reward:  -1515
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1359.00844601 1067.85596996  911.81846373 1655.86006693]
------
Step:26, Action:West
State  210
Old Q Values:  [1359.00844601 1067.85596996  911.81846373 1655.86006693]
New Q values:  [1359.00844601 1067.85596996  911.81846373 1305.01357573]
Reward: -1  Episode Reward:  -1516
xxxxx
x.. x
x.a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -2.85359823e+03  2.14423183e+03  1.20371620e+03]
------
Step:27, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -2.85359823e+03  2.14423183e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01 -2.85359823e+03  1.26479527e+03  1.20371620e+03]
Reward: -1  Episode Reward:  -1517
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1359.00844601 1067.85596996  911.81846373 1305.01357573]
------
Step:28, Action:North
State  210
Old Q Values:  [1359.00844601 1067.85596996  911.81846373 1305.01357573]
New Q values:  [ 886.26132206 1067.85596996  911.81846373 1305.01357573]
Reward: -1  Episode Reward:  -1518
xxxxx
x..ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.14419315e+03 -3.22965309e-01  3.86259110e+02]
------
Step:29, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.14419315e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.86259110e+02]
Reward: -10001  Episode Reward:  -11519
xxxxx
x.. x
x. gx
x...x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966  -591.84570769]
------
Step:1, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966  -591.84570769]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966   368.47410788]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 1999.37463652 1828.25118677]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1540.70288453  1433.19435855]
New Q values:  [-2527.46239811 -8521.23367799   726.22338618  1433.19435855]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966   368.47410788]
------
Step:3, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966   368.47410788]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966   576.74795072]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   726.22338618  1433.19435855]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   726.22338618  1433.19435855]
New Q values:  [-2527.46239811 -8521.23367799   726.22338618  6224.50420388]
Reward: 9  Episode Reward:  16
xxxxx
x.g.x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18819.42153485  2256.66526474 16022.11762108  1875.31501677]
------
Step:5, Action:North
State  261
Old Q Values:  [1614.52022836   26.73544252 2150.08762686  -35.88578819]
New Q values:  [2010.06608566   26.73544252 2150.08762686  -35.88578819]
Reward: 9  Episode Reward:  25
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 752.76004807  456.45589161 4529.52664774  633.38494313]
------
Step:6, Action:North
State  180
Old Q Values:  [-5375.11087523  1371.51970183 11108.52001417 -4966.32149798]
New Q values:  [-1759.23501     1371.51970183 11108.52001417 -4966.32149798]
Reward: 9  Episode Reward:  34
xxxxx
xa..x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        1284.69780031    5.16       -180.6       ]
------
Step:7, Action:East
State  100
Old Q Values:  [ 0.00000000e+00  4.01109685e+03 -8.40000000e-01  0.00000000e+00]
New Q values:  [   0.         4011.09684676    5.064         0.        ]
Reward: 9  Episode Reward:  43
xxxxx
xga.x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[0. 0. 0. 0.]
------
Step:8, Action:North
State  118
Old Q Values:  [ 121.84917267 2496.42792156    0.          503.49427758]
New Q values:  [ 617.06804554 2496.42792156    0.          503.49427758]
Reward: -301  Episode Reward:  -258
xxxxx
x a.x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ 617.06804554 2496.42792156    0.          503.49427758]
------
Step:9, Action:South
State  116
Old Q Values:  [0. 0. 0. 0.]
New Q values:  [   0.         1036.38724771    0.            0.        ]
Reward: 9  Episode Reward:  -249
xxxxx
xg .x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  3436.62415902  2439.05393051   239.04887894]
------
Step:10, Action:South
State  196
Old Q Values:  [-2469.90645144  3436.62415902  2439.05393051   239.04887894]
New Q values:  [-2469.90645144  2291.30047507  2439.05393051   239.04887894]
Reward: -1  Episode Reward:  -250
xxxxx
x  .x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  3057.50270488  2519.41292111]
------
Step:11, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  3057.50270488  2519.41292111]
New Q values:  [   16.82637525 -5807.06396197  1395.42546717  2519.41292111]
Reward: -1  Episode Reward:  -251
xxxxx
x  .x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966   576.74795072]
------
Step:12, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966   576.74795072]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966   985.92305662]
Reward: -1  Episode Reward:  -252
xxxxx
x  .x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1395.42546717  2519.41292111]
------
Step:13, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1395.42546717  2519.41292111]
New Q values:  [   16.82637525 -5807.06396197  1395.42546717  1652.1914565 ]
Reward: -1  Episode Reward:  -253
xxxxx
x  .x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2010.06608566   26.73544252 2150.08762686  -35.88578819]
------
Step:14, Action:East
State  261
Old Q Values:  [2010.06608566   26.73544252 2150.08762686  -35.88578819]
New Q values:  [ 2010.06608566    26.73544252 -4644.9075123    -35.88578819]
Reward: -10001  Episode Reward:  -10254
xxxxx
x  .x
x   x
x g x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1185.71636417   747.08739113]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1185.71636417   747.08739113]
New Q values:  [ -253.44886264 -1902.20915811   595.56427871   747.08739113]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.86259110e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.86259110e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.78029861e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   595.56427871   747.08739113]
------
Step:3, Action:West
State  121
Old Q Values:  [    0.             0.         -7059.48304203   231.43606375]
New Q values:  [    0.             0.         -7059.48304203   303.61233522]
Reward: 9  Episode Reward:  17
xxxxx
xa gx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  685.45969908   -8.57207238 -180.6       ]
------
Step:4, Action:South
State  109
Old Q Values:  [-241.10880094  685.45969908   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  895.01202392   -8.57207238 -180.6       ]
Reward: 9  Episode Reward:  26
xxxxx
x g x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 2051.42714763 1558.87588424  154.04646645]
------
Step:5, Action:South
State  188
Old Q Values:  [-6523.78898263  2281.76533402  2014.57249182     0.        ]
New Q values:  [-6523.78898263  2003.48292357  2014.57249182     0.        ]
Reward: 9  Episode Reward:  35
xxxxx
xg  x
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2510.40193244 -2735.46306511  3617.92263321 -6102.86502307]
------
Step:6, Action:East
State  261
Old Q Values:  [ 2010.06608566    26.73544252 -4644.9075123    -35.88578819]
New Q values:  [2010.06608566   26.73544252   14.78825624  -35.88578819]
Reward: 9  Episode Reward:  44
xxxxx
x g x
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   726.22338618  6224.50420388]
------
Step:7, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1395.42546717  1652.1914565 ]
New Q values:  [   16.82637525 -5807.06396197  1395.42546717  1263.2964083 ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2010.06608566   26.73544252   14.78825624  -35.88578819]
------
Step:8, Action:North
State  260
Old Q Values:  [-2510.40193244 -2735.46306511  3617.92263321 -6102.86502307]
New Q values:  [-3672.20476873 -2735.46306511  3617.92263321 -6102.86502307]
Reward: -10001  Episode Reward:  -9958
xxxxx
x   x
xg. x
x  .x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   458.60235688   773.43514419]
------
Step:1, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2111.00190069   869.9469014 ]
New Q values:  [-9594.56523706 -8069.05606225  2111.00190069   621.88236774]
Reward: 9  Episode Reward:  9
xxxxx
xag.x
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  895.01202392   -8.57207238 -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1842.28586852  442.24659409 -120.29354603]
New Q values:  [-177.44732869 2101.17234173  442.24659409 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 752.76004807  456.45589161 4529.52664774  633.38494313]
------
Step:3, Action:North
State  183
Old Q Values:  [ 681.77906271  748.78287005 2986.90662389    0.        ]
New Q values:  [ 902.4633276   748.78287005 2986.90662389    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
xa .x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2101.17234173  442.24659409 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 2101.17234173  442.24659409 -120.29354603]
New Q values:  [-177.44732869 1735.94092386  442.24659409 -120.29354603]
Reward: -1  Episode Reward:  16
xxxxx
x  .x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 2986.90662389    0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 2986.90662389    0.        ]
New Q values:  [ 902.4633276   748.78287005 5835.57009978    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x  .x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:6, Action:East
State  200
Old Q Values:  [  62.8218634  2336.35962208 8367.21154648 1141.49622464]
New Q values:  [  62.8218634  2336.35962208 4342.4322307  1141.49622464]
Reward: 9  Episode Reward:  24
xxxxx
x  .x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3300.49204035  2501.82066656 -8896.20691497  -285.5748883 ]
------
Step:7, Action:North
State  218
Old Q Values:  [1477.62634943 3567.55356402    0.          852.63459632]
New Q values:  [ 709.85949819 3567.55356402    0.          852.63459632]
Reward: 9  Episode Reward:  33
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.78029861e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.78029861e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.74738162e+02]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   595.56427871   747.08739113]
------
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   595.56427871   747.08739113]
New Q values:  [ -253.44886264 -1902.20915811   595.56427871   819.01723361]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1735.94092386  442.24659409 -120.29354603]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869 1735.94092386  442.24659409 -120.29354603]
New Q values:  [-177.44732869 1309.20451383  442.24659409 -120.29354603]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 2051.42714763 1558.87588424  154.04646645]
------
Step:11, Action:South
State  188
Old Q Values:  [-6523.78898263  2003.48292357  2014.57249182     0.        ]
New Q values:  [-6523.78898263  1892.16995939  2014.57249182     0.        ]
Reward: 9  Episode Reward:  39
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3672.20476873 -2735.46306511  3617.92263321 -6102.86502307]
------
Step:12, Action:East
State  260
Old Q Values:  [-3672.20476873 -2735.46306511  3617.92263321 -6102.86502307]
New Q values:  [-3672.20476873 -2735.46306511  1871.19669344 -6102.86502307]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1395.42546717  1263.2964083 ]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   726.22338618  6224.50420388]
New Q values:  [-2527.46239811 -8521.23367799 60591.66627146  6224.50420388]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2010.06608566   26.73544252   14.78825624  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [2010.06608566   26.73544252   14.78825624  -35.88578819]
New Q values:  [2168.28442859   26.73544252   14.78825624  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 752.76004807  456.45589161 4529.52664774  633.38494313]
------
Step:2, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 5835.57009978    0.        ]
New Q values:  [ 902.4633276   748.78287005 5584.88863503    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 10817.53531707  1849.21327227  1101.59744825]
------
Step:3, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -2.85359823e+03  1.26479527e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.10414606e+04  1.26479527e+03  1.20371620e+03]
Reward: -9991  Episode Reward:  -9973
xxxxx
x...x
x  .x
x g x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2168.28442859   26.73544252   14.78825624  -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [2168.28442859   26.73544252   14.78825624  -35.88578819]
New Q values:  [2231.57176576   26.73544252   14.78825624  -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 752.76004807  456.45589161 4529.52664774  633.38494313]
------
Step:2, Action:East
State  181
Old Q Values:  [ 752.76004807  456.45589161 4529.52664774  633.38494313]
New Q values:  [ 752.76004807  456.45589161 -871.08424804  633.38494313]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.. x
x g.x
x ..x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  895.01202392   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  108
Old Q Values:  [-8463.16477134  3938.02830038   530.76583487     0.        ]
New Q values:  [-8463.16477134  4913.1673244    530.76583487     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1759.23501     1371.51970183 11108.52001417 -4966.32149798]
------
Step:2, Action:East
State  180
Old Q Values:  [-1759.23501     1371.51970183 11108.52001417 -4966.32149798]
New Q values:  [-1759.23501     1371.51970183  5180.52418482 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2291.30047507  2439.05393051   239.04887894]
------
Step:3, Action:East
State  200
Old Q Values:  [  62.8218634  2336.35962208 4342.4322307  1141.49622464]
New Q values:  [  62.8218634  2336.35962208 2732.52050438 1141.49622464]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3300.49204035  2501.82066656 -8896.20691497  -285.5748883 ]
------
Step:4, Action:North
State  218
Old Q Values:  [ 709.85949819 3567.55356402    0.          852.63459632]
New Q values:  [ 401.76524785 3567.55356402    0.          852.63459632]
Reward: 9  Episode Reward:  36
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.74738162e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.74738162e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.81325808e+02]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
xg..x
xxxxx
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   458.60235688   773.43514419]
New Q values:  [ -281.736      -1150.91067548   297.23868516   773.43514419]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.81325808e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.81325808e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.83960866e+02]
Reward: -1  Episode Reward:  33
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   773.43514419]
------
Step:8, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2111.00190069   621.88236774]
New Q values:  [-9594.56523706 -8069.05606225  2111.00190069 -4277.89685558]
Reward: -10001  Episode Reward:  -9968
xxxxx
xg  x
x   x
x...x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 60591.66627146  6224.50420388]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 60591.66627146  6224.50420388]
New Q values:  [-2527.46239811 -8521.23367799 24537.84342557  6224.50420388]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966   985.92305662]
------
Step:2, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966   985.92305662]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966  7755.12225032]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 24537.84342557  6224.50420388]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 24537.84342557  6224.50420388]
New Q values:  [-2527.46239811 -8521.23367799 12141.07404532  6224.50420388]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966  7755.12225032]
------
Step:4, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966  7755.12225032]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966  6743.77111372]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 12141.07404532  6224.50420388]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 12141.07404532  6224.50420388]
New Q values:  [-2527.46239811 -8521.23367799  6878.96095225  6224.50420388]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966  6743.77111372]
------
Step:6, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966  6743.77111372]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966  4760.59673116]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6878.96095225  6224.50420388]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6878.96095225  6224.50420388]
New Q values:  [-2527.46239811 -8521.23367799  4179.16340025  6224.50420388]
Reward: -1  Episode Reward:  3
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966  4760.59673116]
------
Step:8, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966  4760.59673116]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966  3770.98995363]
Reward: -1  Episode Reward:  2
xxxxx
x ..x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4179.16340025  6224.50420388]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4179.16340025  6224.50420388]
New Q values:  [-2527.46239811 -8521.23367799  4179.16340025  3164.67321128]
Reward: 9  Episode Reward:  11
xxxxx
x g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2231.57176576   26.73544252   14.78825624  -35.88578819]
------
Step:10, Action:North
State  261
Old Q Values:  [2231.57176576   26.73544252   14.78825624  -35.88578819]
New Q values:  [1123.85672072   26.73544252   14.78825624  -35.88578819]
Reward: 9  Episode Reward:  20
xxxxx
x .gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 752.76004807  456.45589161 -871.08424804  633.38494313]
------
Step:11, Action:North
State  181
Old Q Values:  [ 752.76004807  456.45589161 -871.08424804  633.38494313]
New Q values:  [ 654.2076126   456.45589161 -871.08424804  633.38494313]
Reward: -1  Episode Reward:  19
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1179.0119779   581.22500173    0.        ]
------
Step:12, Action:South
State  109
Old Q Values:  [-241.10880094  895.01202392   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  553.66709335   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  18
xxxxx
x .gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 654.2076126   456.45589161 -871.08424804  633.38494313]
------
Step:13, Action:North
State  181
Old Q Values:  [ 654.2076126   456.45589161 -871.08424804  633.38494313]
New Q values:  [ 427.18317304  456.45589161 -871.08424804  633.38494313]
Reward: -1  Episode Reward:  17
xxxxx
xag.x
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  553.66709335   -8.57207238 -180.6       ]
------
Step:14, Action:South
State  103
Old Q Values:  [ 221.30610858 1179.0119779   581.22500173    0.        ]
New Q values:  [221.30610858 661.0202741  581.22500173   0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 427.18317304  456.45589161 -871.08424804  633.38494313]
------
Step:15, Action:West
State  181
Old Q Values:  [ 427.18317304  456.45589161 -871.08424804  633.38494313]
New Q values:  [ 427.18317304  456.45589161 -871.08424804  262.76946019]
Reward: -301  Episode Reward:  -285
xxxxx
x ..x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 427.18317304  456.45589161 -871.08424804  262.76946019]
------
Step:16, Action:South
State  181
Old Q Values:  [ 427.18317304  456.45589161 -871.08424804  262.76946019]
New Q values:  [ 427.18317304  519.13937286 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -286
xxxxx
x ..x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1123.85672072   26.73544252   14.78825624  -35.88578819]
------
Step:17, Action:North
State  261
Old Q Values:  [1123.85672072   26.73544252   14.78825624  -35.88578819]
New Q values:  [2124.4092788    26.73544252   14.78825624  -35.88578819]
Reward: -1  Episode Reward:  -287
xxxxx
x ..x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 5584.88863503    0.        ]
------
Step:18, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 5584.88863503    0.        ]
New Q values:  [ 902.4633276   748.78287005 2881.82752172    0.        ]
Reward: 9  Episode Reward:  -278
xxxxx
x ..x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194 1136.00704508 2141.57355904 1915.70494401]
------
Step:19, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.10414606e+04  1.26479527e+03  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.10414606e+04  9.02822179e+02  1.20371620e+03]
Reward: 9  Episode Reward:  -269
xxxxx
x ..x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  911.81846373 1305.01357573]
------
Step:20, Action:West
State  210
Old Q Values:  [ 886.26132206 1067.85596996  911.81846373 1305.01357573]
New Q values:  [ 886.26132206 1067.85596996  911.81846373 3766.66602541]
Reward: -1  Episode Reward:  -270
xxxxx
x ..x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 10817.53531707  1849.21327227  1101.59744825]
------
Step:21, Action:South
State  193
Old Q Values:  [-5922.26708831 10529.31278491 -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831  4810.93750492 -4582.3674281   1099.96026581]
Reward: -1  Episode Reward:  -271
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 1999.37463652 1828.25118677]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4179.16340025  3164.67321128]
New Q values:  [-2527.46239811 -8521.23367799  2802.36234619  3164.67321128]
Reward: -1  Episode Reward:  -272
xxxxx
x ..x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966  3770.98995363]
------
Step:23, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966  3770.98995363]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966  2107.60837241]
Reward: -1  Episode Reward:  -273
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 1999.37463652 1828.25118677]
------
Step:24, Action:East
State  273
Old Q Values:  [ 870.35122762 -168.92307549 1999.37463652 1828.25118677]
New Q values:  [ 870.35122762 -168.92307549 1431.43236633 1828.25118677]
Reward: -1  Episode Reward:  -274
xxxxx
x .gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966  2107.60837241]
------
Step:25, Action:West
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966  2107.60837241]
New Q values:  [-4655.41786059 -6442.16912869 -8192.20126966  1390.91870499]
Reward: -1  Episode Reward:  -275
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 1431.43236633 1828.25118677]
------
Step:26, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2802.36234619  3164.67321128]
New Q values:  [-2527.46239811 -8521.23367799  2802.36234619  1902.59206815]
Reward: -1  Episode Reward:  -276
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2124.4092788    26.73544252   14.78825624  -35.88578819]
------
Step:27, Action:North
State  261
Old Q Values:  [2124.4092788    26.73544252   14.78825624  -35.88578819]
New Q values:  [1004.90552338   26.73544252   14.78825624  -35.88578819]
Reward: -1  Episode Reward:  -277
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 427.18317304  519.13937286 -871.08424804  262.76946019]
------
Step:28, Action:South
State  181
Old Q Values:  [ 427.18317304  519.13937286 -871.08424804  262.76946019]
New Q values:  [ 427.18317304  508.52740616 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -278
xxxxx
x .gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1004.90552338   26.73544252   14.78825624  -35.88578819]
------
Step:29, Action:North
State  261
Old Q Values:  [1004.90552338   26.73544252   14.78825624  -35.88578819]
New Q values:  [553.9204312   26.73544252  14.78825624 -35.88578819]
Reward: -1  Episode Reward:  -279
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 427.18317304  508.52740616 -871.08424804  262.76946019]
------
Step:30, Action:South
State  181
Old Q Values:  [ 427.18317304  508.52740616 -871.08424804  262.76946019]
New Q values:  [ 427.18317304  368.98709182 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -280
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[553.9204312   26.73544252  14.78825624 -35.88578819]
------
Step:31, Action:North
State  261
Old Q Values:  [553.9204312   26.73544252  14.78825624 -35.88578819]
New Q values:  [1085.516429     26.73544252   14.78825624  -35.88578819]
Reward: -1  Episode Reward:  -281
xxxxx
x ..x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 2881.82752172    0.        ]
------
Step:32, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 2881.82752172    0.        ]
New Q values:  [ 902.4633276   748.78287005 4397.39160381    0.        ]
Reward: -1  Episode Reward:  -282
xxxxx
x ..x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 10817.53531707  1849.21327227  1101.59744825]
------
Step:33, Action:South
State  193
Old Q Values:  [-5922.26708831  4810.93750492 -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831  2472.250358   -4582.3674281   1099.96026581]
Reward: -1  Episode Reward:  -283
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 1431.43236633 1828.25118677]
------
Step:34, Action:West
State  273
Old Q Values:  [ 870.35122762 -168.92307549 1431.43236633 1828.25118677]
New Q values:  [ 870.35122762 -168.92307549 1431.43236633 1056.35540341]
Reward: -1  Episode Reward:  -284
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1085.516429     26.73544252   14.78825624  -35.88578819]
------
Step:35, Action:North
State  261
Old Q Values:  [1085.516429     26.73544252   14.78825624  -35.88578819]
New Q values:  [561.76152351  26.73544252  14.78825624 -35.88578819]
Reward: -1  Episode Reward:  -285
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 427.18317304  368.98709182 -871.08424804  262.76946019]
------
Step:36, Action:North
State  181
Old Q Values:  [ 427.18317304  368.98709182 -871.08424804  262.76946019]
New Q values:  [ 368.57935145  368.98709182 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -286
xxxxx
xa..x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[221.30610858 661.0202741  581.22500173   0.        ]
------
Step:37, Action:South
State  103
Old Q Values:  [221.30610858 661.0202741  581.22500173   0.        ]
New Q values:  [221.30610858 374.50423719 581.22500173   0.        ]
Reward: -1  Episode Reward:  -287
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 368.57935145  368.98709182 -871.08424804  262.76946019]
------
Step:38, Action:South
State  181
Old Q Values:  [ 368.57935145  368.98709182 -871.08424804  262.76946019]
New Q values:  [ 368.57935145  315.52329378 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -288
xxxxx
x .gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[561.76152351  26.73544252  14.78825624 -35.88578819]
------
Step:39, Action:North
State  261
Old Q Values:  [561.76152351  26.73544252  14.78825624 -35.88578819]
New Q values:  [334.67841484  26.73544252  14.78825624 -35.88578819]
Reward: -1  Episode Reward:  -289
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 368.57935145  315.52329378 -871.08424804  262.76946019]
------
Step:40, Action:North
State  181
Old Q Values:  [ 368.57935145  315.52329378 -871.08424804  262.76946019]
New Q values:  [ 723.02534029  315.52329378 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -290
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         1920.64533238    0.            0.        ]
------
Step:41, Action:South
State  103
Old Q Values:  [221.30610858 374.50423719 581.22500173   0.        ]
New Q values:  [221.30610858 366.10929696 581.22500173   0.        ]
Reward: -1  Episode Reward:  -291
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 723.02534029  315.52329378 -871.08424804  262.76946019]
------
Step:42, Action:North
State  181
Old Q Values:  [ 723.02534029  315.52329378 -871.08424804  262.76946019]
New Q values:  [ 864.80373583  315.52329378 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -292
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         1920.64533238    0.            0.        ]
------
Step:43, Action:South
State  103
Old Q Values:  [221.30610858 366.10929696 581.22500173   0.        ]
New Q values:  [221.30610858 405.28483953 581.22500173   0.        ]
Reward: -1  Episode Reward:  -293
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 864.80373583  315.52329378 -871.08424804  262.76946019]
------
Step:44, Action:North
State  181
Old Q Values:  [ 864.80373583  315.52329378 -871.08424804  262.76946019]
New Q values:  [ 921.51509405  315.52329378 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -294
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         1920.64533238    0.            0.        ]
------
Step:45, Action:South
State  103
Old Q Values:  [221.30610858 405.28483953 581.22500173   0.        ]
New Q values:  [221.30610858 437.96846403 581.22500173   0.        ]
Reward: -1  Episode Reward:  -295
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 921.51509405  315.52329378 -871.08424804  262.76946019]
------
Step:46, Action:North
State  181
Old Q Values:  [ 921.51509405  315.52329378 -871.08424804  262.76946019]
New Q values:  [ 944.19963733  315.52329378 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  -296
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         1920.64533238    0.            0.        ]
------
Step:47, Action:South
State  101
Old Q Values:  [   0.         1920.64533238    0.            0.        ]
New Q values:  [   0.         1050.91802415    0.            0.        ]
Reward: -1  Episode Reward:  -297
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 944.19963733  315.52329378 -871.08424804  262.76946019]
------
Step:48, Action:North
State  180
Old Q Values:  [-1759.23501     1371.51970183  5180.52418482 -4966.32149798]
New Q values:  [-5500.96494997  1371.51970183  5180.52418482 -4966.32149798]
Reward: -10001  Episode Reward:  -10298
xxxxx
xg..x
x   x
x   x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-4655.41786059 -6442.16912869 -8192.20126966  1390.91870499]
------
Step:1, Action:North
State  288
Old Q Values:  [-4655.41786059 -6442.16912869 -8192.20126966  1390.91870499]
New Q values:  [ -726.76733661 -6442.16912869 -8192.20126966  1390.91870499]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  911.81846373 3766.66602541]
------
Step:2, Action:West
State  208
Old Q Values:  [1181.34445667 1151.62530828 1605.83743325 -755.58028374]
New Q values:  [1181.34445667 1151.62530828 1605.83743325 3014.87297937]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.68746172e+03 1.10390170e+04 2.91043938e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 5.68746172e+03 1.10390170e+04 2.91043938e+03]
New Q values:  [3.89777037e-01 5.68746172e+03 5.31946868e+03 2.91043938e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[1181.34445667 1151.62530828 1605.83743325 3014.87297937]
------
Step:4, Action:East
State  208
Old Q Values:  [1181.34445667 1151.62530828 1605.83743325 3014.87297937]
New Q values:  [ 1181.34445667  1151.62530828 -4633.80313289  3014.87297937]
Reward: -10301  Episode Reward:  -10284
xxxxx
x...x
x. gx
x.  x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   773.43514419]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   773.43514419]
New Q values:  [ -281.736      -1150.91067548   297.23868516   815.07213628]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1667.66026202  826.09351776 -180.6       ]
------
Step:2, Action:East
State  111
Old Q Values:  [-177.44732869 1309.20451383  442.24659409 -120.29354603]
New Q values:  [-177.44732869 1309.20451383  420.82027852 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   815.07213628]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   815.07213628]
New Q values:  [ -281.736      -1150.91067548   297.23868516   825.72693312]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1667.66026202  826.09351776 -180.6       ]
------
Step:4, Action:East
State  108
Old Q Values:  [-8463.16477134  4913.1673244    530.76583487     0.        ]
New Q values:  [-8463.16477134  4913.1673244    845.00690416     0.        ]
Reward: -1  Episode Reward:  6
xxxxx
xga.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2111.00190069 -4277.89685558]
------
Step:5, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2111.00190069 -4277.89685558]
New Q values:  [-9594.56523706 -8069.05606225  1968.5475123  -4277.89685558]
Reward: 9  Episode Reward:  15
xxxxx
x gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3729.15584008 -2383.80019164   -72.21682463]
------
Step:6, Action:South
State  136
Old Q Values:  [ -170.77177351  3729.15584008 -2383.80019164   -72.21682463]
New Q values:  [ -170.77177351  2401.52422984 -2383.80019164   -72.21682463]
Reward: 9  Episode Reward:  24
xxxxx
x  gx
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1181.34445667  1151.62530828 -4633.80313289  3014.87297937]
------
Step:7, Action:West
State  216
Old Q Values:  [ 3300.49204035  2501.82066656 -8896.20691497  -285.5748883 ]
New Q values:  [ 3300.49204035  2501.82066656 -8896.20691497  1597.4085592 ]
Reward: 9  Episode Reward:  33
xxxxx
x g x
x.a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.68746172e+03 5.31946868e+03 2.91043938e+03]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.68746172e+03 5.31946868e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.12109339e+03 5.31946868e+03 2.91043938e+03]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2802.36234619  1902.59206815]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2802.36234619  1902.59206815]
New Q values:  [-2527.46239811 -8521.23367799  1543.62054997  1902.59206815]
Reward: 9  Episode Reward:  51
xxxxx
x g x
x.  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -726.76733661 -6442.16912869 -8192.20126966  1390.91870499]
------
Step:10, Action:West
State  288
Old Q Values:  [ -726.76733661 -6442.16912869 -8192.20126966  1390.91870499]
New Q values:  [ -726.76733661 -6442.16912869 -8192.20126966  1126.54510244]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1543.62054997  1902.59206815]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1543.62054997  1902.59206815]
New Q values:  [-2527.46239811 -8521.23367799  1543.62054997 10914.00866725]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 10195.91087381  5576.40109469   644.94785455]
------
Step:12, Action:South
State  257
Old Q Values:  [18819.42153485  2256.66526474 16022.11762108  1875.31501677]
New Q values:  [18819.42153485  6367.89256635 16022.11762108  1875.31501677]
Reward: -301  Episode Reward:  -252
xxxxx
x   x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18819.42153485  6367.89256635 16022.11762108  1875.31501677]
------
Step:13, Action:North
State  257
Old Q Values:  [18819.42153485  6367.89256635 16022.11762108  1875.31501677]
New Q values:  [91751.11446922  6367.89256635 16022.11762108  1875.31501677]
Reward: 100009  Episode Reward:  99757
xxxxx
x g x
xa  x
x   x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -726.76733661 -6442.16912869 -8192.20126966  1126.54510244]
------
Step:1, Action:West
State  288
Old Q Values:  [ -726.76733661 -6442.16912869 -8192.20126966  1126.54510244]
New Q values:  [ -726.76733661 -6442.16912869 -8192.20126966  3730.22064115]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1543.62054997 10914.00866725]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1543.62054997 10914.00866725]
New Q values:  [-2527.46239811 -8521.23367799  1543.62054997  4932.36247493]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3672.20476873 -2735.46306511  1871.19669344 -6102.86502307]
------
Step:3, Action:East
State  260
Old Q Values:  [-3672.20476873 -2735.46306511  1871.19669344 -6102.86502307]
New Q values:  [-3672.20476873 -2735.46306511  2227.58741985 -6102.86502307]
Reward: -1  Episode Reward:  17
xxxxx
xg .x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1543.62054997  4932.36247493]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1543.62054997  4932.36247493]
New Q values:  [-2527.46239811 -8521.23367799  1543.62054997  2640.62121593]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3672.20476873 -2735.46306511  2227.58741985 -6102.86502307]
------
Step:5, Action:East
State  260
Old Q Values:  [-3672.20476873 -2735.46306511  2227.58741985 -6102.86502307]
New Q values:  [-3672.20476873 -2735.46306511  1682.62133272 -6102.86502307]
Reward: -1  Episode Reward:  15
xxxxx
xg .x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1543.62054997  2640.62121593]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1543.62054997  2640.62121593]
New Q values:  [-2527.46239811 -8521.23367799  1543.62054997  1560.43488619]
Reward: -1  Episode Reward:  14
xxxxx
xg .x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3672.20476873 -2735.46306511  1682.62133272 -6102.86502307]
------
Step:7, Action:East
State  261
Old Q Values:  [334.67841484  26.73544252  14.78825624 -35.88578819]
New Q values:  [334.67841484  26.73544252 473.44576835 -35.88578819]
Reward: -1  Episode Reward:  13
xxxxx
x.g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1543.62054997  1560.43488619]
------
Step:8, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1543.62054997  1560.43488619]
New Q values:  [-2527.46239811 -8521.23367799  1543.62054997  1128.36035429]
Reward: -1  Episode Reward:  12
xxxxx
xg .x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3672.20476873 -2735.46306511  1682.62133272 -6102.86502307]
------
Step:9, Action:East
State  261
Old Q Values:  [334.67841484  26.73544252 473.44576835 -35.88578819]
New Q values:  [334.67841484  26.73544252 651.86447233 -35.88578819]
Reward: -1  Episode Reward:  11
xxxxx
x.g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1543.62054997  1128.36035429]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1543.62054997  1128.36035429]
New Q values:  [-2527.46239811 -8521.23367799  1735.91441234  1128.36035429]
Reward: -1  Episode Reward:  10
xxxxx
xg .x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -726.76733661 -6442.16912869 -8192.20126966  3730.22064115]
------
Step:11, Action:West
State  288
Old Q Values:  [ -726.76733661 -6442.16912869 -8192.20126966  3730.22064115]
New Q values:  [ -726.76733661 -6442.16912869 -8192.20126966  2012.26258016]
Reward: -1  Episode Reward:  9
xxxxx
x.g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1735.91441234  1128.36035429]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1735.91441234  1128.36035429]
New Q values:  [-2527.46239811 -8521.23367799  1297.44453898  1128.36035429]
Reward: -1  Episode Reward:  8
xxxxx
xg .x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -726.76733661 -6442.16912869 -8192.20126966  2012.26258016]
------
Step:13, Action:West
State  288
Old Q Values:  [ -726.76733661 -6442.16912869 -8192.20126966  2012.26258016]
New Q values:  [ -726.76733661 -6442.16912869 -8192.20126966  1193.53839376]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1297.44453898  1128.36035429]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1297.44453898  1128.36035429]
New Q values:  [-2527.46239811 -8521.23367799   876.43933372  1128.36035429]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -726.76733661 -6442.16912869 -8192.20126966  1193.53839376]
------
Step:15, Action:West
State  288
Old Q Values:  [ -726.76733661 -6442.16912869 -8192.20126966  1193.53839376]
New Q values:  [ -726.76733661 -6442.16912869 -8192.20126966   815.32346379]
Reward: -1  Episode Reward:  5
xxxxx
x. .x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   876.43933372  1128.36035429]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   876.43933372  1128.36035429]
New Q values:  [-2527.46239811 -8521.23367799   876.43933372 27976.07848248]
Reward: -1  Episode Reward:  4
xxxxx
x. .x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[91751.11446922  6367.89256635 16022.11762108  1875.31501677]
------
Step:17, Action:North
State  261
Old Q Values:  [334.67841484  26.73544252 651.86447233 -35.88578819]
New Q values:  [1458.48884708   26.73544252  651.86447233  -35.88578819]
Reward: 9  Episode Reward:  13
xxxxx
x. .x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 4397.39160381    0.        ]
------
Step:18, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  3127.17864261     0.        ]
New Q values:  [    0.         -5969.29177534  5897.67890727     0.        ]
Reward: 9  Episode Reward:  22
xxxxx
x. .x
x a.x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:19, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.12109339e+03 5.31946868e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.12109339e+03 3.03764937e+03 2.91043938e+03]
Reward: 9  Episode Reward:  31
xxxxx
x. .x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1181.34445667  1151.62530828 -4633.80313289  3014.87297937]
------
Step:20, Action:West
State  208
Old Q Values:  [ 1181.34445667  1151.62530828 -4633.80313289  3014.87297937]
New Q values:  [ 1181.34445667  1151.62530828 -4633.80313289 -3858.32279129]
Reward: -10001  Episode Reward:  -9970
xxxxx
x. .x
x g x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2472.250358   -4582.3674281   1099.96026581]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  2472.250358   -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831  1423.7298531  -4582.3674281   1099.96026581]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 870.35122762 -168.92307549 1431.43236633 1056.35540341]
------
Step:2, Action:East
State  273
Old Q Values:  [ 870.35122762 -168.92307549 1431.43236633 1056.35540341]
New Q values:  [  870.35122762  -168.92307549 -5177.43001433  1056.35540341]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.. x
x. .x
x. gx
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -726.76733661 -6442.16912869 -8192.20126966   815.32346379]
------
Step:1, Action:North
State  288
Old Q Values:  [ -726.76733661 -6442.16912869 -8192.20126966   815.32346379]
New Q values:  [  844.69287298 -6442.16912869 -8192.20126966   815.32346379]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  911.81846373 3766.66602541]
------
Step:2, Action:West
State  210
Old Q Values:  [ 886.26132206 1067.85596996  911.81846373 3766.66602541]
New Q values:  [ 886.26132206 1067.85596996  911.81846373 4824.50458659]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.10414606e+04  9.02822179e+02  1.20371620e+03]
------
Step:3, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.10414606e+04  9.02822179e+02  1.20371620e+03]
New Q values:  [-6.00000000e-01  1.10414606e+04  9.02822179e+02  2.25619015e+03]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5897.67890727     0.        ]
------
Step:4, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  5897.67890727     0.        ]
New Q values:  [    0.         -5969.29177534  5670.90973934     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.10414606e+04  9.02822179e+02  2.25619015e+03]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.12109339e+03 3.03764937e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 9.64066090e+03 3.03764937e+03 2.91043938e+03]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   876.43933372 27976.07848248]
------
Step:6, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   876.43933372 27976.07848248]
New Q values:  [-2527.46239811 -8521.23367799   876.43933372  5700.61779281]
Reward: -9991  Episode Reward:  -9966
xxxxx
x...x
x   x
xg  x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 -5177.43001433  1056.35540341]
------
Step:1, Action:West
State  273
Old Q Values:  [  870.35122762  -168.92307549 -5177.43001433  1056.35540341]
New Q values:  [  870.35122762  -168.92307549 -5177.43001433   865.48881549]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1458.48884708   26.73544252  651.86447233  -35.88578819]
------
Step:2, Action:North
State  261
Old Q Values:  [1458.48884708   26.73544252  651.86447233  -35.88578819]
New Q values:  [872.05543003  26.73544252 651.86447233 -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
xa. x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 944.19963733  315.52329378 -871.08424804  262.76946019]
------
Step:3, Action:North
State  180
Old Q Values:  [-5500.96494997  1371.51970183  5180.52418482 -4966.32149798]
New Q values:  [-6991.65692596  1371.51970183  5180.52418482 -4966.32149798]
Reward: -9991  Episode Reward:  -9973
xxxxx
xg..x
x . x
x  .x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.83960866e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.83960866e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.06702427e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   825.72693312]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   595.56427871   819.01723361]
New Q values:  [ -253.44886264 -1902.20915811   595.56427871   725.76824759]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1309.20451383  420.82027852 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 1309.20451383  420.82027852 -120.29354603]
New Q values:  [-177.44732869 1138.50994982  420.82027852 -120.29354603]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 2051.42714763 1558.87588424  154.04646645]
------
Step:4, Action:South
State  180
Old Q Values:  [-6991.65692596  1371.51970183  5180.52418482 -4966.32149798]
New Q values:  [-6991.65692596  1058.79428055  5180.52418482 -4966.32149798]
Reward: 9  Episode Reward:  26
xxxxx
x   x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3672.20476873 -2735.46306511  1682.62133272 -6102.86502307]
------
Step:5, Action:East
State  261
Old Q Values:  [872.05543003  26.73544252 651.86447233 -35.88578819]
New Q values:  [872.05543003  26.73544252 684.77342908 -35.88578819]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1395.42546717  1263.2964083 ]
------
Step:6, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   876.43933372  5700.61779281]
New Q values:  [-2527.46239811 -8521.23367799   609.38359538  5700.61779281]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  844.69287298 -6442.16912869 -8192.20126966   815.32346379]
------
Step:7, Action:North
State  288
Old Q Values:  [  844.69287298 -6442.16912869 -8192.20126966   815.32346379]
New Q values:  [ 1790.62852517 -6442.16912869 -8192.20126966   815.32346379]
Reward: 9  Episode Reward:  53
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  911.81846373 4824.50458659]
------
Step:8, Action:West
State  208
Old Q Values:  [ 1181.34445667  1151.62530828 -4633.80313289 -3858.32279129]
New Q values:  [ 1181.34445667  1151.62530828 -4633.80313289 61354.26915369]
Reward: 100009  Episode Reward:  100062
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1138.50994982  420.82027852 -120.29354603]
------
Step:1, Action:South
State  110
Old Q Values:  [-239.29051573 1667.66026202  826.09351776 -180.6       ]
New Q values:  [-239.29051573 2373.73702661  826.09351776 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5670.90973934     0.        ]
------
Step:2, Action:East
State  180
Old Q Values:  [-6991.65692596  1058.79428055  5180.52418482 -4966.32149798]
New Q values:  [-6991.65692596  1058.79428055  2809.32585308 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
xga.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2291.30047507  2439.05393051   239.04887894]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.10414606e+04  9.02822179e+02  2.25619015e+03]
New Q values:  [-6.00000000e-01  1.10414606e+04  1.81388025e+03  2.25619015e+03]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
xg .x
xxxxx
Step:4, Action:East
State  208
Old Q Values:  [ 1181.34445667  1151.62530828 -4633.80313289 61354.26915369]
New Q values:  [ 1181.34445667  1151.62530828 16372.15949295 61354.26915369]
Reward: -301  Episode Reward:  -274
xxxxx
x ..x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1181.34445667  1151.62530828 16372.15949295 61354.26915369]
------
Step:5, Action:West
State  208
Old Q Values:  [ 1181.34445667  1151.62530828 16372.15949295 61354.26915369]
New Q values:  [ 1181.34445667  1151.62530828 16372.15949295 27433.30593169]
Reward: -1  Episode Reward:  -275
xxxxx
xg..x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.64066090e+03 3.03764937e+03 2.91043938e+03]
------
Step:6, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.64066090e+03 3.03764937e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 5.56584970e+03 3.03764937e+03 2.91043938e+03]
Reward: -1  Episode Reward:  -276
xxxxx
x ..x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   609.38359538  5700.61779281]
------
Step:7, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1395.42546717  1263.2964083 ]
New Q values:  [   16.82637525 -5807.06396197  1395.42546717   772.33519233]
Reward: 9  Episode Reward:  -267
xxxxx
x ..x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[872.05543003  26.73544252 684.77342908 -35.88578819]
------
Step:8, Action:North
State  260
Old Q Values:  [-3672.20476873 -2735.46306511  1682.62133272 -6102.86502307]
New Q values:  [-6626.68415157 -2735.46306511  1682.62133272 -6102.86502307]
Reward: -10001  Episode Reward:  -10268
xxxxx
x ..x
xg  x
x  .x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  553.66709335   -8.57207238 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1138.50994982  420.82027852 -120.29354603]
New Q values:  [-177.44732869  744.06387113  420.82027852 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 944.19963733  315.52329378 -871.08424804  262.76946019]
------
Step:2, Action:North
State  181
Old Q Values:  [ 944.19963733  315.52329378 -871.08424804  262.76946019]
New Q values:  [ 543.17998294  315.52329378 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
xa.gx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  553.66709335   -8.57207238 -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869  744.06387113  420.82027852 -120.29354603]
New Q values:  [-177.44732869  459.97954333  420.82027852 -120.29354603]
Reward: -1  Episode Reward:  7
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 543.17998294  315.52329378 -871.08424804  262.76946019]
------
Step:4, Action:North
State  181
Old Q Values:  [ 543.17998294  315.52329378 -871.08424804  262.76946019]
New Q values:  [ 382.77212118  315.52329378 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  6
xxxxx
xa.gx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[-241.10880094  553.66709335   -8.57207238 -180.6       ]
------
Step:5, Action:South
State  109
Old Q Values:  [-241.10880094  553.66709335   -8.57207238 -180.6       ]
New Q values:  [-241.10880094  335.69847369   -8.57207238 -180.6       ]
Reward: -1  Episode Reward:  5
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 382.77212118  315.52329378 -871.08424804  262.76946019]
------
Step:6, Action:North
State  181
Old Q Values:  [ 382.77212118  315.52329378 -871.08424804  262.76946019]
New Q values:  [ 290.50271147  315.52329378 -871.08424804  262.76946019]
Reward: -1  Episode Reward:  4
xxxxx
xa. x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  459.97954333  420.82027852 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869  459.97954333  420.82027852 -120.29354603]
New Q values:  [-177.44732869 1502.60929848  420.82027852 -120.29354603]
Reward: -1  Episode Reward:  3
xxxxx
x . x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 4397.39160381    0.        ]
------
Step:8, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 4397.39160381    0.        ]
New Q values:  [ 902.4633276   748.78287005 5438.65063945    0.        ]
Reward: 9  Episode Reward:  12
xxxxx
x . x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 1.22476467e+04 2.81953637e+03 0.00000000e+00]
------
Step:9, Action:South
State  194
Old Q Values:  [-6.00000000e-01  1.10414606e+04  1.81388025e+03  2.25619015e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.25619015e+03]
Reward: -9991  Episode Reward:  -9979
xxxxx
x . x
x  .x
x.g.x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.25619015e+03]
------
Step:1, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.25619015e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.60914898e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5670.90973934     0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 5438.65063945    0.        ]
New Q values:  [ 902.4633276   748.78287005 2957.60495036    0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.60914898e+03]
------
Step:3, Action:West
State  195
Old Q Values:  [   38.85388605 10817.53531707  1849.21327227  1101.59744825]
New Q values:  [   38.85388605 10817.53531707  1849.21327227  1327.32046441]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 2957.60495036    0.        ]
------
Step:4, Action:East
State  181
Old Q Values:  [ 290.50271147  315.52329378 -871.08424804  262.76946019]
New Q values:  [290.50271147 315.52329378  78.08525671 262.76946019]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1423.7298531  -4582.3674281   1099.96026581]
------
Step:5, Action:South
State  193
Old Q Values:  [-5922.26708831  1423.7298531  -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831   835.99730953 -4582.3674281   1099.96026581]
Reward: 9  Episode Reward:  15
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  870.35122762  -168.92307549 -5177.43001433   865.48881549]
------
Step:6, Action:North
State  273
Old Q Values:  [  870.35122762  -168.92307549 -5177.43001433   865.48881549]
New Q values:  [  677.52857079  -168.92307549 -5177.43001433   865.48881549]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   835.99730953 -4582.3674281   1099.96026581]
------
Step:7, Action:West
State  193
Old Q Values:  [-5922.26708831   835.99730953 -4582.3674281   1099.96026581]
New Q values:  [-5922.26708831   835.99730953 -4582.3674281    534.04109446]
Reward: -1  Episode Reward:  13
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 315.52329378  78.08525671 262.76946019]
------
Step:8, Action:South
State  181
Old Q Values:  [290.50271147 315.52329378  78.08525671 262.76946019]
New Q values:  [290.50271147 387.22594652  78.08525671 262.76946019]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[872.05543003  26.73544252 684.77342908 -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [872.05543003  26.73544252 684.77342908 -35.88578819]
New Q values:  [1235.50365712   26.73544252  684.77342908  -35.88578819]
Reward: -1  Episode Reward:  11
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 2957.60495036    0.        ]
------
Step:10, Action:East
State  181
Old Q Values:  [290.50271147 387.22594652  78.08525671 262.76946019]
New Q values:  [290.50271147 387.22594652 281.43329554 262.76946019]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831   835.99730953 -4582.3674281    534.04109446]
------
Step:11, Action:South
State  193
Old Q Values:  [-5922.26708831   835.99730953 -4582.3674281    534.04109446]
New Q values:  [-5922.26708831   593.44556846 -4582.3674281    534.04109446]
Reward: -1  Episode Reward:  9
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549 -5177.43001433   865.48881549]
------
Step:12, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549 -5177.43001433   865.48881549]
New Q values:  [  677.52857079  -168.92307549 -5177.43001433   716.24662333]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1235.50365712   26.73544252  684.77342908  -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [1235.50365712   26.73544252  684.77342908  -35.88578819]
New Q values:  [609.76924681  26.73544252 684.77342908 -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 387.22594652 281.43329554 262.76946019]
------
Step:14, Action:South
State  180
Old Q Values:  [-6991.65692596  1058.79428055  2809.32585308 -4966.32149798]
New Q values:  [-6991.65692596   927.70411204  2809.32585308 -4966.32149798]
Reward: -1  Episode Reward:  6
xxxxx
xg..x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-6626.68415157 -2735.46306511  1682.62133272 -6102.86502307]
------
Step:15, Action:East
State  260
Old Q Values:  [-6626.68415157 -2735.46306511  1682.62133272 -6102.86502307]
New Q values:  [-6626.68415157 -2735.46306511  1091.07617324 -6102.86502307]
Reward: -1  Episode Reward:  5
xxxxx
xg..x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1395.42546717   772.33519233]
------
Step:16, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1395.42546717   772.33519233]
New Q values:  [   16.82637525 -5807.06396197  1100.75874442   772.33519233]
Reward: 9  Episode Reward:  14
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1790.62852517 -6442.16912869 -8192.20126966   815.32346379]
------
Step:17, Action:North
State  288
Old Q Values:  [ 1790.62852517 -6442.16912869 -8192.20126966   815.32346379]
New Q values:  [ 8951.64318957 -6442.16912869 -8192.20126966   815.32346379]
Reward: 9  Episode Reward:  23
xxxxx
x..gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1181.34445667  1151.62530828 16372.15949295 27433.30593169]
------
Step:18, Action:West
State  208
Old Q Values:  [ 1181.34445667  1151.62530828 16372.15949295 27433.30593169]
New Q values:  [ 1181.34445667  1151.62530828 16372.15949295 12642.47728211]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.56584970e+03 3.03764937e+03 2.91043938e+03]
------
Step:19, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.56584970e+03 3.03764937e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 3.93592522e+03 3.03764937e+03 2.91043938e+03]
Reward: -1  Episode Reward:  21
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   609.38359538  5700.61779281]
------
Step:20, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   609.38359538  5700.61779281]
New Q values:  [-2527.46239811 -8521.23367799   609.38359538  2606.9699691 ]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-6626.68415157 -2735.46306511  1091.07617324 -6102.86502307]
------
Step:21, Action:East
State  260
Old Q Values:  [-6626.68415157 -2735.46306511  1091.07617324 -6102.86502307]
New Q values:  [-6626.68415157 -2735.46306511  1217.92146002 -6102.86502307]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   609.38359538  2606.9699691 ]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   609.38359538  2606.9699691 ]
New Q values:  [-2527.46239811 -8521.23367799  2928.64639503  2606.9699691 ]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8951.64318957 -6442.16912869 -8192.20126966   815.32346379]
------
Step:23, Action:North
State  288
Old Q Values:  [ 8951.64318957 -6442.16912869 -8192.20126966   815.32346379]
New Q values:  [ 5027.40865181 -6442.16912869 -8192.20126966   815.32346379]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  911.81846373 4824.50458659]
------
Step:24, Action:West
State  210
Old Q Values:  [ 886.26132206 1067.85596996  911.81846373 4824.50458659]
New Q values:  [ 886.26132206 1067.85596996  911.81846373 2711.94652922]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.60914898e+03]
------
Step:25, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.60914898e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.74433251e+03]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5670.90973934     0.        ]
------
Step:26, Action:East
State  180
Old Q Values:  [-6991.65692596   927.70411204  2809.32585308 -4966.32149798]
New Q values:  [-6991.65692596   927.70411204  2303.90790636 -4966.32149798]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.93592522e+03 3.03764937e+03 2.91043938e+03]
------
Step:27, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.93592522e+03 3.03764937e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.45236401e+03 3.03764937e+03 2.91043938e+03]
Reward: -1  Episode Reward:  13
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2928.64639503  2606.9699691 ]
------
Step:28, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2928.64639503  2606.9699691 ]
New Q values:  [-2527.46239811 -8521.23367799  2679.08115355  2606.9699691 ]
Reward: -1  Episode Reward:  12
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5027.40865181 -6442.16912869 -8192.20126966   815.32346379]
------
Step:29, Action:North
State  288
Old Q Values:  [ 5027.40865181 -6442.16912869 -8192.20126966   815.32346379]
New Q values:  [ 6922.01130861 -6442.16912869 -8192.20126966   815.32346379]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1181.34445667  1151.62530828 16372.15949295 12642.47728211]
------
Step:30, Action:East
State  208
Old Q Values:  [ 1181.34445667  1151.62530828 16372.15949295 12642.47728211]
New Q values:  [ 1181.34445667  1151.62530828  5279.91164507 12642.47728211]
Reward: -10301  Episode Reward:  -10290
xxxxx
x...x
x  gx
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.06702427e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.06702427e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.15799051e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   825.72693312]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   825.72693312]
New Q values:  [ -281.736      -1150.91067548   297.23868516   786.47356279]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1502.60929848  420.82027852 -120.29354603]
------
Step:3, Action:South
State  110
Old Q Values:  [-239.29051573 2373.73702661  826.09351776 -180.6       ]
New Q values:  [-239.29051573 2656.16773245  826.09351776 -180.6       ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5670.90973934     0.        ]
------
Step:4, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 2957.60495036    0.        ]
New Q values:  [ 902.4633276   748.78287005 5829.84943037    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:5, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.22476467e+04 2.81953637e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.22476467e+04 2.20348062e+03 0.00000000e+00]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 401.76524785 3567.55356402    0.          852.63459632]
------
Step:6, Action:West
State  216
Old Q Values:  [ 3300.49204035  2501.82066656 -8896.20691497  1597.4085592 ]
New Q values:  [ 3300.49204035  2501.82066656 -8896.20691497  3138.07537093]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  8.33237316e+03 -5.74006972e+03  2.00341972e+02]
------
Step:7, Action:South
State  200
Old Q Values:  [  62.8218634  2336.35962208 2732.52050438 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  2732.52050438 1141.49622464]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2679.08115355  2606.9699691 ]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2679.08115355  2606.9699691 ]
New Q values:  [-2527.46239811 -8521.23367799  3153.635854    2606.9699691 ]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6922.01130861 -6442.16912869 -8192.20126966   815.32346379]
------
Step:9, Action:North
State  288
Old Q Values:  [ 6922.01130861 -6442.16912869 -8192.20126966   815.32346379]
New Q values:  [ 3581.78848221 -6442.16912869 -8192.20126966   815.32346379]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  911.81846373 2711.94652922]
------
Step:10, Action:West
State  218
Old Q Values:  [ 401.76524785 3567.55356402    0.          852.63459632]
New Q values:  [ 401.76524785 3567.55356402    0.         1039.23717408]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -7041.23396577  2329.27778519     0.        ]
------
Step:11, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.22476467e+04 2.20348062e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.22476467e+04 1.95105832e+03 0.00000000e+00]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 401.76524785 3567.55356402    0.         1039.23717408]
------
Step:12, Action:West
State  216
Old Q Values:  [ 3300.49204035  2501.82066656 -8896.20691497  3138.07537093]
New Q values:  [ 3300.49204035  2501.82066656 -8896.20691497  3754.34209561]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  8.33237316e+03 -5.74006972e+03  2.00341972e+02]
------
Step:13, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  8.33237316e+03 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  3.54722325e+03 -5.74006972e+03  2.00341972e+02]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549 -5177.43001433   716.24662333]
------
Step:14, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549 -5177.43001433   716.24662333]
New Q values:  [  677.52857079  -168.92307549 -5177.43001433 87817.2329901 ]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 387.22594652 281.43329554 262.76946019]
------
Step:1, Action:South
State  180
Old Q Values:  [-6991.65692596   927.70411204  2303.90790636 -4966.32149798]
New Q values:  [-6991.65692596   741.85808282  2303.90790636 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-6626.68415157 -2735.46306511  1217.92146002 -6102.86502307]
------
Step:2, Action:East
State  261
Old Q Values:  [609.76924681  26.73544252 684.77342908 -35.88578819]
New Q values:  [609.76924681  26.73544252 609.53699496 -35.88578819]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1100.75874442   772.33519233]
------
Step:3, Action:East
State  273
Old Q Values:  [  677.52857079  -168.92307549 -5177.43001433 87817.2329901 ]
New Q values:  [  677.52857079  -168.92307549  -991.03546107 87817.2329901 ]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3581.78848221 -6442.16912869 -8192.20126966   815.32346379]
------
Step:4, Action:North
State  288
Old Q Values:  [ 3581.78848221 -6442.16912869 -8192.20126966   815.32346379]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   815.32346379]
Reward: -9991  Episode Reward:  -9964
xxxxx
x. .x
x .gx
x   x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-6626.68415157 -2735.46306511  1217.92146002 -6102.86502307]
------
Step:1, Action:East
State  260
Old Q Values:  [-6626.68415157 -2735.46306511  1217.92146002 -6102.86502307]
New Q values:  [-6626.68415157 -2735.46306511   822.79620733 -6102.86502307]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1100.75874442   772.33519233]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3153.635854    2606.9699691 ]
New Q values:  [-2527.46239811 -8521.23367799  1511.45138074  2606.9699691 ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   815.32346379]
------
Step:3, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   815.32346379]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966  1107.62037625]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1511.45138074  2606.9699691 ]
------
Step:4, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1511.45138074  2606.9699691 ]
New Q values:  [-2527.46239811 -8521.23367799  1511.45138074 -4710.97315016]
Reward: -10001  Episode Reward:  -9984
xxxxx
x ..x
x...x
xg  x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2401.52422984 -2383.80019164   -72.21682463]
------
Step:1, Action:South
State  136
Old Q Values:  [ -170.77177351  2401.52422984 -2383.80019164   -72.21682463]
New Q values:  [ -170.77177351  2092.31232062 -2383.80019164   -72.21682463]
Reward: 9  Episode Reward:  9
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3300.49204035  2501.82066656 -8896.20691497  3754.34209561]
------
Step:2, Action:West
State  216
Old Q Values:  [ 3300.49204035  2501.82066656 -8896.20691497  3754.34209561]
New Q values:  [ 3300.49204035  2501.82066656 -8896.20691497  2418.4316485 ]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.45236401e+03 3.03764937e+03 2.91043938e+03]
------
Step:3, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  2732.52050438 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  2082.55581386 1141.49622464]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3300.49204035  2501.82066656 -8896.20691497  2418.4316485 ]
------
Step:4, Action:North
State  216
Old Q Values:  [ 3300.49204035  2501.82066656 -8896.20691497  2418.4316485 ]
New Q values:  [ 1444.3365313   2501.82066656 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.15799051e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.15799051e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.07661689e+02]
Reward: 9  Episode Reward:  25
xxxxx
x a x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   786.47356279]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   786.47356279]
New Q values:  [ -281.736      -1150.91067548   297.23868516   734.35320731]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253 1401.2126073  -252.78192178]
------
Step:7, Action:East
State  105
Old Q Values:  [ -180.6         1827.67773756 -5071.25637662     0.        ]
New Q values:  [ -180.6         1827.67773756 -7438.53829696     0.        ]
Reward: -10001  Episode Reward:  -9977
xxxxx
x g x
x.  x
x...x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1511.45138074 -4710.97315016]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1511.45138074 -4710.97315016]
New Q values:  [-2527.46239811 -8521.23367799   942.26666517 -4710.97315016]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966  1107.62037625]
------
Step:2, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966  1107.62037625]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   725.12815005]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   942.26666517 -4710.97315016]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   942.26666517 -4710.97315016]
New Q values:  [-2527.46239811 -8521.23367799   593.84511108 -4710.97315016]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   725.12815005]
------
Step:4, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   725.12815005]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   467.60479334]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   593.84511108 -4710.97315016]
------
Step:5, Action:East
State  273
Old Q Values:  [  677.52857079  -168.92307549  -991.03546107 87817.2329901 ]
New Q values:  [  677.52857079  -168.92307549  -256.73274642 87817.2329901 ]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   467.60479334]
------
Step:6, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   467.60479334]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966 26531.61181437]
Reward: -1  Episode Reward:  4
xxxxx
x .gx
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 87817.2329901 ]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   593.84511108 -4710.97315016]
New Q values:  [-2527.46239811 -8521.23367799   593.84511108 -1696.05848602]
Reward: 9  Episode Reward:  13
xxxxx
x g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[609.76924681  26.73544252 609.53699496 -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [609.76924681  26.73544252 609.53699496 -35.88578819]
New Q values:  [365.47548268  26.73544252 609.53699496 -35.88578819]
Reward: 9  Episode Reward:  22
xxxxx
x .gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 387.22594652 281.43329554 262.76946019]
------
Step:9, Action:South
State  181
Old Q Values:  [290.50271147 387.22594652 281.43329554 262.76946019]
New Q values:  [290.50271147 337.1514771  281.43329554 262.76946019]
Reward: -1  Episode Reward:  21
xxxxx
x g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[365.47548268  26.73544252 609.53699496 -35.88578819]
------
Step:10, Action:East
State  261
Old Q Values:  [365.47548268  26.73544252 609.53699496 -35.88578819]
New Q values:  [  365.47548268    26.73544252 26588.38469501   -35.88578819]
Reward: -1  Episode Reward:  20
xxxxx
x .gx
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 87817.2329901 ]
------
Step:11, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549  -256.73274642 87817.2329901 ]
New Q values:  [  677.52857079  -168.92307549  -256.73274642 43102.80860454]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  365.47548268    26.73544252 26588.38469501   -35.88578819]
------
Step:12, Action:East
State  261
Old Q Values:  [  365.47548268    26.73544252 26588.38469501   -35.88578819]
New Q values:  [  365.47548268    26.73544252 23565.59645937   -35.88578819]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 43102.80860454]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   593.84511108 -1696.05848602]
New Q values:  [-2527.46239811 -8521.23367799   593.84511108  6390.6555434 ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x ..x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  365.47548268    26.73544252 23565.59645937   -35.88578819]
------
Step:14, Action:North
State  261
Old Q Values:  [  365.47548268    26.73544252 23565.59645937   -35.88578819]
New Q values:  [ 1894.54502218    26.73544252 23565.59645937   -35.88578819]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 5829.84943037    0.        ]
------
Step:15, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 5829.84943037    0.        ]
New Q values:  [ 902.4633276   748.78287005 6978.74722237    0.        ]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:16, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.45236401e+03 3.03764937e+03 2.91043938e+03]
New Q values:  [3.89777037e-01 2.45236401e+03 5.01320293e+03 2.91043938e+03]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1181.34445667  1151.62530828  5279.91164507 12642.47728211]
------
Step:17, Action:East
State  210
Old Q Values:  [ 886.26132206 1067.85596996  911.81846373 2711.94652922]
New Q values:  [ 886.26132206 1067.85596996  997.71134426 2711.94652922]
Reward: -301  Episode Reward:  -267
xxxxx
x ..x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  997.71134426 2711.94652922]
------
Step:18, Action:West
State  210
Old Q Values:  [ 886.26132206 1067.85596996  997.71134426 2711.94652922]
New Q values:  [ 886.26132206 1067.85596996  997.71134426 1907.47836606]
Reward: -1  Episode Reward:  -268
xxxxx
x ..x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.74433251e+03]
------
Step:19, Action:West
State  192
Old Q Values:  [3.89777037e-01 2.45236401e+03 5.01320293e+03 2.91043938e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  5.01320293e+03 -4.14525188e+03]
Reward: -10001  Episode Reward:  -10269
xxxxx
x ..x
xg  x
x   x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   593.84511108  6390.6555434 ]
------
Step:1, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549  -256.73274642 43102.80860454]
New Q values:  [  677.52857079  -168.92307549  -256.73274642 24316.20237963]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1894.54502218    26.73544252 23565.59645937   -35.88578819]
------
Step:2, Action:East
State  261
Old Q Values:  [ 1894.54502218    26.73544252 23565.59645937   -35.88578819]
New Q values:  [ 1894.54502218    26.73544252 16720.49929764   -35.88578819]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 24316.20237963]
------
Step:3, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549  -256.73274642 24316.20237963]
New Q values:  [  677.52857079  -168.92307549  -256.73274642 14742.03074114]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1894.54502218    26.73544252 16720.49929764   -35.88578819]
------
Step:4, Action:East
State  261
Old Q Values:  [ 1894.54502218    26.73544252 16720.49929764   -35.88578819]
New Q values:  [1894.54502218   26.73544252 8604.79638207  -35.88578819]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   593.84511108  6390.6555434 ]
------
Step:5, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549  -256.73274642 14742.03074114]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 8477.65121108]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1894.54502218   26.73544252 8604.79638207  -35.88578819]
------
Step:6, Action:East
State  261
Old Q Values:  [1894.54502218   26.73544252 8604.79638207  -35.88578819]
New Q values:  [1894.54502218   26.73544252 5984.61391615  -35.88578819]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x. .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642 8477.65121108]
------
Step:7, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 8477.65121108]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 5185.84465928]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x. .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1894.54502218   26.73544252 5984.61391615  -35.88578819]
------
Step:8, Action:East
State  261
Old Q Values:  [1894.54502218   26.73544252 5984.61391615  -35.88578819]
New Q values:  [ 1894.54502218    26.73544252 -1689.55777052   -35.88578819]
Reward: -10001  Episode Reward:  -9998
xxxxx
x...x
x. .x
x g.x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.07661689e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.07661689e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.88770638e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   734.35320731]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   734.35320731]
New Q values:  [ -281.736      -1150.91067548   297.23868516   719.50506511]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253 1401.2126073  -252.78192178]
------
Step:3, Action:East
State  110
Old Q Values:  [-239.29051573 2656.16773245  826.09351776 -180.6       ]
New Q values:  [-239.29051573 2656.16773245  545.68892664 -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x a x
xg..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   719.50506511]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   719.50506511]
New Q values:  [ -281.736      -1150.91067548   297.23868516   707.56580823]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253 1401.2126073  -252.78192178]
------
Step:5, Action:East
State  110
Old Q Values:  [-239.29051573 2656.16773245  545.68892664 -180.6       ]
New Q values:  [-239.29051573 2656.16773245  429.94531313 -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x a x
xg..x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   707.56580823]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   707.56580823]
New Q values:  [ -281.736      -1150.91067548   297.23868516  1079.27664303]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 2656.16773245  429.94531313 -180.6       ]
------
Step:7, Action:South
State  110
Old Q Values:  [-239.29051573 2656.16773245  429.94531313 -180.6       ]
New Q values:  [ -239.29051573 -4240.96053511   429.94531313  -180.6       ]
Reward: -9991  Episode Reward:  -9977
xxxxx
x   x
xg..x
x.. x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1181.34445667  1151.62530828  5279.91164507 12642.47728211]
------
Step:1, Action:West
State  208
Old Q Values:  [ 1181.34445667  1151.62530828  5279.91164507 12642.47728211]
New Q values:  [1181.34445667 1151.62530828 5279.91164507  566.35179234]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g x
x...x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[1181.34445667 1151.62530828 5279.91164507  566.35179234]
------
Step:1, Action:East
State  210
Old Q Values:  [ 886.26132206 1067.85596996  997.71134426 1907.47836606]
New Q values:  [ 886.26132206 1067.85596996  790.72804752 1907.47836606]
Reward: -301  Episode Reward:  -301
xxxxx
x...x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  790.72804752 1907.47836606]
------
Step:2, Action:West
State  210
Old Q Values:  [ 886.26132206 1067.85596996  790.72804752 1907.47836606]
New Q values:  [ 886.26132206 1067.85596996  790.72804752 1585.6911008 ]
Reward: -1  Episode Reward:  -302
xxxxx
x...x
x.a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.74433251e+03]
------
Step:3, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  5.01320293e+03 -4.14525188e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  5.01320293e+03 -6.96152838e+03]
Reward: -9991  Episode Reward:  -10293
xxxxx
x...x
xg  x
x...x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 6978.74722237    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 6978.74722237    0.        ]
New Q values:  [ 902.4633276   748.78287005 3620.19864332    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.74433251e+03]
------
Step:2, Action:West
State  195
Old Q Values:  [   38.85388605 10817.53531707  1849.21327227  1327.32046441]
New Q values:  [   38.85388605 10817.53531707  1849.21327227  1616.38777876]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   748.78287005 3620.19864332    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 902.4633276   748.78287005 3620.19864332    0.        ]
New Q values:  [ 902.4633276   748.78287005 2270.7792117     0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.74433251e+03]
------
Step:4, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  5.01320293e+03 -6.96152838e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  5.01320293e+03 -2.68406591e+03]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xag.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 337.1514771  281.43329554 262.76946019]
------
Step:5, Action:South
State  183
Old Q Values:  [ 902.4633276   748.78287005 2270.7792117     0.        ]
New Q values:  [ 902.4633276   873.27665467 2270.7792117     0.        ]
Reward: 9  Episode Reward:  15
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1894.54502218    26.73544252 -1689.55777052   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [ 1894.54502218    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  858.363452      26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xag.x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 337.1514771  281.43329554 262.76946019]
------
Step:7, Action:South
State  181
Old Q Values:  [290.50271147 337.1514771  281.43329554 262.76946019]
New Q values:  [290.50271147 391.76962644 281.43329554 262.76946019]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  858.363452      26.73544252 -1689.55777052   -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [  858.363452      26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 1023.97914431    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  12
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   873.27665467 2270.7792117     0.        ]
------
Step:9, Action:East
State  183
Old Q Values:  [ 902.4633276   873.27665467 2270.7792117     0.        ]
New Q values:  [ 902.4633276   873.27665467 1731.01143905    0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.74433251e+03]
------
Step:10, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.74433251e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.79840593e+03]
Reward: -1  Episode Reward:  10
xxxxx
x...x
xa .x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5670.90973934     0.        ]
------
Step:11, Action:East
State  183
Old Q Values:  [ 902.4633276   873.27665467 1731.01143905    0.        ]
New Q values:  [ 902.4633276   873.27665467 1531.32635391    0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.79840593e+03]
------
Step:12, Action:West
State  195
Old Q Values:  [   38.85388605 10817.53531707  1849.21327227  1616.38777876]
New Q values:  [   38.85388605 10817.53531707  1849.21327227  1105.35301768]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   873.27665467 1531.32635391    0.        ]
------
Step:13, Action:East
State  183
Old Q Values:  [ 902.4633276   873.27665467 1531.32635391    0.        ]
New Q values:  [ 902.4633276   873.27665467 1451.45231985    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.79840593e+03]
------
Step:14, Action:West
State  195
Old Q Values:  [   38.85388605 10817.53531707  1849.21327227  1105.35301768]
New Q values:  [   38.85388605 10817.53531707  1849.21327227   876.97690303]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   873.27665467 1451.45231985    0.        ]
------
Step:15, Action:East
State  183
Old Q Values:  [ 902.4633276   873.27665467 1451.45231985    0.        ]
New Q values:  [ 902.4633276   873.27665467 3825.24152306    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 10817.53531707  1849.21327227   876.97690303]
------
Step:16, Action:South
State  193
Old Q Values:  [-5922.26708831   593.44556846 -4582.3674281    534.04109446]
New Q values:  [-5922.26708831  1798.53162517 -4582.3674281    534.04109446]
Reward: 9  Episode Reward:  14
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642 5185.84465928]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   593.84511108  6390.6555434 ]
New Q values:  [-2527.46239811 -8521.23367799   593.84511108  2862.85596065]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1023.97914431    26.73544252 -1689.55777052   -35.88578819]
------
Step:18, Action:North
State  261
Old Q Values:  [ 1023.97914431    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  526.52254566    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  12
xxxxx
x.g.x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 391.76962644 281.43329554 262.76946019]
------
Step:19, Action:South
State  180
Old Q Values:  [-6991.65692596   741.85808282  2303.90790636 -4966.32149798]
New Q values:  [-6991.65692596   542.98209533  2303.90790636 -4966.32149798]
Reward: -1  Episode Reward:  11
xxxxx
xg..x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-6626.68415157 -2735.46306511   822.79620733 -6102.86502307]
------
Step:20, Action:East
State  260
Old Q Values:  [-6626.68415157 -2735.46306511   822.79620733 -6102.86502307]
New Q values:  [-6626.68415157 -2735.46306511  1187.37527113 -6102.86502307]
Reward: -1  Episode Reward:  10
xxxxx
x...x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   593.84511108  2862.85596065]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   593.84511108  2862.85596065]
New Q values:  [-2527.46239811 -8521.23367799   593.84511108 -4499.2450344 ]
Reward: -10001  Episode Reward:  -9991
xxxxx
x...x
x  .x
xg  x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966 26531.61181437]
------
Step:1, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966 26531.61181437]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966 12173.79812353]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642 5185.84465928]
------
Step:2, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 5185.84465928]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 2237.69462741]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  526.52254566    26.73544252 -1689.55777052   -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [  526.52254566    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  333.53990619    26.73544252 -1689.55777052   -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 391.76962644 281.43329554 262.76946019]
------
Step:4, Action:South
State  183
Old Q Values:  [ 902.4633276   873.27665467 3825.24152306    0.        ]
New Q values:  [ 902.4633276   448.77263373 3825.24152306    0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  333.53990619    26.73544252 -1689.55777052   -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [  333.53990619    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  250.34685041    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 391.76962644 281.43329554 262.76946019]
------
Step:6, Action:South
State  180
Old Q Values:  [-6991.65692596   542.98209533  2303.90790636 -4966.32149798]
New Q values:  [-6991.65692596   572.80541947  2303.90790636 -4966.32149798]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-6626.68415157 -2735.46306511  1187.37527113 -6102.86502307]
------
Step:7, Action:East
State  260
Old Q Values:  [-6626.68415157 -2735.46306511  1187.37527113 -6102.86502307]
New Q values:  [-6626.68415157 -2735.46306511   652.50364178 -6102.86502307]
Reward: -1  Episode Reward:  23
xxxxx
xg..x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   593.84511108 -4499.2450344 ]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   593.84511108 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799  3889.07748149 -4499.2450344 ]
Reward: -1  Episode Reward:  22
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966 12173.79812353]
------
Step:9, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966 12173.79812353]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966  6035.64249386]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3889.07748149 -4499.2450344 ]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3889.07748149 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799  3365.72374075 -4499.2450344 ]
Reward: -1  Episode Reward:  20
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966  6035.64249386]
------
Step:11, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966  6035.64249386]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966  3423.37411977]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3365.72374075 -4499.2450344 ]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3365.72374075 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799  2372.70173223 -4499.2450344 ]
Reward: -1  Episode Reward:  18
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966  3423.37411977]
------
Step:13, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966  3423.37411977]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966  2080.56016758]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2372.70173223 -4499.2450344 ]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2372.70173223 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799  1572.64874317 -4499.2450344 ]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966  2080.56016758]
------
Step:15, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966  2080.56016758]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966  1502.93245525]
Reward: -1  Episode Reward:  15
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642 2237.69462741]
------
Step:16, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 2237.69462741]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642  969.58190609]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  250.34685041    26.73544252 -1689.55777052   -35.88578819]
------
Step:17, Action:North
State  261
Old Q Values:  [  250.34685041    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  217.0696281     26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  13
xxxxx
x..gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 391.76962644 281.43329554 262.76946019]
------
Step:18, Action:South
State  181
Old Q Values:  [290.50271147 391.76962644 281.43329554 262.76946019]
New Q values:  [290.50271147 221.228739   281.43329554 262.76946019]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  217.0696281     26.73544252 -1689.55777052   -35.88578819]
------
Step:19, Action:North
State  261
Old Q Values:  [  217.0696281     26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  173.37866468    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  11
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 221.228739   281.43329554 262.76946019]
------
Step:20, Action:North
State  180
Old Q Values:  [-6991.65692596   572.80541947  2303.90790636 -4966.32149798]
New Q values:  [-2405.85343029   572.80541947  2303.90790636 -4966.32149798]
Reward: 9  Episode Reward:  20
xxxxx
xa..x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        1284.69780031    5.16       -180.6       ]
------
Step:21, Action:East
State  100
Old Q Values:  [   0.         4011.09684676    5.064         0.        ]
New Q values:  [   0.         4011.09684676  318.34177431    0.        ]
Reward: 9  Episode Reward:  29
xxxxx
xga.x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[   0.         1036.38724771    0.            0.        ]
------
Step:22, Action:South
State  118
Old Q Values:  [ 617.06804554 2496.42792156    0.          503.49427758]
New Q values:  [ 617.06804554 1735.68734778    0.          503.49427758]
Reward: 9  Episode Reward:  38
xxxxx
x  .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2291.30047507  2439.05393051   239.04887894]
------
Step:23, Action:East
State  196
Old Q Values:  [-2469.90645144  2291.30047507  2439.05393051   239.04887894]
New Q values:  [-2469.90645144  2291.30047507  2558.99506573   239.04887894]
Reward: -1  Episode Reward:  37
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[1181.34445667 1151.62530828 5279.91164507  566.35179234]
------
Step:24, Action:East
State  208
Old Q Values:  [1181.34445667 1151.62530828 5279.91164507  566.35179234]
New Q values:  [1181.34445667 1151.62530828 3515.33815155  566.35179234]
Reward: -301  Episode Reward:  -264
xxxxx
x g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[1181.34445667 1151.62530828 3515.33815155  566.35179234]
------
Step:25, Action:East
State  208
Old Q Values:  [1181.34445667 1151.62530828 3515.33815155  566.35179234]
New Q values:  [1181.34445667 1151.62530828 2280.13670608  566.35179234]
Reward: -301  Episode Reward:  -565
xxxxx
x  gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[1181.34445667 1151.62530828 2280.13670608  566.35179234]
------
Step:26, Action:East
State  208
Old Q Values:  [1181.34445667 1151.62530828 2280.13670608  566.35179234]
New Q values:  [ 1181.34445667  1151.62530828 -4584.50430574   566.35179234]
Reward: -10301  Episode Reward:  -10866
xxxxx
x  .x
x  gx
x   x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.88770638e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  3.88770638e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.84691248e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516  1079.27664303]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516  1079.27664303]
New Q values:  [ -281.736      -1150.91067548   297.23868516   857.4744394 ]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253 1401.2126073  -252.78192178]
------
Step:3, Action:East
State  107
Old Q Values:  [-252.35169558  765.83302253 1401.2126073  -252.78192178]
New Q values:  [-252.35169558  765.83302253  777.6155172  -252.78192178]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   595.56427871   725.76824759]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   857.4744394 ]
New Q values:  [ -281.736      -1150.91067548   297.23868516   575.67443092]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253  777.6155172  -252.78192178]
------
Step:5, Action:East
State  109
Old Q Values:  [-241.10880094  335.69847369   -8.57207238 -180.6       ]
New Q values:  [ -241.10880094   335.69847369 -5413.46457526  -180.6       ]
Reward: -10001  Episode Reward:  -9985
xxxxx
x g x
x...x
x.. x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  173.37866468    26.73544252 -1689.55777052   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [  173.37866468    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 1222.32392279    26.73544252 -1689.55777052   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 902.4633276   448.77263373 3825.24152306    0.        ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 902.4633276   448.77263373 3825.24152306    0.        ]
New Q values:  [ 902.4633276   448.77263373 4780.75720435    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 10817.53531707  1849.21327227   876.97690303]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  1798.53162517 -4582.3674281    534.04109446]
New Q values:  [-5922.26708831  1015.68722189 -4582.3674281    534.04109446]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642  969.58190609]
------
Step:4, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642  969.58190609]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642  753.92993927]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1222.32392279    26.73544252 -1689.55777052   -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [ 1222.32392279    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  575.48038256    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 221.228739   281.43329554 262.76946019]
------
Step:6, Action:North
State  183
Old Q Values:  [ 902.4633276   448.77263373 4780.75720435    0.        ]
New Q values:  [ 540.75283156  448.77263373 4780.75720435    0.        ]
Reward: 9  Episode Reward:  34
xxxxx
xa..x
x  .x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[221.30610858 437.96846403 581.22500173   0.        ]
------
Step:7, Action:East
State  103
Old Q Values:  [221.30610858 437.96846403 581.22500173   0.        ]
New Q values:  [221.30610858 437.96846403 238.35800069   0.        ]
Reward: 9  Episode Reward:  43
xxxxx
x a.x
x  .x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SE
[0.   1.56 0.   0.  ]
------
Step:8, Action:South
State  119
Old Q Values:  [0.   1.56 0.   0.  ]
New Q values:  [  0.        465.4216645   0.          0.       ]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[ 1.32443385e-01  1.55132555e+03 -4.51080211e+03  4.03062559e+02]
------
Step:9, Action:South
State  199
Old Q Values:  [  14.86214194 1136.00704508 2141.57355904 1915.70494401]
New Q values:  [  14.86214194  860.64522533 2141.57355904 1915.70494401]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          -29.77444073 1356.14135767]
------
Step:10, Action:West
State  277
Old Q Values:  [   1.64433       0.          -29.77444073 1356.14135767]
New Q values:  [  1.64433      0.         -29.77444073 714.50065783]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  575.48038256    26.73544252 -1689.55777052   -35.88578819]
------
Step:11, Action:North
State  261
Old Q Values:  [  575.48038256    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 1663.81931433    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156  448.77263373 4780.75720435    0.        ]
------
Step:12, Action:East
State  183
Old Q Values:  [ 540.75283156  448.77263373 4780.75720435    0.        ]
New Q values:  [ 540.75283156  448.77263373 6553.11033196    0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:13, Action:East
State  199
Old Q Values:  [  14.86214194  860.64522533 2141.57355904 1915.70494401]
New Q values:  [  14.86214194  860.64522533 1337.73675385 1915.70494401]
Reward: 9  Episode Reward:  47
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 886.26132206 1067.85596996  790.72804752 1585.6911008 ]
------
Step:14, Action:West
State  210
Old Q Values:  [ 886.26132206 1067.85596996  790.72804752 1585.6911008 ]
New Q values:  [ 886.26132206 1067.85596996  790.72804752 5275.08389054]
Reward: -1  Episode Reward:  46
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:15, Action:East
State  196
Old Q Values:  [-2469.90645144  2291.30047507  2558.99506573   239.04887894]
New Q values:  [-2469.90645144  2291.30047507  1377.40136329   239.04887894]
Reward: -1  Episode Reward:  45
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1181.34445667  1151.62530828 -4584.50430574   566.35179234]
------
Step:16, Action:North
State  208
Old Q Values:  [ 1181.34445667  1151.62530828 -4584.50430574   566.35179234]
New Q values:  [73806.50785532  1151.62530828 -4584.50430574   566.35179234]
Reward: 100009  Episode Reward:  100054
xxxxx
x gax
x   x
x   x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869 1502.60929848  420.82027852 -120.29354603]
New Q values:  [-177.44732869 1502.60929848  391.45858569 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   595.56427871   725.76824759]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   595.56427871   725.76824759]
New Q values:  [ -253.44886264 -1902.20915811   595.56427871   740.49008858]
Reward: -1  Episode Reward:  8
xxxxx
xa  x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1502.60929848  391.45858569 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 1502.60929848  391.45858569 -120.29354603]
New Q values:  [-177.44732869 1221.87186368  391.45858569 -120.29354603]
Reward: 9  Episode Reward:  17
xxxxx
x   x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 2051.42714763 1558.87588424  154.04646645]
------
Step:4, Action:South
State  189
Old Q Values:  [ 422.44659346 2051.42714763 1558.87588424  154.04646645]
New Q values:  [ 422.44659346 1325.11665335 1558.87588424  154.04646645]
Reward: 9  Episode Reward:  26
xxxxx
x   x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1663.81931433    26.73544252 -1689.55777052   -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [ 1663.81931433    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 1132.590491      26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 1325.11665335 1558.87588424  154.04646645]
------
Step:6, Action:East
State  189
Old Q Values:  [ 422.44659346 1325.11665335 1558.87588424  154.04646645]
New Q values:  [ 422.44659346 1325.11665335 1094.3480182   154.04646645]
Reward: 9  Episode Reward:  34
xxxxx
x  gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[ 1.32443385e-01  1.55132555e+03 -4.51080211e+03  4.03062559e+02]
------
Step:7, Action:South
State  197
Old Q Values:  [ 1.32443385e-01  1.55132555e+03 -4.51080211e+03  4.03062559e+02]
New Q values:  [ 1.32443385e-01  8.40280417e+02 -4.51080211e+03  4.03062559e+02]
Reward: 9  Episode Reward:  43
xxxxx
x  gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 714.50065783]
------
Step:8, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1100.75874442   772.33519233]
New Q values:  [   16.82637525 -5807.06396197  1100.75874442   648.11122423]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1132.590491      26.73544252 -1689.55777052   -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [ 1132.590491      26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  539.58700984    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[290.50271147 221.228739   281.43329554 262.76946019]
------
Step:10, Action:North
State  181
Old Q Values:  [290.50271147 221.228739   281.43329554 262.76946019]
New Q values:  [216.3106267  221.228739   281.43329554 262.76946019]
Reward: -1  Episode Reward:  40
xxxxx
xa gx
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   335.69847369 -5413.46457526  -180.6       ]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 1221.87186368  391.45858569 -120.29354603]
New Q values:  [-177.44732869  885.68374148  391.45858569 -120.29354603]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 1325.11665335 1094.3480182   154.04646645]
------
Step:12, Action:South
State  183
Old Q Values:  [ 540.75283156  448.77263373 6553.11033196    0.        ]
New Q values:  [ 540.75283156  340.78515644 6553.11033196    0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  539.58700984    26.73544252 -1689.55777052   -35.88578819]
------
Step:13, Action:North
State  261
Old Q Values:  [  539.58700984    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  612.76979994    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 422.44659346 1325.11665335 1094.3480182   154.04646645]
------
Step:14, Action:South
State  181
Old Q Values:  [216.3106267  221.228739   281.43329554 262.76946019]
New Q values:  [216.3106267  271.72243558 281.43329554 262.76946019]
Reward: -1  Episode Reward:  36
xxxxx
x  gx
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  612.76979994    26.73544252 -1689.55777052   -35.88578819]
------
Step:15, Action:North
State  261
Old Q Values:  [  612.76979994    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  328.93790864    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  35
xxxxx
x g x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[216.3106267  271.72243558 281.43329554 262.76946019]
------
Step:16, Action:East
State  181
Old Q Values:  [216.3106267  271.72243558 281.43329554 262.76946019]
New Q values:  [216.3106267  271.72243558 364.05744322 262.76946019]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[ 1.32443385e-01  8.40280417e+02 -4.51080211e+03  4.03062559e+02]
------
Step:17, Action:South
State  196
Old Q Values:  [-2469.90645144  2291.30047507  1377.40136329   239.04887894]
New Q values:  [-2469.90645144  1246.14781335  1377.40136329   239.04887894]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197  1100.75874442   648.11122423]
------
Step:18, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197  1100.75874442   648.11122423]
New Q values:  [   16.82637525 -5807.06396197   896.58323434   648.11122423]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966  1502.93245525]
------
Step:19, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966  1502.93245525]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   869.5479524 ]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   896.58323434   648.11122423]
------
Step:20, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197   896.58323434   648.11122423]
New Q values:  [   16.82637525 -5807.06396197   618.89767946   648.11122423]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   869.5479524 ]
------
Step:21, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   869.5479524 ]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   541.65254823]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946   648.11122423]
------
Step:22, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946   648.11122423]
New Q values:  [   16.82637525 -5807.06396197   618.89767946   357.32586229]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  328.93790864    26.73544252 -1689.55777052   -35.88578819]
------
Step:23, Action:North
State  261
Old Q Values:  [  328.93790864    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  240.19239642    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  37
xxxxx
x g x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[216.3106267  271.72243558 364.05744322 262.76946019]
------
Step:24, Action:East
State  180
Old Q Values:  [-2405.85343029   572.80541947  2303.90790636 -4966.32149798]
New Q values:  [-2405.85343029   572.80541947  1334.18357153 -4966.32149798]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1246.14781335  1377.40136329   239.04887894]
------
Step:25, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  5.01320293e+03 -2.68406591e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  8.41526335e+04 -2.68406591e+03]
Reward: 100009  Episode Reward:  100045
xxxxx
x g x
x  ax
x   x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.84691248e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.84691248e+02]
New Q values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.21423526e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   595.56427871   740.49008858]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   575.67443092]
New Q values:  [ -281.736      -1150.91067548   297.23868516   468.95442753]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253  777.6155172  -252.78192178]
------
Step:3, Action:East
State  107
Old Q Values:  [-252.35169558  765.83302253  777.6155172  -252.78192178]
New Q values:  [-252.35169558  765.83302253  532.59323345 -252.78192178]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   595.56427871   740.49008858]
------
Step:4, Action:West
State  121
Old Q Values:  [    0.             0.         -7059.48304203   303.61233522]
New Q values:  [    0.             0.         -7059.48304203   221.5544762 ]
Reward: -1  Episode Reward:  16
xxxxx
xa gx
x...x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   335.69847369 -5413.46457526  -180.6       ]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869  885.68374148  391.45858569 -120.29354603]
New Q values:  [-177.44732869  468.89072956  391.45858569 -120.29354603]
Reward: 9  Episode Reward:  25
xxxxx
x   x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[216.3106267  271.72243558 364.05744322 262.76946019]
------
Step:6, Action:East
State  181
Old Q Values:  [216.3106267  271.72243558 364.05744322 262.76946019]
New Q values:  [ 216.3106267   271.72243558 1215.18995229  262.76946019]
Reward: 9  Episode Reward:  34
xxxxx
x  gx
x a.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  3.54722325e+03 -5.74006972e+03  2.00341972e+02]
------
Step:7, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  3.54722325e+03 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.65046828e+03 -5.74006972e+03  2.00341972e+02]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642  753.92993927]
------
Step:8, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946   357.32586229]
New Q values:  [   16.82637525 -5807.06396197   618.89767946   220.38806384]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  240.19239642    26.73544252 -1689.55777052   -35.88578819]
------
Step:9, Action:North
State  261
Old Q Values:  [  240.19239642    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  460.03394425    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  51
xxxxx
x g x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 216.3106267   271.72243558 1215.18995229  262.76946019]
------
Step:10, Action:East
State  180
Old Q Values:  [-2405.85343029   572.80541947  1334.18357153 -4966.32149798]
New Q values:  [-2405.85343029   572.80541947   946.2938376  -4966.32149798]
Reward: -1  Episode Reward:  50
xxxxx
xg  x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1246.14781335  1377.40136329   239.04887894]
------
Step:11, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  8.41526335e+04 -2.68406591e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  1.15808406e+05 -2.68406591e+03]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
xg ax
x   x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1015.68722189 -4582.3674281    534.04109446]
------
Step:1, Action:South
State  195
Old Q Values:  [   38.85388605 10817.53531707  1849.21327227   876.97690303]
New Q values:  [  38.85388605 4558.59310861 1849.21327227  876.97690303]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642  753.92993927]
------
Step:2, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642  753.92993927]
New Q values:  [  677.52857079  -168.92307549  -256.73274642 27832.30631648]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.  x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[91751.11446922  6367.89256635 16022.11762108  1875.31501677]
------
Step:3, Action:North
State  261
Old Q Values:  [  460.03394425    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [  553.97056339    26.73544252 -1689.55777052   -35.88578819]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 216.3106267   271.72243558 1215.18995229  262.76946019]
------
Step:4, Action:East
State  181
Old Q Values:  [ 216.3106267   271.72243558 1215.18995229  262.76946019]
New Q values:  [  216.3106267    271.72243558 29227.9977114    262.76946019]
Reward: -10001  Episode Reward:  -9974
xxxxx
x...x
x g x
x  .x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   297.23868516   468.95442753]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   468.95442753]
New Q values:  [ -281.736      -1150.91067548   297.23868516   333.64898988]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  468.89072956  391.45858569 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  468.89072956  391.45858569 -120.29354603]
New Q values:  [-177.44732869 2158.88939141  391.45858569 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156  340.78515644 6553.11033196    0.        ]
------
Step:3, Action:East
State  189
Old Q Values:  [ 422.44659346 1325.11665335 1094.3480182   154.04646645]
New Q values:  [ 422.44659346 1325.11665335  695.22333228  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[ 1.32443385e-01  8.40280417e+02 -4.51080211e+03  4.03062559e+02]
------
Step:4, Action:South
State  197
Old Q Values:  [ 1.32443385e-01  8.40280417e+02 -4.51080211e+03  4.03062559e+02]
New Q values:  [ 1.32443385e-01  5.55862364e+02 -4.51080211e+03  4.03062559e+02]
Reward: 9  Episode Reward:  36
xxxxx
x  gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 714.50065783]
------
Step:5, Action:West
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 714.50065783]
New Q values:  [  1.64433      0.         -29.77444073 451.39143215]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  553.97056339    26.73544252 -1689.55777052   -35.88578819]
------
Step:6, Action:North
State  261
Old Q Values:  [  553.97056339    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 8989.38753878    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  216.3106267    271.72243558 29227.9977114    262.76946019]
------
Step:7, Action:South
State  181
Old Q Values:  [  216.3106267    271.72243558 29227.9977114    262.76946019]
New Q values:  [  216.3106267   2804.90523587 29227.9977114    262.76946019]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 8989.38753878    26.73544252 -1689.55777052   -35.88578819]
------
Step:8, Action:North
State  261
Old Q Values:  [ 8989.38753878    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [12363.55432893    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  32
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  216.3106267   2804.90523587 29227.9977114    262.76946019]
------
Step:9, Action:East
State  181
Old Q Values:  [  216.3106267   2804.90523587 29227.9977114    262.76946019]
New Q values:  [  216.3106267   2804.90523587 11857.35779377   262.76946019]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[ 1.32443385e-01  5.55862364e+02 -4.51080211e+03  4.03062559e+02]
------
Step:10, Action:South
State  199
Old Q Values:  [  14.86214194  860.64522533 1337.73675385 1915.70494401]
New Q values:  [  14.86214194  479.07551978 1337.73675385 1915.70494401]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[  1.64433      0.         -29.77444073 451.39143215]
------
Step:11, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946   220.38806384]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  3796.62152422]
Reward: -1  Episode Reward:  29
xxxxx
x  .x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[12363.55432893    26.73544252 -1689.55777052   -35.88578819]
------
Step:12, Action:North
State  261
Old Q Values:  [12363.55432893    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 6910.75483116    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156  340.78515644 6553.11033196    0.        ]
------
Step:13, Action:East
State  183
Old Q Values:  [ 540.75283156  340.78515644 6553.11033196    0.        ]
New Q values:  [ 540.75283156  340.78515644 7262.05158301    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:14, Action:East
State  196
Old Q Values:  [-2469.90645144  1246.14781335  1377.40136329   239.04887894]
New Q values:  [-2469.90645144  1246.14781335 22698.31290191   239.04887894]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[73806.50785532  1151.62530828 -4584.50430574   566.35179234]
------
Step:15, Action:North
State  218
Old Q Values:  [ 401.76524785 3567.55356402    0.         1039.23717408]
New Q values:  [ 292.53315687 3567.55356402    0.         1039.23717408]
Reward: 9  Episode Reward:  45
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.21423526e+02]
------
Step:16, Action:West
State  130
Old Q Values:  [ 46177.80406237  16843.0423315    -180.00807518 107587.90147272]
New Q values:  [46177.80406237 16843.0423315   -180.00807518 82035.3341718 ]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
------
Step:17, Action:West
State  126
Old Q Values:  [   0.          331.64678262 1161.74098086  927.83230218]
New Q values:  [   0.          331.64678262 1161.74098086 1018.19973829]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2158.88939141  391.45858569 -120.29354603]
------
Step:18, Action:South
State  110
Old Q Values:  [ -239.29051573 -4240.96053511   429.94531313  -180.6       ]
New Q values:  [ -239.29051573 -1158.52310634   429.94531313  -180.6       ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:19, Action:East
State  191
Old Q Values:  [  3.06655861 970.40507756 307.92473414   0.        ]
New Q values:  [  3.06655861 970.40507756 302.52728443   0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[    0.         -1406.21014518   599.85796923     0.        ]
------
Step:20, Action:East
State  206
Old Q Values:  [    0.         -1406.21014518   599.85796923     0.        ]
New Q values:  [    0.         -1406.21014518  1309.6092569      0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 292.53315687 3567.55356402    0.         1039.23717408]
------
Step:21, Action:South
State  210
Old Q Values:  [ 886.26132206 1067.85596996  790.72804752 5275.08389054]
New Q values:  [  886.26132206 60595.03815245   790.72804752  5275.08389054]
Reward: 100009  Episode Reward:  100049
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156  340.78515644 7262.05158301    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 540.75283156  340.78515644 7262.05158301    0.        ]
New Q values:  [ 540.75283156  340.78515644 3749.74241149    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.79840593e+03]
------
Step:2, Action:West
State  195
Old Q Values:  [  38.85388605 4558.59310861 1849.21327227  876.97690303]
New Q values:  [  38.85388605 4558.59310861 1849.21327227 1475.11348466]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156  340.78515644 3749.74241149    0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 540.75283156  340.78515644 3749.74241149    0.        ]
New Q values:  [ 540.75283156  340.78515644 2338.81874289    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.79840593e+03]
------
Step:4, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.79840593e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xa .x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  5670.90973934     0.        ]
------
Step:5, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  5670.90973934     0.        ]
New Q values:  [    0.         -5969.29177534  3113.77448359     0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
------
Step:6, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  1.15808406e+05 -2.68406591e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  1.15808406e+05 -6.79033821e+03]
Reward: -10001  Episode Reward:  -9996
xxxxx
x...x
xg .x
x.. x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2158.88939141  391.45858569 -120.29354603]
------
Step:1, Action:South
State  110
Old Q Values:  [ -239.29051573 -1158.52310634   429.94531313  -180.6       ]
New Q values:  [-239.29051573  476.12310254  429.94531313 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  3113.77448359     0.        ]
------
Step:2, Action:East
State  180
Old Q Values:  [-2405.85343029   572.80541947   946.2938376  -4966.32149798]
New Q values:  [-2405.85343029   572.80541947  7193.41140561 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
xga.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1246.14781335 22698.31290191   239.04887894]
------
Step:3, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  1.15808406e+05 -6.79033821e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  6.84707147e+04 -6.79033821e+03]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[73806.50785532  1151.62530828 -4584.50430574   566.35179234]
------
Step:4, Action:North
State  216
Old Q Values:  [ 1444.3365313   2501.82066656 -8896.20691497  2418.4316485 ]
New Q values:  [ 1210.82830871  2501.82066656 -8896.20691497  2418.4316485 ]
Reward: 9  Episode Reward:  36
xxxxx
x gax
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2092.31232062 -2383.80019164   -72.21682463]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351  2092.31232062 -2383.80019164   -72.21682463]
New Q values:  [ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
Reward: -1  Episode Reward:  35
xxxxx
xg. x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1210.82830871  2501.82066656 -8896.20691497  2418.4316485 ]
------
Step:6, Action:South
State  208
Old Q Values:  [73806.50785532  1151.62530828 -4584.50430574   566.35179234]
New Q values:  [73806.50785532   628.54588778 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  44
xxxxx
x . x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   541.65254823]
------
Step:7, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   541.65254823]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   687.85564224]
Reward: -1  Episode Reward:  43
xxxxx
x . x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1572.64874317 -4499.2450344 ]
------
Step:8, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1572.64874317 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799   834.81618994 -4499.2450344 ]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   687.85564224]
------
Step:9, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   687.85564224]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   524.98711388]
Reward: -1  Episode Reward:  41
xxxxx
xg. x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   834.81618994 -4499.2450344 ]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   834.81618994 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799   490.82261014 -4499.2450344 ]
Reward: -1  Episode Reward:  40
xxxxx
x . x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   524.98711388]
------
Step:11, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   524.98711388]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   356.64162859]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   490.82261014 -4499.2450344 ]
------
Step:12, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   490.82261014 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799   302.72153263 -4499.2450344 ]
Reward: -1  Episode Reward:  38
xxxxx
x . x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   356.64162859]
------
Step:13, Action:West
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   356.64162859]
New Q values:  [ -769.14142248 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  37
xxxxx
x . x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   302.72153263 -4499.2450344 ]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   302.72153263 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799   190.35054642 -4499.2450344 ]
Reward: -1  Episode Reward:  36
xxxxx
x . x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -769.14142248 -6442.16912869 -8192.20126966   232.87311123]
------
Step:15, Action:North
State  288
Old Q Values:  [ -769.14142248 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [21833.6957876  -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  35
xxxxx
x . x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[73806.50785532   628.54588778 -4584.50430574   566.35179234]
------
Step:16, Action:North
State  216
Old Q Values:  [ 1210.82830871  2501.82066656 -8896.20691497  2418.4316485 ]
New Q values:  [  959.79266195  2501.82066656 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  34
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
------
Step:17, Action:South
State  128
Old Q Values:  [11374.93691792 11203.77199754 -8652.84       44428.56690885]
New Q values:  [11374.93691792 26622.86115561 -8652.84       44428.56690885]
Reward: -1  Episode Reward:  33
xxxxx
x .gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[73806.50785532   628.54588778 -4584.50430574   566.35179234]
------
Step:18, Action:South
State  216
Old Q Values:  [  959.79266195  2501.82066656 -8896.20691497  2418.4316485 ]
New Q values:  [  959.79266195  7550.23700291 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21833.6957876  -6442.16912869 -8192.20126966   232.87311123]
------
Step:19, Action:North
State  288
Old Q Values:  [21833.6957876  -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [30874.83067164 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[73806.50785532   628.54588778 -4584.50430574   566.35179234]
------
Step:20, Action:North
State  216
Old Q Values:  [  959.79266195  7550.23700291 -8896.20691497  2418.4316485 ]
New Q values:  [  859.37840324  7550.23700291 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  30
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
------
Step:21, Action:South
State  130
Old Q Values:  [46177.80406237 16843.0423315   -180.00807518 82035.3341718 ]
New Q values:  [46177.80406237 28878.5692892   -180.00807518 82035.3341718 ]
Reward: -1  Episode Reward:  29
xxxxx
x . x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[73806.50785532   628.54588778 -4584.50430574   566.35179234]
------
Step:22, Action:North
State  216
Old Q Values:  [  859.37840324  7550.23700291 -8896.20691497  2418.4316485 ]
New Q values:  [  819.21269976  7550.23700291 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  28
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
------
Step:23, Action:South
State  130
Old Q Values:  [46177.80406237 28878.5692892   -180.00807518 82035.3341718 ]
New Q values:  [46177.80406237 33692.78007228  -180.00807518 82035.3341718 ]
Reward: -1  Episode Reward:  27
xxxxx
x . x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[73806.50785532   628.54588778 -4584.50430574   566.35179234]
------
Step:24, Action:North
State  208
Old Q Values:  [73806.50785532   628.54588778 -4584.50430574   566.35179234]
New Q values:  [54132.60339367   628.54588778 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  26
xxxxx
x .ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 33692.78007228  -180.00807518 82035.3341718 ]
------
Step:25, Action:West
State  130
Old Q Values:  [46177.80406237 33692.78007228  -180.00807518 82035.3341718 ]
New Q values:  [46177.80406237 33692.78007228  -180.00807518 53552.3379283 ]
Reward: 9  Episode Reward:  35
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  28498.32154925 69109.34753192]
------
Step:26, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   333.64898988]
New Q values:  [ -281.736      -1150.91067548   297.23868516   248.57627421]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -5363.03361968   385.72226087  -180.6       ]
------
Step:27, Action:East
State  104
Old Q Values:  [-8652.84        6965.13445598   579.78600099 -8652.84      ]
New Q values:  [-8652.84        6965.13445598   821.87865408 -8652.84      ]
Reward: -1  Episode Reward:  33
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1968.5475123  -4277.89685558]
------
Step:28, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1968.5475123  -4277.89685558]
New Q values:  [-9594.56523706 -8069.05606225  1262.88034339 -4277.89685558]
Reward: -1  Episode Reward:  32
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
------
Step:29, Action:South
State  138
Old Q Values:  [ 7.64171987e+01 -5.06117151e+03 -3.22965309e-01  4.21423526e+02]
New Q values:  [ 7.64171987e+01  2.40002496e+02 -3.22965309e-01  4.21423526e+02]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  819.21269976  7550.23700291 -8896.20691497  2418.4316485 ]
------
Step:30, Action:South
State  216
Old Q Values:  [  819.21269976  7550.23700291 -8896.20691497  2418.4316485 ]
New Q values:  [  819.21269976 12281.94400265 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[30874.83067164 -6442.16912869 -8192.20126966   232.87311123]
------
Step:31, Action:North
State  288
Old Q Values:  [30874.83067164 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [30527.84371439 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  886.26132206 60595.03815245   790.72804752  5275.08389054]
------
Step:32, Action:South
State  218
Old Q Values:  [ 292.53315687 3567.55356402    0.         1039.23717408]
New Q values:  [  292.53315687 10584.77453993     0.          1039.23717408]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[30527.84371439 -6442.16912869 -8192.20126966   232.87311123]
------
Step:33, Action:North
State  288
Old Q Values:  [30527.84371439 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [15895.12068655 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  819.21269976 12281.94400265 -8896.20691497  2418.4316485 ]
------
Step:34, Action:South
State  216
Old Q Values:  [  819.21269976 12281.94400265 -8896.20691497  2418.4316485 ]
New Q values:  [  819.21269976  9680.71380703 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15895.12068655 -6442.16912869 -8192.20126966   232.87311123]
------
Step:35, Action:North
State  288
Old Q Values:  [15895.12068655 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [ 9261.66241673 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  819.21269976  9680.71380703 -8896.20691497  2418.4316485 ]
------
Step:36, Action:South
State  216
Old Q Values:  [  819.21269976  9680.71380703 -8896.20691497  2418.4316485 ]
New Q values:  [  819.21269976  6650.18424783 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  24
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9261.66241673 -6442.16912869 -8192.20126966   232.87311123]
------
Step:37, Action:North
State  288
Old Q Values:  [ 9261.66241673 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [ 5699.12024104 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  819.21269976  6650.18424783 -8896.20691497  2418.4316485 ]
------
Step:38, Action:South
State  216
Old Q Values:  [  819.21269976  6650.18424783 -8896.20691497  2418.4316485 ]
New Q values:  [  819.21269976  4369.20977144 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5699.12024104 -6442.16912869 -8192.20126966   232.87311123]
------
Step:39, Action:North
State  288
Old Q Values:  [ 5699.12024104 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [20457.55954215 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[  886.26132206 60595.03815245   790.72804752  5275.08389054]
------
Step:40, Action:South
State  218
Old Q Values:  [  292.53315687 10584.77453993     0.          1039.23717408]
New Q values:  [  292.53315687 10370.57767862     0.          1039.23717408]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20457.55954215 -6442.16912869 -8192.20126966   232.87311123]
------
Step:41, Action:North
State  288
Old Q Values:  [20457.55954215 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [ 9493.18674829 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x gax
x.  x
xxxxx
Step:42, Action:South
State  216
Old Q Values:  [  819.21269976  4369.20977144 -8896.20691497  2418.4316485 ]
New Q values:  [  819.21269976  4595.03993307 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  18
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9493.18674829 -6442.16912869 -8192.20126966   232.87311123]
------
Step:43, Action:North
State  288
Old Q Values:  [ 9493.18674829 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [ 5175.18667924 -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  17
xxxxx
x  gx
x  ax
x.  x
xxxxx
Step:44, Action:North
State  216
Old Q Values:  [  819.21269976  4595.03993307 -8896.20691497  2418.4316485 ]
New Q values:  [-4294.40294018  4595.03993307 -8896.20691497  2418.4316485 ]
Reward: -10001  Episode Reward:  -9984
xxxxx
x   x
x  gx
x.  x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  2.45236401e+03  6.84707147e+04 -6.79033821e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  2082.55581386 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  2216.93430546 1141.49622464]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x  ax
x...x
xxxxx
Step:2, Action:South
State  208
Old Q Values:  [54132.60339367   628.54588778 -4584.50430574   566.35179234]
New Q values:  [54132.60339367  1809.37435888 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5175.18667924 -6442.16912869 -8192.20126966   232.87311123]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5175.18667924 -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [18309.2556898  -6442.16912869 -8192.20126966   232.87311123]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[54132.60339367  1809.37435888 -4584.50430574   566.35179234]
------
Step:4, Action:North
State  216
Old Q Values:  [-4294.40294018  4595.03993307 -8896.20691497  2418.4316485 ]
New Q values:  [-1236.29983761  4595.03993307 -8896.20691497  2418.4316485 ]
Reward: 9  Episode Reward:  26
xxxxx
xg.ax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
------
Step:5, Action:South
State  130
Old Q Values:  [46177.80406237 33692.78007228  -180.00807518 53552.3379283 ]
New Q values:  [46177.80406237 29716.29304701  -180.00807518 53552.3379283 ]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[54132.60339367  1809.37435888 -4584.50430574   566.35179234]
------
Step:6, Action:North
State  216
Old Q Values:  [-1236.29983761  4595.03993307 -8896.20691497  2418.4316485 ]
New Q values:  [  -19.05859658  4595.03993307 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  24
xxxxx
xg.ax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
------
Step:7, Action:South
State  130
Old Q Values:  [46177.80406237 29716.29304701  -180.00807518 53552.3379283 ]
New Q values:  [46177.80406237 28125.69823691  -180.00807518 53552.3379283 ]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[54132.60339367  1809.37435888 -4584.50430574   566.35179234]
------
Step:8, Action:North
State  216
Old Q Values:  [  -19.05859658  4595.03993307 -8896.20691497  2418.4316485 ]
New Q values:  [  467.83789983  4595.03993307 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  22
xxxxx
xg.ax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
------
Step:9, Action:South
State  130
Old Q Values:  [46177.80406237 28125.69823691  -180.00807518 53552.3379283 ]
New Q values:  [46177.80406237 27489.46031286  -180.00807518 53552.3379283 ]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[54132.60339367  1809.37435888 -4584.50430574   566.35179234]
------
Step:10, Action:North
State  208
Old Q Values:  [54132.60339367  1809.37435888 -4584.50430574   566.35179234]
New Q values:  [37718.14273596  1809.37435888 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  20
xxxxx
x..ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 27489.46031286  -180.00807518 53552.3379283 ]
------
Step:11, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.40002496e+02 -3.22965309e-01  4.21423526e+02]
New Q values:  [ 7.64171987e+01  2.40002496e+02 -3.22965309e-01  3.96116437e+02]
Reward: 9  Episode Reward:  29
xxxxx
x.a x
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   595.56427871   740.49008858]
------
Step:12, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   595.56427871   740.49008858]
New Q values:  [ -253.44886264 -1902.20915811   595.56427871   531.34594219]
Reward: 9  Episode Reward:  38
xxxxx
xa  x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  765.83302253  532.59323345 -252.78192178]
------
Step:13, Action:South
State  107
Old Q Values:  [-252.35169558  765.83302253  532.59323345 -252.78192178]
New Q values:  [-252.35169558  580.13040598  532.59323345 -252.78192178]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xa  x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[429.84695601   0.         914.65732325   0.        ]
------
Step:14, Action:East
State  187
Old Q Values:  [429.84695601   0.         914.65732325   0.        ]
New Q values:  [ 429.84695601    0.         4039.55692722    0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 1.22476467e+04 1.95105832e+03 0.00000000e+00]
------
Step:15, Action:South
State  202
Old Q Values:  [    0.         -7041.23396577  2329.27778519     0.        ]
New Q values:  [    0.         -8753.98842238  2329.27778519     0.        ]
Reward: -9991  Episode Reward:  -9955
xxxxx
x   x
x   x
x.g x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 6910.75483116    26.73544252 -1689.55777052   -35.88578819]
------
Step:1, Action:North
State  261
Old Q Values:  [ 6910.75483116    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 6326.9092706     26.73544252 -1689.55777052   -35.88578819]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  216.3106267   2804.90523587 11857.35779377   262.76946019]
------
Step:2, Action:South
State  181
Old Q Values:  [  216.3106267   2804.90523587 11857.35779377   262.76946019]
New Q values:  [  216.3106267   3019.43487553 11857.35779377   262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 6326.9092706     26.73544252 -1689.55777052   -35.88578819]
------
Step:3, Action:North
State  261
Old Q Values:  [ 6326.9092706     26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 6087.37104637    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  216.3106267   3019.43487553 11857.35779377   262.76946019]
------
Step:4, Action:South
State  181
Old Q Values:  [  216.3106267   3019.43487553 11857.35779377   262.76946019]
New Q values:  [  216.3106267   3033.38526412 11857.35779377   262.76946019]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x .gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 6087.37104637    26.73544252 -1689.55777052   -35.88578819]
------
Step:5, Action:North
State  261
Old Q Values:  [ 6087.37104637    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 5991.55575668    26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  5
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  216.3106267   3033.38526412 11857.35779377   262.76946019]
------
Step:6, Action:South
State  183
Old Q Values:  [ 540.75283156  340.78515644 2338.81874289    0.        ]
New Q values:  [ 540.75283156 1933.18078958 2338.81874289    0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5991.55575668    26.73544252 -1689.55777052   -35.88578819]
------
Step:7, Action:North
State  261
Old Q Values:  [ 5991.55575668    26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 5953.2296408     26.73544252 -1689.55777052   -35.88578819]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  216.3106267   3033.38526412 11857.35779377   262.76946019]
------
Step:8, Action:South
State  181
Old Q Values:  [  216.3106267   3033.38526412 11857.35779377   262.76946019]
New Q values:  [  216.3106267   2998.72299789 11857.35779377   262.76946019]
Reward: -1  Episode Reward:  2
xxxxx
x.g.x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5953.2296408     26.73544252 -1689.55777052   -35.88578819]
------
Step:9, Action:North
State  260
Old Q Values:  [-6626.68415157 -2735.46306511   652.50364178 -6102.86502307]
New Q values:  [ -493.25023894 -2735.46306511   652.50364178 -6102.86502307]
Reward: -1  Episode Reward:  1
xxxxx
xg..x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2405.85343029   572.80541947  7193.41140561 -4966.32149798]
------
Step:10, Action:East
State  181
Old Q Values:  [  216.3106267   2998.72299789 11857.35779377   262.76946019]
New Q values:  [  216.3106267   2998.72299789 11557.83698808   262.76946019]
Reward: 9  Episode Reward:  10
xxxxx
x.g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1246.14781335 22698.31290191   239.04887894]
------
Step:11, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  6.84707147e+04 -6.79033821e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  3.87091287e+04 -6.79033821e+03]
Reward: 9  Episode Reward:  19
xxxxx
xg..x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[37718.14273596  1809.37435888 -4584.50430574   566.35179234]
------
Step:12, Action:North
State  208
Old Q Values:  [37718.14273596  1809.37435888 -4584.50430574   566.35179234]
New Q values:  [28421.22716704  1809.37435888 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  28
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 26622.86115561 -8652.84       44428.56690885]
------
Step:13, Action:South
State  128
Old Q Values:  [11374.93691792 26622.86115561 -8652.84       44428.56690885]
New Q values:  [11374.93691792 19174.91261236 -8652.84       44428.56690885]
Reward: -1  Episode Reward:  27
xxxxx
x..gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28421.22716704  1809.37435888 -4584.50430574   566.35179234]
------
Step:14, Action:South
State  208
Old Q Values:  [28421.22716704  1809.37435888 -4584.50430574   566.35179234]
New Q values:  [28421.22716704  6221.92645049 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18309.2556898  -6442.16912869 -8192.20126966   232.87311123]
------
Step:15, Action:West
State  288
Old Q Values:  [18309.2556898  -6442.16912869 -8192.20126966   232.87311123]
New Q values:  [18309.2556898  -6442.16912869 -8192.20126966  8442.24113943]
Reward: -1  Episode Reward:  35
xxxxx
x.. x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 27832.30631648]
------
Step:16, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   190.35054642 -4499.2450344 ]
New Q values:  [-2527.46239811 -8521.23367799   190.35054642 25725.03632701]
Reward: -1  Episode Reward:  34
xxxxx
x.. x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[91751.11446922  6367.89256635 16022.11762108  1875.31501677]
------
Step:17, Action:North
State  257
Old Q Values:  [91751.11446922  6367.89256635 16022.11762108  1875.31501677]
New Q values:  [60917.79164297  6367.89256635 16022.11762108  1875.31501677]
Reward: -1  Episode Reward:  33
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[80726.48618428 19270.74827372 31955.97996693     0.        ]
------
Step:18, Action:North
State  181
Old Q Values:  [  216.3106267   2998.72299789 11557.83698808   262.76946019]
New Q values:  [  223.31478989  2998.72299789 11557.83698808   262.76946019]
Reward: 9  Episode Reward:  42
xxxxx
xa. x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[221.30610858 437.96846403 238.35800069   0.        ]
------
Step:19, Action:South
State  101
Old Q Values:  [   0.         1050.91802415    0.            0.        ]
New Q values:  [   0.         3887.11830609    0.            0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  223.31478989  2998.72299789 11557.83698808   262.76946019]
------
Step:20, Action:East
State  177
Old Q Values:  [80726.48618428 19270.74827372 31955.97996693     0.        ]
New Q values:  [80726.48618428 19270.74827372 24394.53059268     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  2.45236401e+03  3.87091287e+04 -6.79033821e+03]
------
Step:21, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  3.87091287e+04 -6.79033821e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  2.40094196e+04 -6.79033821e+03]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28421.22716704  6221.92645049 -4584.50430574   566.35179234]
------
Step:22, Action:North
State  210
Old Q Values:  [  886.26132206 60595.03815245   790.72804752  5275.08389054]
New Q values:  [16419.60590731 60595.03815245   790.72804752  5275.08389054]
Reward: -1  Episode Reward:  38
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 27489.46031286  -180.00807518 53552.3379283 ]
------
Step:23, Action:West
State  130
Old Q Values:  [46177.80406237 27489.46031286  -180.00807518 53552.3379283 ]
New Q values:  [ 46177.80406237  27489.46031286   -180.00807518 102159.13943089]
Reward: 100009  Episode Reward:  100047
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
Step:1, Action:West
State  261
Old Q Values:  [ 5953.2296408     26.73544252 -1689.55777052   -35.88578819]
New Q values:  [ 5953.2296408     26.73544252 -1689.55777052  1591.01457697]
Reward: -301  Episode Reward:  -301
xxxxx
x.g x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5953.2296408     26.73544252 -1689.55777052  1591.01457697]
------
Step:2, Action:North
State  261
Old Q Values:  [ 5953.2296408     26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 5854.04295275    26.73544252 -1689.55777052  1591.01457697]
Reward: 9  Episode Reward:  -292
xxxxx
x..gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  223.31478989  2998.72299789 11557.83698808   262.76946019]
------
Step:3, Action:East
State  189
Old Q Values:  [ 422.44659346 1325.11665335  695.22333228  154.04646645]
New Q values:  [ 422.44659346 1325.11665335  588.19549948  154.04646645]
Reward: 9  Episode Reward:  -283
xxxxx
x.. x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1015.68722189 -4582.3674281    534.04109446]
------
Step:4, Action:South
State  193
Old Q Values:  [-5922.26708831  1015.68722189 -4582.3674281    534.04109446]
New Q values:  [-5922.26708831  8761.3667837  -4582.3674281    534.04109446]
Reward: 9  Episode Reward:  -274
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 27832.30631648]
------
Step:5, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  3796.62152422]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  3274.26149551]
Reward: -1  Episode Reward:  -275
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5854.04295275    26.73544252 -1689.55777052  1591.01457697]
------
Step:6, Action:North
State  261
Old Q Values:  [ 5854.04295275    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 5808.36827752    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  -276
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  223.31478989  2998.72299789 11557.83698808   262.76946019]
------
Step:7, Action:East
State  177
Old Q Values:  [80726.48618428 19270.74827372 24394.53059268     0.        ]
New Q values:  [80726.48618428 19270.74827372 12385.62227218     0.        ]
Reward: -1  Episode Reward:  -277
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8761.3667837  -4582.3674281    534.04109446]
------
Step:8, Action:South
State  195
Old Q Values:  [  38.85388605 4558.59310861 1849.21327227 1475.11348466]
New Q values:  [   38.85388605 10172.52913839  1849.21327227  1475.11348466]
Reward: -1  Episode Reward:  -278
xxxxx
x.. x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 27832.30631648]
------
Step:9, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549  -256.73274642 27832.30631648]
New Q values:  [  677.52857079  -168.92307549  -256.73274642 12874.83300985]
Reward: -1  Episode Reward:  -279
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5808.36827752    26.73544252 -1689.55777052  1591.01457697]
------
Step:10, Action:North
State  261
Old Q Values:  [ 5808.36827752    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 3024.39293388    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  -280
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1933.18078958 2338.81874289    0.        ]
------
Step:11, Action:East
State  177
Old Q Values:  [80726.48618428 19270.74827372 12385.62227218     0.        ]
New Q values:  [80726.48618428 19270.74827372  7582.05894398     0.        ]
Reward: -1  Episode Reward:  -281
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8761.3667837  -4582.3674281    534.04109446]
------
Step:12, Action:South
State  195
Old Q Values:  [   38.85388605 10172.52913839  1849.21327227  1475.11348466]
New Q values:  [  38.85388605 7930.86155831 1849.21327227 1475.11348466]
Reward: -1  Episode Reward:  -282
xxxxx
x.. x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 12874.83300985]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   190.35054642 25725.03632701]
New Q values:  [-2527.46239811 -8521.23367799   190.35054642 11196.73241097]
Reward: -1  Episode Reward:  -283
xxxxx
x.. x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3024.39293388    26.73544252 -1689.55777052  1591.01457697]
------
Step:14, Action:North
State  261
Old Q Values:  [ 3024.39293388    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 4676.50826997    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  -284
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  223.31478989  2998.72299789 11557.83698808   262.76946019]
------
Step:15, Action:South
State  177
Old Q Values:  [80726.48618428 19270.74827372  7582.05894398     0.        ]
New Q values:  [80726.48618428  9110.65179048  7582.05894398     0.        ]
Reward: -1  Episode Reward:  -285
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4676.50826997    26.73544252 -1689.55777052  1591.01457697]
------
Step:16, Action:North
State  260
Old Q Values:  [ -493.25023894 -2735.46306511   652.50364178 -6102.86502307]
New Q values:  [ 1960.12332611 -2735.46306511   652.50364178 -6102.86502307]
Reward: -1  Episode Reward:  -286
xxxxx
xg. x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2405.85343029   572.80541947  7193.41140561 -4966.32149798]
------
Step:17, Action:East
State  177
Old Q Values:  [80726.48618428  9110.65179048  7582.05894398     0.        ]
New Q values:  [80726.48618428  9110.65179048 10235.04946499     0.        ]
Reward: -1  Episode Reward:  -287
xxxxx
x.g x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  2.45236401e+03  2.40094196e+04 -6.79033821e+03]
------
Step:18, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  2.40094196e+04 -6.79033821e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  1.81355360e+04 -6.79033821e+03]
Reward: 9  Episode Reward:  -278
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28421.22716704  6221.92645049 -4584.50430574   566.35179234]
------
Step:19, Action:North
State  208
Old Q Values:  [28421.22716704  6221.92645049 -4584.50430574   566.35179234]
New Q values:  [24696.46093947  6221.92645049 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -279
xxxxx
xg.ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 19174.91261236 -8652.84       44428.56690885]
------
Step:20, Action:West
State  130
Old Q Values:  [ 46177.80406237  27489.46031286   -180.00807518 102159.13943089]
New Q values:  [46177.80406237 27489.46031286  -180.00807518 61601.86003193]
Reward: 9  Episode Reward:  -270
xxxxx
x.a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  28498.32154925 69109.34753192]
------
Step:21, Action:West
State  126
Old Q Values:  [   0.          331.64678262 1161.74098086 1018.19973829]
New Q values:  [   0.          331.64678262 1161.74098086 1060.34671274]
Reward: 9  Episode Reward:  -261
xxxxx
xa  x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2158.88939141  391.45858569 -120.29354603]
------
Step:22, Action:South
State  111
Old Q Values:  [-177.44732869 2158.88939141  391.45858569 -120.29354603]
New Q values:  [-177.44732869 1154.07727983  391.45858569 -120.29354603]
Reward: -1  Episode Reward:  -262
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 970.40507756 302.52728443   0.        ]
------
Step:23, Action:South
State  189
Old Q Values:  [ 422.44659346 1325.11665335  588.19549948  154.04646645]
New Q values:  [ 422.44659346 1932.39914233  588.19549948  154.04646645]
Reward: -1  Episode Reward:  -263
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4676.50826997    26.73544252 -1689.55777052  1591.01457697]
------
Step:24, Action:North
State  260
Old Q Values:  [ 1960.12332611 -2735.46306511   652.50364178 -6102.86502307]
New Q values:  [-4612.17892201 -2735.46306511   652.50364178 -6102.86502307]
Reward: -10001  Episode Reward:  -10264
xxxxx
x   x
xg  x
x  .x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18309.2556898  -6442.16912869 -8192.20126966  8442.24113943]
------
Step:1, Action:North
State  288
Old Q Values:  [18309.2556898  -6442.16912869 -8192.20126966  8442.24113943]
New Q values:  [14738.04055776 -6442.16912869 -8192.20126966  8442.24113943]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24696.46093947  6221.92645049 -4584.50430574   566.35179234]
------
Step:2, Action:North
State  208
Old Q Values:  [24696.46093947  6221.92645049 -4584.50430574   566.35179234]
New Q values:  [ 4360.04571425  6221.92645049 -4584.50430574   566.35179234]
Reward: -9991  Episode Reward:  -9982
xxxxx
x .gx
x.. x
x.. x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4676.50826997    26.73544252 -1689.55777052  1591.01457697]
------
Step:1, Action:North
State  261
Old Q Values:  [ 4676.50826997    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 2577.64893086    26.73544252 -1689.55777052  1591.01457697]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1933.18078958 2338.81874289    0.        ]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  3113.77448359     0.        ]
New Q values:  [    0.         -5969.29177534  2090.92038129     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
------
Step:3, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  1.81355360e+04 -6.79033821e+03]
New Q values:  [ 3.89777037e-01  2.45236401e+03  1.81355360e+04 -6.55871186e+03]
Reward: -10001  Episode Reward:  -9993
xxxxx
x...x
xg .x
x ..x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16419.60590731 60595.03815245   790.72804752  5275.08389054]
------
Step:1, Action:South
State  210
Old Q Values:  [16419.60590731 60595.03815245   790.72804752  5275.08389054]
New Q values:  [16419.60590731 28664.82742831   790.72804752  5275.08389054]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14738.04055776 -6442.16912869 -8192.20126966  8442.24113943]
------
Step:2, Action:North
State  288
Old Q Values:  [14738.04055776 -6442.16912869 -8192.20126966  8442.24113943]
New Q values:  [ 7761.19415825 -6442.16912869 -8192.20126966  8442.24113943]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4360.04571425  6221.92645049 -4584.50430574   566.35179234]
------
Step:3, Action:South
State  208
Old Q Values:  [ 4360.04571425  6221.92645049 -4584.50430574   566.35179234]
New Q values:  [ 4360.04571425  5020.84292203 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x..gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7761.19415825 -6442.16912869 -8192.20126966  8442.24113943]
------
Step:4, Action:West
State  288
Old Q Values:  [ 7761.19415825 -6442.16912869 -8192.20126966  8442.24113943]
New Q values:  [ 7761.19415825 -6442.16912869 -8192.20126966  6741.31617906]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   190.35054642 11196.73241097]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   190.35054642 11196.73241097]
New Q values:  [-2527.46239811 -8521.23367799   190.35054642 22753.43045728]
Reward: -1  Episode Reward:  15
xxxxx
x.g.x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[60917.79164297  6367.89256635 16022.11762108  1875.31501677]
------
Step:6, Action:North
State  261
Old Q Values:  [ 2577.64893086    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 4503.81066877    26.73544252 -1689.55777052  1591.01457697]
Reward: 9  Episode Reward:  24
xxxxx
x..gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  223.31478989  2998.72299789 11557.83698808   262.76946019]
------
Step:7, Action:East
State  181
Old Q Values:  [  223.31478989  2998.72299789 11557.83698808   262.76946019]
New Q values:  [ 223.31478989 2998.72299789 7256.94483034  262.76946019]
Reward: 9  Episode Reward:  33
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8761.3667837  -4582.3674281    534.04109446]
------
Step:8, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  2.45236401e+03  1.81355360e+04 -6.55871186e+03]
New Q values:  [ 3.89777037e-01  7.80637474e+03  1.81355360e+04 -6.55871186e+03]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   190.35054642 22753.43045728]
------
Step:9, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   190.35054642 22753.43045728]
New Q values:  [-2527.46239811 -8521.23367799   190.35054642 10451.91538354]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4503.81066877    26.73544252 -1689.55777052  1591.01457697]
------
Step:10, Action:North
State  260
Old Q Values:  [-4612.17892201 -2735.46306511   652.50364178 -6102.86502307]
New Q values:  [-1218.19545442 -2735.46306511   652.50364178 -6102.86502307]
Reward: -1  Episode Reward:  30
xxxxx
x...x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  2090.92038129     0.        ]
------
Step:11, Action:East
State  182
Old Q Values:  [    0.         -5969.29177534  2090.92038129     0.        ]
New Q values:  [    0.         -5969.29177534  1681.77874037     0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x...x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
------
Step:12, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  7.80637474e+03  1.81355360e+04 -6.55871186e+03]
New Q values:  [ 3.89777037e-01  7.80637474e+03  1.81355360e+04 -6.46606132e+03]
Reward: -10001  Episode Reward:  -9972
xxxxx
x...x
xg  x
x   x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
------
Step:1, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.40002496e+02 -3.22965309e-01  3.96116437e+02]
New Q values:  [ 7.64171987e+01  1.47991298e+03 -3.22965309e-01  3.96116437e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  467.83789983  4595.03993307 -8896.20691497  2418.4316485 ]
------
Step:2, Action:South
State  210
Old Q Values:  [16419.60590731 28664.82742831   790.72804752  5275.08389054]
New Q values:  [16419.60590731 13799.6892188    790.72804752  5275.08389054]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7761.19415825 -6442.16912869 -8192.20126966  6741.31617906]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7761.19415825 -6442.16912869 -8192.20126966  6741.31617906]
New Q values:  [ 8029.75943549 -6442.16912869 -8192.20126966  6741.31617906]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16419.60590731 13799.6892188    790.72804752  5275.08389054]
------
Step:4, Action:North
State  210
Old Q Values:  [16419.60590731 13799.6892188    790.72804752  5275.08389054]
New Q values:  [ 7011.21625644 13799.6892188    790.72804752  5275.08389054]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.47991298e+03 -3.22965309e-01  3.96116437e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.47991298e+03 -3.22965309e-01  3.96116437e+02]
New Q values:  [ 7.64171987e+01  4.73127196e+03 -3.22965309e-01  3.96116437e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 7011.21625644 13799.6892188    790.72804752  5275.08389054]
------
Step:6, Action:North
State  210
Old Q Values:  [ 7011.21625644 13799.6892188    790.72804752  5275.08389054]
New Q values:  [ 4223.26808967 13799.6892188    790.72804752  5275.08389054]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.73127196e+03 -3.22965309e-01  3.96116437e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.73127196e+03 -3.22965309e-01  3.96116437e+02]
New Q values:  [ 7.64171987e+01  6.03181555e+03 -3.22965309e-01  3.96116437e+02]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x..ax
xg. x
xxxxx
Step:8, Action:West
State  210
Old Q Values:  [ 4223.26808967 13799.6892188    790.72804752  5275.08389054]
New Q values:  [ 4223.26808967 13799.6892188    790.72804752  2961.44414407]
Reward: 9  Episode Reward:  22
xxxxx
x.  x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
------
Step:9, Action:West
State  203
Old Q Values:  [3.60604218e+00 1.22476467e+04 1.95105832e+03 0.00000000e+00]
New Q values:  [3.60604218e+00 1.22476467e+04 1.95105832e+03 1.05954695e+04]
Reward: 9  Episode Reward:  31
xxxxx
x.  x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         16101.90751562 35300.23152799     0.        ]
------
Step:10, Action:East
State  179
Old Q Values:  [    0.         16101.90751562 35300.23152799     0.        ]
New Q values:  [    0.         16101.90751562 14965.50319905     0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
------
Step:11, Action:West
State  203
Old Q Values:  [3.60604218e+00 1.22476467e+04 1.95105832e+03 1.05954695e+04]
New Q values:  [3.60604218e+00 1.22476467e+04 1.95105832e+03 9.06816004e+03]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[    0.         16101.90751562 14965.50319905     0.        ]
------
Step:12, Action:South
State  177
Old Q Values:  [80726.48618428  9110.65179048 10235.04946499     0.        ]
New Q values:  [80726.48618428 21924.99820908 10235.04946499     0.        ]
Reward: 9  Episode Reward:  38
xxxxx
x.  x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[60917.79164297  6367.89256635 16022.11762108  1875.31501677]
------
Step:13, Action:North
State  257
Old Q Values:  [60917.79164297  6367.89256635 16022.11762108  1875.31501677]
New Q values:  [48584.46251247  6367.89256635 16022.11762108  1875.31501677]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[80726.48618428 21924.99820908 10235.04946499     0.        ]
------
Step:14, Action:North
State  189
Old Q Values:  [ 422.44659346 1932.39914233  588.19549948  154.04646645]
New Q values:  [ 275.08817949 1932.39914233  588.19549948  154.04646645]
Reward: 9  Episode Reward:  46
xxxxx
xag x
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   335.69847369 -5413.46457526  -180.6       ]
------
Step:15, Action:South
State  108
Old Q Values:  [-8463.16477134  4913.1673244    845.00690416     0.        ]
New Q values:  [-8463.16477134  2569.03867731   845.00690416     0.        ]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  1892.16995939  2014.57249182     0.        ]
------
Step:16, Action:East
State  188
Old Q Values:  [-6523.78898263  1892.16995939  2014.57249182     0.        ]
New Q values:  [-6523.78898263  1892.16995939  1470.30928837     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xga x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1737.6681949  2216.93430546 1141.49622464]
------
Step:17, Action:East
State  202
Old Q Values:  [    0.         -8753.98842238  2329.27778519     0.        ]
New Q values:  [    0.         -8753.98842238  4042.28441766     0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  292.53315687 10370.57767862     0.          1039.23717408]
------
Step:18, Action:South
State  216
Old Q Values:  [  467.83789983  4595.03993307 -8896.20691497  2418.4316485 ]
New Q values:  [  467.83789983  4246.34380387 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8029.75943549 -6442.16912869 -8192.20126966  6741.31617906]
------
Step:19, Action:North
State  288
Old Q Values:  [ 8029.75943549 -6442.16912869 -8192.20126966  6741.31617906]
New Q values:  [ 6322.47707778 -6442.16912869 -8192.20126966  6741.31617906]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  292.53315687 10370.57767862     0.          1039.23717408]
------
Step:20, Action:South
State  216
Old Q Values:  [  467.83789983  4246.34380387 -8896.20691497  2418.4316485 ]
New Q values:  [  467.83789983  3720.33237527 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6322.47707778 -6442.16912869 -8192.20126966  6741.31617906]
------
Step:21, Action:West
State  288
Old Q Values:  [ 6322.47707778 -6442.16912869 -8192.20126966  6741.31617906]
New Q values:  [ 6322.47707778 -6442.16912869 -8192.20126966 65837.50108669]
Reward: 100009  Episode Reward:  100049
xxxxx
x   x
x g x
x a x
xxxxx
Episode # 900
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1933.18078958 2338.81874289    0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 540.75283156 1933.18078958 2338.81874289    0.        ]
New Q values:  [ 540.75283156 1933.18078958 1786.93808501    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
------
Step:2, Action:West
State  195
Old Q Values:  [  38.85388605 7930.86155831 1849.21327227 1475.11348466]
New Q values:  [  38.85388605 7930.86155831 1849.21327227 1169.39963074]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1933.18078958 1786.93808501    0.        ]
------
Step:3, Action:South
State  183
Old Q Values:  [ 540.75283156 1933.18078958 1786.93808501    0.        ]
New Q values:  [ 540.75283156 2129.81551646 1786.93808501    0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4503.81066877    26.73544252 -1689.55777052  1591.01457697]
------
Step:4, Action:North
State  261
Old Q Values:  [ 4503.81066877    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 2439.86892245    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 2129.81551646 1786.93808501    0.        ]
------
Step:5, Action:South
State  181
Old Q Values:  [ 223.31478989 2998.72299789 7256.94483034  262.76946019]
New Q values:  [ 223.31478989 1930.84987589 7256.94483034  262.76946019]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2439.86892245    26.73544252 -1689.55777052  1591.01457697]
------
Step:6, Action:North
State  261
Old Q Values:  [ 2439.86892245    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 3152.43101808    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xag.x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 223.31478989 1930.84987589 7256.94483034  262.76946019]
------
Step:7, Action:South
State  183
Old Q Values:  [ 540.75283156 2129.81551646 1786.93808501    0.        ]
New Q values:  [ 540.75283156 1797.05551201 1786.93808501    0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3152.43101808    26.73544252 -1689.55777052  1591.01457697]
------
Step:8, Action:North
State  260
Old Q Values:  [-1218.19545442 -2735.46306511   652.50364178 -6102.86502307]
New Q values:  [   16.65544035 -2735.46306511   652.50364178 -6102.86502307]
Reward: -1  Episode Reward:  12
xxxxx
x...x
xa .x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5969.29177534  1681.77874037     0.        ]
------
Step:9, Action:East
State  183
Old Q Values:  [ 540.75283156 1797.05551201 1786.93808501    0.        ]
New Q values:  [ 540.75283156 1797.05551201 1560.18582186    0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
------
Step:10, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  2.82003529e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  1.63194774e+03]
Reward: -1  Episode Reward:  10
xxxxx
x...x
xa .x
xg. x
xxxxx
Step:11, Action:East
State  180
Old Q Values:  [-2405.85343029   572.80541947  7193.41140561 -4966.32149798]
New Q values:  [-2405.85343029   572.80541947  8317.42536224 -4966.32149798]
Reward: -1  Episode Reward:  9
xxxxx
x...x
xga.x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  7.80637474e+03  1.81355360e+04 -6.46606132e+03]
------
Step:12, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  7.80637474e+03  1.81355360e+04 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  7.80637474e+03  8.76586728e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4360.04571425  5020.84292203 -4584.50430574   566.35179234]
------
Step:13, Action:South
State  208
Old Q Values:  [ 4360.04571425  5020.84292203 -4584.50430574   566.35179234]
New Q values:  [ 4360.04571425 21758.98749482 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6322.47707778 -6442.16912869 -8192.20126966 65837.50108669]
------
Step:14, Action:West
State  288
Old Q Values:  [ 6322.47707778 -6442.16912869 -8192.20126966 65837.50108669]
New Q values:  [ 6322.47707778 -6442.16912869 -8192.20126966 29475.97504974]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   190.35054642 10451.91538354]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   190.35054642 10451.91538354]
New Q values:  [-2527.46239811 -8521.23367799  8918.33273349 10451.91538354]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6322.47707778 -6442.16912869 -8192.20126966 29475.97504974]
------
Step:16, Action:North
State  288
Old Q Values:  [ 6322.47707778 -6442.16912869 -8192.20126966 29475.97504974]
New Q values:  [ 9056.08707956 -6442.16912869 -8192.20126966 29475.97504974]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4360.04571425 21758.98749482 -4584.50430574   566.35179234]
------
Step:17, Action:South
State  208
Old Q Values:  [ 4360.04571425 21758.98749482 -4584.50430574   566.35179234]
New Q values:  [ 4360.04571425 17545.78751285 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  23
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9056.08707956 -6442.16912869 -8192.20126966 29475.97504974]
------
Step:18, Action:West
State  288
Old Q Values:  [ 9056.08707956 -6442.16912869 -8192.20126966 29475.97504974]
New Q values:  [ 9056.08707956 -6442.16912869 -8192.20126966 14925.36463496]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8918.33273349 10451.91538354]
------
Step:19, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8918.33273349 10451.91538354]
New Q values:  [-2527.46239811 -8521.23367799  8918.33273349  5125.89545884]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3152.43101808    26.73544252 -1689.55777052  1591.01457697]
------
Step:20, Action:North
State  261
Old Q Values:  [ 3152.43101808    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 3437.45585634    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 223.31478989 1930.84987589 7256.94483034  262.76946019]
------
Step:21, Action:South
State  180
Old Q Values:  [-2405.85343029   572.80541947  8317.42536224 -4966.32149798]
New Q values:  [-2405.85343029   424.27326032  8317.42536224 -4966.32149798]
Reward: -1  Episode Reward:  19
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[   16.65544035 -2735.46306511   652.50364178 -6102.86502307]
------
Step:22, Action:East
State  260
Old Q Values:  [   16.65544035 -2735.46306511   652.50364178 -6102.86502307]
New Q values:  [   16.65544035 -2735.46306511  2935.90127676 -6102.86502307]
Reward: -1  Episode Reward:  18
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8918.33273349  5125.89545884]
------
Step:23, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8918.33273349  5125.89545884]
New Q values:  [-2527.46239811 -8521.23367799  8044.34248388  5125.89545884]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9056.08707956 -6442.16912869 -8192.20126966 14925.36463496]
------
Step:24, Action:West
State  288
Old Q Values:  [ 9056.08707956 -6442.16912869 -8192.20126966 14925.36463496]
New Q values:  [ 9056.08707956 -6442.16912869 -8192.20126966  9831.99575694]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  -256.73274642 12874.83300985]
------
Step:25, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549  -256.73274642 12874.83300985]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 6180.56996084]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3437.45585634    26.73544252 -1689.55777052  1591.01457697]
------
Step:26, Action:North
State  261
Old Q Values:  [ 3437.45585634    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 1913.49899614    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1797.05551201 1560.18582186    0.        ]
------
Step:27, Action:South
State  181
Old Q Values:  [ 223.31478989 1930.84987589 7256.94483034  262.76946019]
New Q values:  [ 223.31478989 1345.7896492  7256.94483034  262.76946019]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1913.49899614    26.73544252 -1689.55777052  1591.01457697]
------
Step:28, Action:North
State  261
Old Q Values:  [ 1913.49899614    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 2941.88304756    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  12
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 223.31478989 1345.7896492  7256.94483034  262.76946019]
------
Step:29, Action:South
State  177
Old Q Values:  [80726.48618428 21924.99820908 10235.04946499     0.        ]
New Q values:  [80726.48618428 23344.73803738 10235.04946499     0.        ]
Reward: -1  Episode Reward:  11
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[48584.46251247  6367.89256635 16022.11762108  1875.31501677]
------
Step:30, Action:North
State  257
Old Q Values:  [48584.46251247  6367.89256635 16022.11762108  1875.31501677]
New Q values:  [43651.13086027  6367.89256635 16022.11762108  1875.31501677]
Reward: -1  Episode Reward:  10
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[80726.48618428 23344.73803738 10235.04946499     0.        ]
------
Step:31, Action:North
State  181
Old Q Values:  [ 223.31478989 1345.7896492  7256.94483034  262.76946019]
New Q values:  [1260.86140778 1345.7896492  7256.94483034  262.76946019]
Reward: 9  Episode Reward:  19
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         3887.11830609    0.            0.        ]
------
Step:32, Action:South
State  101
Old Q Values:  [   0.         3887.11830609    0.            0.        ]
New Q values:  [   0.         3731.33077154    0.            0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1260.86140778 1345.7896492  7256.94483034  262.76946019]
------
Step:33, Action:East
State  181
Old Q Values:  [1260.86140778 1345.7896492  7256.94483034  262.76946019]
New Q values:  [1260.86140778 1345.7896492  9711.67180271  262.76946019]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1246.14781335 22698.31290191   239.04887894]
------
Step:34, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  7.80637474e+03  8.76586728e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  7.80637474e+03  8.76948316e+03 -6.46606132e+03]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4360.04571425 17545.78751285 -4584.50430574   566.35179234]
------
Step:35, Action:South
State  208
Old Q Values:  [ 4360.04571425 17545.78751285 -4584.50430574   566.35179234]
New Q values:  [ 4360.04571425  9967.31373222 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9056.08707956 -6442.16912869 -8192.20126966  9831.99575694]
------
Step:36, Action:West
State  288
Old Q Values:  [ 9056.08707956 -6442.16912869 -8192.20126966  9831.99575694]
New Q values:  [ 9056.08707956 -6442.16912869 -8192.20126966  6345.50104794]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  8044.34248388  5125.89545884]
------
Step:37, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  8044.34248388  5125.89545884]
New Q values:  [-2527.46239811 -8521.23367799  5933.96311742  5125.89545884]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9056.08707956 -6442.16912869 -8192.20126966  6345.50104794]
------
Step:38, Action:North
State  288
Old Q Values:  [ 9056.08707956 -6442.16912869 -8192.20126966  6345.50104794]
New Q values:  [ 6612.02895149 -6442.16912869 -8192.20126966  6345.50104794]
Reward: -1  Episode Reward:  12
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4360.04571425  9967.31373222 -4584.50430574   566.35179234]
------
Step:39, Action:South
State  208
Old Q Values:  [ 4360.04571425  9967.31373222 -4584.50430574   566.35179234]
New Q values:  [ 4360.04571425  5969.93417833 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  11
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6612.02895149 -6442.16912869 -8192.20126966  6345.50104794]
------
Step:40, Action:North
State  288
Old Q Values:  [ 6612.02895149 -6442.16912869 -8192.20126966  6345.50104794]
New Q values:  [ 4435.1918341  -6442.16912869 -8192.20126966  6345.50104794]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4360.04571425  5969.93417833 -4584.50430574   566.35179234]
------
Step:41, Action:South
State  208
Old Q Values:  [ 4360.04571425  5969.93417833 -4584.50430574   566.35179234]
New Q values:  [ 4360.04571425  4291.02398572 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  9
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4435.1918341  -6442.16912869 -8192.20126966  6345.50104794]
------
Step:42, Action:West
State  288
Old Q Values:  [ 4435.1918341  -6442.16912869 -8192.20126966  6345.50104794]
New Q values:  [ 4435.1918341  -6442.16912869 -8192.20126966  4391.77140743]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642 6180.56996084]
------
Step:43, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 6180.56996084]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 3354.1928986 ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2941.88304756    26.73544252 -1689.55777052  1591.01457697]
------
Step:44, Action:North
State  261
Old Q Values:  [ 2941.88304756    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 4089.65475984    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xag x
x   x
xxxxx
Step:45, Action:West
State  183
Old Q Values:  [ 540.75283156 1797.05551201 1560.18582186    0.        ]
New Q values:  [ 540.75283156 1797.05551201 1560.18582186  358.5166536 ]
Reward: -301  Episode Reward:  -295
xxxxx
x ..x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1797.05551201 1560.18582186  358.5166536 ]
------
Step:46, Action:South
State  183
Old Q Values:  [ 540.75283156 1797.05551201 1560.18582186  358.5166536 ]
New Q values:  [ 540.75283156 1945.11863275 1560.18582186  358.5166536 ]
Reward: -1  Episode Reward:  -296
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4089.65475984    26.73544252 -1689.55777052  1591.01457697]
------
Step:47, Action:North
State  261
Old Q Values:  [ 4089.65475984    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 2218.79749376    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  -297
xxxxx
x ..x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1945.11863275 1560.18582186  358.5166536 ]
------
Step:48, Action:South
State  182
Old Q Values:  [    0.         -5969.29177534  1681.77874037     0.        ]
New Q values:  [    0.         -7507.54632711  1681.77874037     0.        ]
Reward: -10001  Episode Reward:  -10298
xxxxx
x ..x
x   x
xg  x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5933.96311742  5125.89545884]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5933.96311742  5125.89545884]
New Q values:  [-2527.46239811 -8521.23367799  3709.5427972   5125.89545884]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4435.1918341  -6442.16912869 -8192.20126966  4391.77140743]
------
Step:2, Action:North
State  288
Old Q Values:  [ 4435.1918341  -6442.16912869 -8192.20126966  4391.77140743]
New Q values:  [ 5919.38349928 -6442.16912869 -8192.20126966  4391.77140743]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4223.26808967 13799.6892188    790.72804752  2961.44414407]
------
Step:3, Action:South
State  210
Old Q Values:  [ 4223.26808967 13799.6892188    790.72804752  2961.44414407]
New Q values:  [4223.26808967 7295.0907373   790.72804752 2961.44414407]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5919.38349928 -6442.16912869 -8192.20126966  4391.77140743]
------
Step:4, Action:North
State  288
Old Q Values:  [ 5919.38349928 -6442.16912869 -8192.20126966  4391.77140743]
New Q values:  [ 3675.16711399 -6442.16912869 -8192.20126966  4391.77140743]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4360.04571425  4291.02398572 -4584.50430574   566.35179234]
------
Step:5, Action:North
State  208
Old Q Values:  [ 4360.04571425  4291.02398572 -4584.50430574   566.35179234]
New Q values:  [20229.97629528  4291.02398572 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 27489.46031286  -180.00807518 61601.86003193]
------
Step:6, Action:West
State  136
Old Q Values:  [ -170.77177351  1586.87112822 -2383.80019164   -72.21682463]
New Q values:  [ -170.77177351  1586.87112822 -2383.80019164    42.97961301]
Reward: 9  Episode Reward:  34
xxxxx
x.agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   221.5544762 ]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1262.88034339 -4277.89685558]
New Q values:  [-9594.56523706 -8069.05606225  1262.88034339 -1157.45542097]
Reward: 9  Episode Reward:  43
xxxxx
xag x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6         1827.67773756 -7438.53829696     0.        ]
------
Step:8, Action:South
State  104
Old Q Values:  [-8652.84        6965.13445598   821.87865408 -8652.84      ]
New Q values:  [-8652.84        7009.88182296   821.87865408 -8652.84      ]
Reward: 9  Episode Reward:  52
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[  162.71210215     0.         14061.4268019      0.        ]
------
Step:9, Action:East
State  184
Old Q Values:  [  162.71210215     0.         14061.4268019      0.        ]
New Q values:  [ 162.71210215    0.         6289.0510124     0.        ]
Reward: -1  Episode Reward:  51
xxxxx
xg  x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1737.6681949  2216.93430546 1141.49622464]
------
Step:10, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  2216.93430546 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  2002.27343477 1141.49622464]
Reward: -1  Episode Reward:  50
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  467.83789983  3720.33237527 -8896.20691497  2418.4316485 ]
------
Step:11, Action:South
State  216
Old Q Values:  [  467.83789983  3720.33237527 -8896.20691497  2418.4316485 ]
New Q values:  [  467.83789983  2805.06437234 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3675.16711399 -6442.16912869 -8192.20126966  4391.77140743]
------
Step:12, Action:West
State  288
Old Q Values:  [ 3675.16711399 -6442.16912869 -8192.20126966  4391.77140743]
New Q values:  [ 3675.16711399 -6442.16912869 -8192.20126966  3293.87720062]
Reward: -1  Episode Reward:  48
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3709.5427972   5125.89545884]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3709.5427972   5125.89545884]
New Q values:  [-2527.46239811 -8521.23367799  3709.5427972  75151.09744162]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3709.5427972  75151.09744162]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  3274.26149551]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  1980.74384633]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2218.79749376    26.73544252 -1689.55777052  1591.01457697]
------
Step:2, Action:North
State  260
Old Q Values:  [   16.65544035 -2735.46306511  2935.90127676 -6102.86502307]
New Q values:  [-3492.71021519 -2735.46306511  2935.90127676 -6102.86502307]
Reward: -9991  Episode Reward:  -9982
xxxxx
x. .x
xg..x
x  .x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3709.5427972  75151.09744162]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  1980.74384633]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  1463.33678666]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2218.79749376    26.73544252 -1689.55777052  1591.01457697]
------
Step:2, Action:North
State  260
Old Q Values:  [-3492.71021519 -2735.46306511  2935.90127676 -6102.86502307]
New Q values:  [ 1103.54352259 -2735.46306511  2935.90127676 -6102.86502307]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2405.85343029   424.27326032  8317.42536224 -4966.32149798]
------
Step:3, Action:East
State  181
Old Q Values:  [1260.86140778 1345.7896492  9711.67180271  262.76946019]
New Q values:  [ 1260.86140778  1345.7896492  10699.56259166   262.76946019]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1246.14781335 22698.31290191   239.04887894]
------
Step:4, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  7.80637474e+03  8.76948316e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  7.80637474e+03  9.58218615e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20229.97629528  4291.02398572 -4584.50430574   566.35179234]
------
Step:5, Action:North
State  216
Old Q Values:  [  467.83789983  2805.06437234 -8896.20691497  2418.4316485 ]
New Q values:  [  668.5964984   2805.06437234 -8896.20691497  2418.4316485 ]
Reward: 9  Episode Reward:  45
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164    42.97961301]
------
Step:6, Action:South
State  128
Old Q Values:  [11374.93691792 19174.91261236 -8652.84       44428.56690885]
New Q values:  [11374.93691792 13738.35793353 -8652.84       44428.56690885]
Reward: -1  Episode Reward:  44
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20229.97629528  4291.02398572 -4584.50430574   566.35179234]
------
Step:7, Action:North
State  216
Old Q Values:  [  668.5964984   2805.06437234 -8896.20691497  2418.4316485 ]
New Q values:  [  742.89993782  2805.06437234 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  43
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164    42.97961301]
------
Step:8, Action:South
State  130
Old Q Values:  [46177.80406237 27489.46031286  -180.00807518 61601.86003193]
New Q values:  [46177.80406237 17064.17701373  -180.00807518 61601.86003193]
Reward: -1  Episode Reward:  42
xxxxx
x . x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20229.97629528  4291.02398572 -4584.50430574   566.35179234]
------
Step:9, Action:North
State  216
Old Q Values:  [  742.89993782  2805.06437234 -8896.20691497  2418.4316485 ]
New Q values:  [  772.62131359  2805.06437234 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  41
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1586.87112822 -2383.80019164    42.97961301]
------
Step:10, Action:South
State  128
Old Q Values:  [11374.93691792 13738.35793353 -8652.84       44428.56690885]
New Q values:  [11374.93691792 11563.73606199 -8652.84       44428.56690885]
Reward: -1  Episode Reward:  40
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20229.97629528  4291.02398572 -4584.50430574   566.35179234]
------
Step:11, Action:North
State  208
Old Q Values:  [20229.97629528  4291.02398572 -4584.50430574   566.35179234]
New Q values:  [26571.94852769  4291.02398572 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  39
xxxxx
x .ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 17064.17701373  -180.00807518 61601.86003193]
------
Step:12, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  6.03181555e+03 -3.22965309e-01  3.96116437e+02]
New Q values:  [ 7.64171987e+01  6.03181555e+03 -3.22965309e-01  5.12368869e+02]
Reward: 9  Episode Reward:  48
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 1161.74098086 1060.34671274]
------
Step:13, Action:East
State  126
Old Q Values:  [   0.          331.64678262 1161.74098086 1060.34671274]
New Q values:  [   0.          331.64678262 2273.64105687 1060.34671274]
Reward: -1  Episode Reward:  47
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  6.03181555e+03 -3.22965309e-01  5.12368869e+02]
------
Step:14, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  6.03181555e+03 -3.22965309e-01  5.12368869e+02]
New Q values:  [ 7.64171987e+01  5.52329952e+03 -3.22965309e-01  5.12368869e+02]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  292.53315687 10370.57767862     0.          1039.23717408]
------
Step:15, Action:South
State  210
Old Q Values:  [4223.26808967 7295.0907373   790.72804752 2961.44414407]
New Q values:  [ 4223.26808967 64025.98642912   790.72804752  2961.44414407]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26571.94852769  4291.02398572 -4584.50430574   566.35179234]
------
Step:1, Action:North
State  216
Old Q Values:  [  772.62131359  2805.06437234 -8896.20691497  2418.4316485 ]
New Q values:  [ 1971.43838233  2805.06437234 -8896.20691497  2418.4316485 ]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  5.52329952e+03 -3.22965309e-01  5.12368869e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  5.52329952e+03 -3.22965309e-01  5.12368869e+02]
New Q values:  [ 7.64171987e+01  2.14165157e+04 -3.22965309e-01  5.12368869e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4223.26808967 64025.98642912   790.72804752  2961.44414407]
------
Step:3, Action:South
State  208
Old Q Values:  [26571.94852769  4291.02398572 -4584.50430574   566.35179234]
New Q values:  [26571.94852769  2824.35972848 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  17
xxxxx
x.. x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3675.16711399 -6442.16912869 -8192.20126966  3293.87720062]
------
Step:4, Action:North
State  288
Old Q Values:  [ 3675.16711399 -6442.16912869 -8192.20126966  3293.87720062]
New Q values:  [ 3441.0514039  -6442.16912869 -8192.20126966  3293.87720062]
Reward: -10001  Episode Reward:  -9984
xxxxx
x.. x
x .gx
x.. x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2218.79749376    26.73544252 -1689.55777052  1591.01457697]
------
Step:1, Action:North
State  261
Old Q Values:  [ 2218.79749376    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 4102.787775      26.73544252 -1689.55777052  1591.01457697]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1260.86140778  1345.7896492  10699.56259166   262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [ 1260.86140778  1345.7896492  10699.56259166   262.76946019]
New Q values:  [1260.86140778 1345.7896492  6907.63507177  262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8761.3667837  -4582.3674281    534.04109446]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  1246.14781335 22698.31290191   239.04887894]
New Q values:  [-2469.90645144   942.86016134 22698.31290191   239.04887894]
Reward: 9  Episode Reward:  17
xxxxx
x.g.x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946  1463.33678666]
------
Step:4, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 3354.1928986 ]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 2571.91349194]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4102.787775      26.73544252 -1689.55777052  1591.01457697]
------
Step:5, Action:North
State  261
Old Q Values:  [ 4102.787775      26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 3712.80563153    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  15
xxxxx
x.g.x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1260.86140778 1345.7896492  6907.63507177  262.76946019]
------
Step:6, Action:East
State  181
Old Q Values:  [1260.86140778 1345.7896492  6907.63507177  262.76946019]
New Q values:  [1260.86140778 1345.7896492  5390.86406382  262.76946019]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8761.3667837  -4582.3674281    534.04109446]
------
Step:7, Action:South
State  193
Old Q Values:  [-5922.26708831  8761.3667837  -4582.3674281    534.04109446]
New Q values:  [-5922.26708831  4275.52076106 -4582.3674281    534.04109446]
Reward: -1  Episode Reward:  13
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642 2571.91349194]
------
Step:8, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 2571.91349194]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 2142.00708624]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3712.80563153    26.73544252 -1689.55777052  1591.01457697]
------
Step:9, Action:North
State  261
Old Q Values:  [ 3712.80563153    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 2068.05784244    26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  11
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1945.11863275 1560.18582186  358.5166536 ]
------
Step:10, Action:South
State  181
Old Q Values:  [1260.86140778 1345.7896492  5390.86406382  262.76946019]
New Q values:  [1260.86140778 1158.13321241 5390.86406382  262.76946019]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2068.05784244    26.73544252 -1689.55777052  1591.01457697]
------
Step:11, Action:North
State  261
Old Q Values:  [ 2068.05784244    26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 1410.1587268     26.73544252 -1689.55777052  1591.01457697]
Reward: -1  Episode Reward:  9
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1945.11863275 1560.18582186  358.5166536 ]
------
Step:12, Action:South
State  181
Old Q Values:  [1260.86140778 1158.13321241 5390.86406382  262.76946019]
New Q values:  [1260.86140778  939.95765805 5390.86406382  262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1410.1587268     26.73544252 -1689.55777052  1591.01457697]
------
Step:13, Action:West
State  261
Old Q Values:  [ 1410.1587268     26.73544252 -1689.55777052  1591.01457697]
New Q values:  [ 1410.1587268     26.73544252 -1689.55777052   933.11020388]
Reward: -301  Episode Reward:  -293
xxxxx
x...x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1410.1587268     26.73544252 -1689.55777052   933.11020388]
------
Step:14, Action:North
State  261
Old Q Values:  [ 1410.1587268     26.73544252 -1689.55777052   933.11020388]
New Q values:  [ 2180.72270987    26.73544252 -1689.55777052   933.11020388]
Reward: -1  Episode Reward:  -294
xxxxx
x...x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1260.86140778  939.95765805 5390.86406382  262.76946019]
------
Step:15, Action:East
State  181
Old Q Values:  [1260.86140778  939.95765805 5390.86406382  262.76946019]
New Q values:  [1260.86140778  939.95765805 3438.40185385  262.76946019]
Reward: -1  Episode Reward:  -295
xxxxx
x..gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4275.52076106 -4582.3674281    534.04109446]
------
Step:16, Action:South
State  196
Old Q Values:  [-2469.90645144   942.86016134 22698.31290191   239.04887894]
New Q values:  [-2469.90645144   815.54510053 22698.31290191   239.04887894]
Reward: -1  Episode Reward:  -296
xxxxx
x.g.x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946  1463.33678666]
------
Step:17, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 2142.00708624]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 1510.41964745]
Reward: -1  Episode Reward:  -297
xxxxx
x..gx
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2180.72270987    26.73544252 -1689.55777052   933.11020388]
------
Step:18, Action:North
State  261
Old Q Values:  [ 2180.72270987    26.73544252 -1689.55777052   933.11020388]
New Q values:  [ 1903.2096401     26.73544252 -1689.55777052   933.11020388]
Reward: -1  Episode Reward:  -298
xxxxx
x.g.x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1260.86140778  939.95765805 3438.40185385  262.76946019]
------
Step:19, Action:East
State  181
Old Q Values:  [1260.86140778  939.95765805 3438.40185385  262.76946019]
New Q values:  [1260.86140778  939.95765805 2657.41696986  262.76946019]
Reward: -1  Episode Reward:  -299
xxxxx
x..gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  4275.52076106 -4582.3674281    534.04109446]
------
Step:20, Action:South
State  196
Old Q Values:  [-2469.90645144   815.54510053 22698.31290191   239.04887894]
New Q values:  [-2469.90645144   764.61907621 22698.31290191   239.04887894]
Reward: -1  Episode Reward:  -300
xxxxx
x.g.x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946  1463.33678666]
------
Step:21, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  1463.33678666]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  1465.50509769]
Reward: -1  Episode Reward:  -301
xxxxx
xg..x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1103.54352259 -2735.46306511  2935.90127676 -6102.86502307]
------
Step:22, Action:East
State  260
Old Q Values:  [ 1103.54352259 -2735.46306511  2935.90127676 -6102.86502307]
New Q values:  [ 1103.54352259 -2735.46306511  1613.41204001 -6102.86502307]
Reward: -1  Episode Reward:  -302
xxxxx
x...x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946  1465.50509769]
------
Step:23, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  1465.50509769]
New Q values:  [   16.82637525 -5807.06396197   618.89767946 -4930.37434892]
Reward: -10001  Episode Reward:  -10303
xxxxx
x...x
x  .x
xg .x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   595.56427871   531.34594219]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   297.23868516   248.57627421]
New Q values:  [ -281.736      -1150.91067548  6549.25019544   248.57627421]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.14165157e+04 -3.22965309e-01  5.12368869e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.14165157e+04 -3.22965309e-01  5.12368869e+02]
New Q values:  [ 7.64171987e+01  1.65435909e+04 -3.22965309e-01  5.12368869e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26571.94852769  2824.35972848 -4584.50430574   566.35179234]
------
Step:3, Action:North
State  208
Old Q Values:  [26571.94852769  2824.35972848 -4584.50430574   566.35179234]
New Q values:  [15591.25666712  2824.35972848 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.65435909e+04 -3.22965309e-01  5.12368869e+02]
------
Step:4, Action:South
State  136
Old Q Values:  [ -170.77177351  1586.87112822 -2383.80019164    42.97961301]
New Q values:  [ -170.77177351  5311.52545142 -2383.80019164    42.97961301]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15591.25666712  2824.35972848 -4584.50430574   566.35179234]
------
Step:5, Action:North
State  208
Old Q Values:  [15591.25666712  2824.35972848 -4584.50430574   566.35179234]
New Q values:  [11198.97992289  2824.35972848 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.65435909e+04 -3.22965309e-01  5.12368869e+02]
------
Step:6, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.65435909e+04 -3.22965309e-01  5.12368869e+02]
New Q values:  [ 7.64171987e+01  9.97653032e+03 -3.22965309e-01  5.12368869e+02]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11198.97992289  2824.35972848 -4584.50430574   566.35179234]
------
Step:7, Action:North
State  208
Old Q Values:  [11198.97992289  2824.35972848 -4584.50430574   566.35179234]
New Q values:  [ 7471.95106463  2824.35972848 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.97653032e+03 -3.22965309e-01  5.12368869e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  9.97653032e+03 -3.22965309e-01  5.12368869e+02]
New Q values:  [ 7.64171987e+01  2.31978081e+04 -3.22965309e-01  5.12368869e+02]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4223.26808967 64025.98642912   790.72804752  2961.44414407]
------
Step:9, Action:South
State  210
Old Q Values:  [ 4223.26808967 64025.98642912   790.72804752  2961.44414407]
New Q values:  [ 4223.26808967 26642.10999282   790.72804752  2961.44414407]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3441.0514039  -6442.16912869 -8192.20126966  3293.87720062]
------
Step:10, Action:North
State  288
Old Q Values:  [ 3441.0514039  -6442.16912869 -8192.20126966  3293.87720062]
New Q values:  [ 9368.45355941 -6442.16912869 -8192.20126966  3293.87720062]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4223.26808967 26642.10999282   790.72804752  2961.44414407]
------
Step:11, Action:North
State  208
Old Q Values:  [ 7471.95106463  2824.35972848 -4584.50430574   566.35179234]
New Q values:  [ 9947.52284266  2824.35972848 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.31978081e+04 -3.22965309e-01  5.12368869e+02]
------
Step:12, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.31978081e+04 -3.22965309e-01  5.12368869e+02]
New Q values:  [ 7.64171987e+01  2.31978081e+04 -3.22965309e-01  2.16912261e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6549.25019544   248.57627421]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  6549.25019544   248.57627421]
New Q values:  [ -281.736      -1150.91067548  9578.44249499   248.57627421]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.31978081e+04 -3.22965309e-01  2.16912261e+03]
------
Step:14, Action:South
State  136
Old Q Values:  [ -170.77177351  5311.52545142 -2383.80019164    42.97961301]
New Q values:  [ -170.77177351  5108.26703337 -2383.80019164    42.97961301]
Reward: -1  Episode Reward:  6
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9947.52284266  2824.35972848 -4584.50430574   566.35179234]
------
Step:15, Action:North
State  208
Old Q Values:  [ 9947.52284266  2824.35972848 -4584.50430574   566.35179234]
New Q values:  [ 5510.88924708  2824.35972848 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  5
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5108.26703337 -2383.80019164    42.97961301]
------
Step:16, Action:South
State  136
Old Q Values:  [ -170.77177351  5108.26703337 -2383.80019164    42.97961301]
New Q values:  [ -170.77177351  3695.97358747 -2383.80019164    42.97961301]
Reward: -1  Episode Reward:  4
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5510.88924708  2824.35972848 -4584.50430574   566.35179234]
------
Step:17, Action:North
State  208
Old Q Values:  [ 5510.88924708  2824.35972848 -4584.50430574   566.35179234]
New Q values:  [ 9163.09811564  2824.35972848 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.31978081e+04 -3.22965309e-01  2.16912261e+03]
------
Step:18, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.31978081e+04 -3.22965309e-01  2.16912261e+03]
New Q values:  [ 7.64171987e+01  1.72711562e+04 -3.22965309e-01  2.16912261e+03]
Reward: -1  Episode Reward:  2
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4223.26808967 26642.10999282   790.72804752  2961.44414407]
------
Step:19, Action:South
State  208
Old Q Values:  [ 9163.09811564  2824.35972848 -4584.50430574   566.35179234]
New Q values:  [ 9163.09811564  3939.67995921 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  1
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9368.45355941 -6442.16912869 -8192.20126966  3293.87720062]
------
Step:20, Action:North
State  288
Old Q Values:  [ 9368.45355941 -6442.16912869 -8192.20126966  3293.87720062]
New Q values:  [ 6495.71085846 -6442.16912869 -8192.20126966  3293.87720062]
Reward: -1  Episode Reward:  0
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9163.09811564  3939.67995921 -4584.50430574   566.35179234]
------
Step:21, Action:North
State  208
Old Q Values:  [ 9163.09811564  3939.67995921 -4584.50430574   566.35179234]
New Q values:  [ 8845.98611233  3939.67995921 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -1
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.72711562e+04 -3.22965309e-01  2.16912261e+03]
------
Step:22, Action:South
State  136
Old Q Values:  [ -170.77177351  3695.97358747 -2383.80019164    42.97961301]
New Q values:  [ -170.77177351  4131.58526869 -2383.80019164    42.97961301]
Reward: -1  Episode Reward:  -2
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8845.98611233  3939.67995921 -4584.50430574   566.35179234]
------
Step:23, Action:North
State  208
Old Q Values:  [ 8845.98611233  3939.67995921 -4584.50430574   566.35179234]
New Q values:  [ 8719.14131101  3939.67995921 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -3
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.72711562e+04 -3.22965309e-01  2.16912261e+03]
------
Step:24, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.72711562e+04 -3.22965309e-01  2.16912261e+03]
New Q values:  [ 7.64171987e+01  1.49004955e+04 -3.22965309e-01  2.16912261e+03]
Reward: -1  Episode Reward:  -4
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4223.26808967 26642.10999282   790.72804752  2961.44414407]
------
Step:25, Action:South
State  210
Old Q Values:  [ 4223.26808967 26642.10999282   790.72804752  2961.44414407]
New Q values:  [ 4223.26808967 12604.95725466   790.72804752  2961.44414407]
Reward: -1  Episode Reward:  -5
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6495.71085846 -6442.16912869 -8192.20126966  3293.87720062]
------
Step:26, Action:North
State  288
Old Q Values:  [ 6495.71085846 -6442.16912869 -8192.20126966  3293.87720062]
New Q values:  [ 5213.42673669 -6442.16912869 -8192.20126966  3293.87720062]
Reward: -1  Episode Reward:  -6
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8719.14131101  3939.67995921 -4584.50430574   566.35179234]
------
Step:27, Action:North
State  210
Old Q Values:  [ 4223.26808967 12604.95725466   790.72804752  2961.44414407]
New Q values:  [ 6158.85588165 12604.95725466   790.72804752  2961.44414407]
Reward: -1  Episode Reward:  -7
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  1.49004955e+04 -3.22965309e-01  2.16912261e+03]
------
Step:28, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  1.49004955e+04 -3.22965309e-01  2.16912261e+03]
New Q values:  [ 7.64171987e+01  9.74108537e+03 -3.22965309e-01  2.16912261e+03]
Reward: -1  Episode Reward:  -8
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6158.85588165 12604.95725466   790.72804752  2961.44414407]
------
Step:29, Action:South
State  208
Old Q Values:  [ 8719.14131101  3939.67995921 -4584.50430574   566.35179234]
New Q values:  [ 8719.14131101  3139.30000469 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -9
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5213.42673669 -6442.16912869 -8192.20126966  3293.87720062]
------
Step:30, Action:North
State  288
Old Q Values:  [ 5213.42673669 -6442.16912869 -8192.20126966  3293.87720062]
New Q values:  [ 4700.51308798 -6442.16912869 -8192.20126966  3293.87720062]
Reward: -1  Episode Reward:  -10
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8719.14131101  3139.30000469 -4584.50430574   566.35179234]
------
Step:31, Action:North
State  210
Old Q Values:  [ 6158.85588165 12604.95725466   790.72804752  2961.44414407]
New Q values:  [ 5385.2679639  12604.95725466   790.72804752  2961.44414407]
Reward: -1  Episode Reward:  -11
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  9.74108537e+03 -3.22965309e-01  2.16912261e+03]
------
Step:32, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  9.74108537e+03 -3.22965309e-01  2.16912261e+03]
New Q values:  [ 7.64171987e+01  7.67732132e+03 -3.22965309e-01  2.16912261e+03]
Reward: -1  Episode Reward:  -12
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5385.2679639  12604.95725466   790.72804752  2961.44414407]
------
Step:33, Action:South
State  210
Old Q Values:  [ 5385.2679639  12604.95725466   790.72804752  2961.44414407]
New Q values:  [5385.2679639  6451.53682826  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -13
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4700.51308798 -6442.16912869 -8192.20126966  3293.87720062]
------
Step:34, Action:North
State  288
Old Q Values:  [ 4700.51308798 -6442.16912869 -8192.20126966  3293.87720062]
New Q values:  [ 4495.34762849 -6442.16912869 -8192.20126966  3293.87720062]
Reward: -1  Episode Reward:  -14
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8719.14131101  3139.30000469 -4584.50430574   566.35179234]
------
Step:35, Action:North
State  208
Old Q Values:  [ 8719.14131101  3139.30000469 -4584.50430574   566.35179234]
New Q values:  [ 5790.25292182  3139.30000469 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -15
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.67732132e+03 -3.22965309e-01  2.16912261e+03]
------
Step:36, Action:South
State  136
Old Q Values:  [ -170.77177351  4131.58526869 -2383.80019164    42.97961301]
New Q values:  [ -170.77177351  3389.10998402 -2383.80019164    42.97961301]
Reward: -1  Episode Reward:  -16
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5790.25292182  3139.30000469 -4584.50430574   566.35179234]
------
Step:37, Action:North
State  208
Old Q Values:  [ 5790.25292182  3139.30000469 -4584.50430574   566.35179234]
New Q values:  [ 4618.69756614  3139.30000469 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -17
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  7.67732132e+03 -3.22965309e-01  2.16912261e+03]
------
Step:38, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  7.67732132e+03 -3.22965309e-01  2.16912261e+03]
New Q values:  [ 7.64171987e+01  4.45593780e+03 -3.22965309e-01  2.16912261e+03]
Reward: -1  Episode Reward:  -18
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4618.69756614  3139.30000469 -4584.50430574   566.35179234]
------
Step:39, Action:North
State  210
Old Q Values:  [5385.2679639  6451.53682826  790.72804752 2961.44414407]
New Q values:  [3490.28852548 6451.53682826  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -19
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.45593780e+03 -3.22965309e-01  2.16912261e+03]
------
Step:40, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.45593780e+03 -3.22965309e-01  2.16912261e+03]
New Q values:  [ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  2.16912261e+03]
Reward: -1  Episode Reward:  -20
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3490.28852548 6451.53682826  790.72804752 2961.44414407]
------
Step:41, Action:North
State  208
Old Q Values:  [ 4618.69756614  3139.30000469 -4584.50430574   566.35179234]
New Q values:  [ 2962.04987697  3139.30000469 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -21
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  2.16912261e+03]
------
Step:42, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  2.16912261e+03]
New Q values:  [ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  3.74058179e+03]
Reward: -1  Episode Reward:  -22
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  9578.44249499   248.57627421]
------
Step:43, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1262.88034339 -1157.45542097]
New Q values:  [-9594.56523706 -8069.05606225  1521.28513256 -1157.45542097]
Reward: -1  Episode Reward:  -23
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3389.10998402 -2383.80019164    42.97961301]
------
Step:44, Action:South
State  136
Old Q Values:  [ -170.77177351  3389.10998402 -2383.80019164    42.97961301]
New Q values:  [ -170.77177351  2296.83399502 -2383.80019164    42.97961301]
Reward: -1  Episode Reward:  -24
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2962.04987697  3139.30000469 -4584.50430574   566.35179234]
------
Step:45, Action:South
State  208
Old Q Values:  [ 2962.04987697  3139.30000469 -4584.50430574   566.35179234]
New Q values:  [ 2962.04987697  2603.72429043 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -25
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4495.34762849 -6442.16912869 -8192.20126966  3293.87720062]
------
Step:46, Action:North
State  288
Old Q Values:  [ 4495.34762849 -6442.16912869 -8192.20126966  3293.87720062]
New Q values:  [ 2686.15401449 -6442.16912869 -8192.20126966  3293.87720062]
Reward: -1  Episode Reward:  -26
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2962.04987697  2603.72429043 -4584.50430574   566.35179234]
------
Step:47, Action:North
State  208
Old Q Values:  [ 2962.04987697  2603.72429043 -4584.50430574   566.35179234]
New Q values:  [ 2306.39448808  2603.72429043 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -27
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  3.74058179e+03]
------
Step:48, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  3.74058179e+03]
New Q values:  [ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  4.36916546e+03]
Reward: -1  Episode Reward:  -28
xxxxx
x.a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  9578.44249499   248.57627421]
------
Step:49, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  9578.44249499   248.57627421]
New Q values:  [ -281.736      -1150.91067548  5141.52663746   248.57627421]
Reward: -1  Episode Reward:  -29
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  4.36916546e+03]
------
Step:50, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  4.36916546e+03]
New Q values:  [ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  3.28952418e+03]
Reward: -1  Episode Reward:  -30
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  5141.52663746   248.57627421]
------
Step:51, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  5141.52663746   248.57627421]
New Q values:  [ -281.736      -1150.91067548  3171.1815055    248.57627421]
Reward: -1  Episode Reward:  -31
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  3.28952418e+03]
------
Step:52, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.71723617e+03 -3.22965309e-01  3.28952418e+03]
New Q values:  [ 7.64171987e+01  3.42175552e+03 -3.22965309e-01  3.28952418e+03]
Reward: -1  Episode Reward:  -32
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3490.28852548 6451.53682826  790.72804752 2961.44414407]
------
Step:53, Action:North
State  208
Old Q Values:  [ 2306.39448808  2603.72429043 -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998  2603.72429043 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -33
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.42175552e+03 -3.22965309e-01  3.28952418e+03]
------
Step:54, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  3.42175552e+03 -3.22965309e-01  3.28952418e+03]
New Q values:  [ 7.64171987e+01  3.42175552e+03 -3.22965309e-01  2.26656412e+03]
Reward: -1  Episode Reward:  -34
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3171.1815055    248.57627421]
------
Step:55, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   595.56427871   531.34594219]
New Q values:  [ -253.44886264 -1902.20915811  1264.15236623   531.34594219]
Reward: -1  Episode Reward:  -35
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.42175552e+03 -3.22965309e-01  2.26656412e+03]
------
Step:56, Action:West
State  136
Old Q Values:  [ -170.77177351  2296.83399502 -2383.80019164    42.97961301]
New Q values:  [ -170.77177351  2296.83399502 -2383.80019164    83.05818806]
Reward: -1  Episode Reward:  -36
xxxxx
x.agx
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   221.5544762 ]
------
Step:57, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1264.15236623   531.34594219]
New Q values:  [ -253.44886264 -1902.20915811  1264.15236623   391.97749867]
Reward: 9  Episode Reward:  -27
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  580.13040598  532.59323345 -252.78192178]
------
Step:58, Action:South
State  111
Old Q Values:  [-177.44732869 1154.07727983  391.45858569 -120.29354603]
New Q values:  [-177.44732869 1050.56650176  391.45858569 -120.29354603]
Reward: 9  Episode Reward:  -18
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1945.11863275 1560.18582186  358.5166536 ]
------
Step:59, Action:South
State  183
Old Q Values:  [ 540.75283156 1945.11863275 1560.18582186  358.5166536 ]
New Q values:  [ 540.75283156 1354.41034513 1560.18582186  358.5166536 ]
Reward: 9  Episode Reward:  -9
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1903.2096401     26.73544252 -1689.55777052   933.11020388]
------
Step:60, Action:North
State  261
Old Q Values:  [ 1903.2096401     26.73544252 -1689.55777052   933.11020388]
New Q values:  [ 1228.7396026     26.73544252 -1689.55777052   933.11020388]
Reward: -1  Episode Reward:  -10
xxxxx
x   x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1354.41034513 1560.18582186  358.5166536 ]
------
Step:61, Action:East
State  179
Old Q Values:  [    0.         16101.90751562 14965.50319905     0.        ]
New Q values:  [    0.         16101.90751562  6535.7653539      0.        ]
Reward: 9  Episode Reward:  -1
xxxxx
x   x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  1.63194774e+03]
------
Step:62, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  2002.27343477 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  1641.82868561 1141.49622464]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  2805.06437234 -8896.20691497  2418.4316485 ]
------
Step:63, Action:South
State  216
Old Q Values:  [ 1971.43838233  2805.06437234 -8896.20691497  2418.4316485 ]
New Q values:  [ 1971.43838233  2109.58890912 -8896.20691497  2418.4316485 ]
Reward: -1  Episode Reward:  -3
xxxxx
x   x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2686.15401449 -6442.16912869 -8192.20126966  3293.87720062]
------
Step:64, Action:West
State  288
Old Q Values:  [ 2686.15401449 -6442.16912869 -8192.20126966  3293.87720062]
New Q values:  [ 2686.15401449 -6442.16912869 -8192.20126966 83868.28011273]
Reward: 100009  Episode Reward:  100006
xxxxx
x   x
x   x
xga x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1228.7396026     26.73544252 -1689.55777052   933.11020388]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1228.7396026     26.73544252 -1689.55777052   933.11020388]
New Q values:  [ 1294.120932      26.73544252 -1689.55777052   933.11020388]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1260.86140778  939.95765805 2657.41696986  262.76946019]
------
Step:2, Action:East
State  183
Old Q Values:  [ 540.75283156 1354.41034513 1560.18582186  358.5166536 ]
New Q values:  [ 540.75283156 1354.41034513 3008.73279624  358.5166536 ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 7930.86155831 1849.21327227 1169.39963074]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  4275.52076106 -4582.3674281    534.04109446]
New Q values:  [-5922.26708831  2168.73419866 -4582.3674281    534.04109446]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642 1510.41964745]
------
Step:4, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 1510.41964745]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642  991.80413858]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1294.120932      26.73544252 -1689.55777052   933.11020388]
------
Step:5, Action:North
State  261
Old Q Values:  [ 1294.120932      26.73544252 -1689.55777052   933.11020388]
New Q values:  [ 1419.66821167    26.73544252 -1689.55777052   933.11020388]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1354.41034513 3008.73279624  358.5166536 ]
------
Step:6, Action:East
State  181
Old Q Values:  [1260.86140778  939.95765805 2657.41696986  262.76946019]
New Q values:  [ 1260.86140778   939.95765805 -2062.97736574   262.76946019]
Reward: -10001  Episode Reward:  -9976
xxxxx
x.. x
x g.x
x  .x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  391.45858569 -120.29354603]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094   335.69847369 -5413.46457526  -180.6       ]
New Q values:  [ -241.10880094   517.93781181 -5413.46457526  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x .gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1260.86140778   939.95765805 -2062.97736574   262.76946019]
------
Step:2, Action:North
State  181
Old Q Values:  [ 1260.86140778   939.95765805 -2062.97736574   262.76946019]
New Q values:  [  818.91451364   939.95765805 -2062.97736574   262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  391.45858569 -120.29354603]
------
Step:3, Action:South
State  103
Old Q Values:  [221.30610858 437.96846403 238.35800069   0.        ]
New Q values:  [ 221.30610858 1077.20722448  238.35800069    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1354.41034513 3008.73279624  358.5166536 ]
------
Step:4, Action:East
State  183
Old Q Values:  [ 540.75283156 1354.41034513 3008.73279624  358.5166536 ]
New Q values:  [ 540.75283156 1354.41034513 1753.05719277  358.5166536 ]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  1.63194774e+03]
------
Step:5, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  1641.82868561 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  1381.66096879 1141.49622464]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  2109.58890912 -8896.20691497  2418.4316485 ]
------
Step:6, Action:South
State  208
Old Q Values:  [ 1948.48444998  2603.72429043 -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998 26207.37374999 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  24
xxxxx
x ..x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2686.15401449 -6442.16912869 -8192.20126966 83868.28011273]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2686.15401449 -6442.16912869 -8192.20126966 83868.28011273]
New Q values:  [ 2686.15401449 -6442.16912869 -8192.20126966 56098.04127758]
Reward: 9  Episode Reward:  33
xxxxx
x ..x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3709.5427972  75151.09744162]
------
Step:8, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946 -4930.37434892]
New Q values:  [   16.82637525 -5807.06396197   618.89767946 -1540.84927607]
Reward: 9  Episode Reward:  42
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1419.66821167    26.73544252 -1689.55777052   933.11020388]
------
Step:9, Action:North
State  261
Old Q Values:  [ 1419.66821167    26.73544252 -1689.55777052   933.11020388]
New Q values:  [  849.25458208    26.73544252 -1689.55777052   933.11020388]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  818.91451364   939.95765805 -2062.97736574   262.76946019]
------
Step:10, Action:South
State  180
Old Q Values:  [-2405.85343029   424.27326032  8317.42536224 -4966.32149798]
New Q values:  [-2405.85343029   653.13291613  8317.42536224 -4966.32149798]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1103.54352259 -2735.46306511  1613.41204001 -6102.86502307]
------
Step:11, Action:East
State  261
Old Q Values:  [  849.25458208    26.73544252 -1689.55777052   933.11020388]
New Q values:  [  849.25458208    26.73544252 21868.90612428   933.11020388]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3709.5427972  75151.09744162]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3709.5427972  75151.09744162]
New Q values:  [-2527.46239811 -8521.23367799  3709.5427972  30543.86258865]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1103.54352259 -2735.46306511  1613.41204001 -6102.86502307]
------
Step:13, Action:East
State  260
Old Q Values:  [ 1103.54352259 -2735.46306511  1613.41204001 -6102.86502307]
New Q values:  [ 1103.54352259 -2735.46306511  9807.9235926  -6102.86502307]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3709.5427972  30543.86258865]
------
Step:14, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3709.5427972  30543.86258865]
New Q values:  [-2527.46239811 -8521.23367799 18312.62950215 30543.86258865]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2686.15401449 -6442.16912869 -8192.20126966 56098.04127758]
------
Step:15, Action:North
State  288
Old Q Values:  [ 2686.15401449 -6442.16912869 -8192.20126966 56098.04127758]
New Q values:  [ 3009.32265427 -6442.16912869 -8192.20126966 56098.04127758]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3490.28852548 6451.53682826  790.72804752 2961.44414407]
------
Step:16, Action:South
State  210
Old Q Values:  [3490.28852548 6451.53682826  790.72804752 2961.44414407]
New Q values:  [ 3490.28852548 19409.42711458   790.72804752  2961.44414407]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3009.32265427 -6442.16912869 -8192.20126966 56098.04127758]
------
Step:17, Action:North
State  288
Old Q Values:  [ 3009.32265427 -6442.16912869 -8192.20126966 56098.04127758]
New Q values:  [ 7025.95719608 -6442.16912869 -8192.20126966 56098.04127758]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3490.28852548 19409.42711458   790.72804752  2961.44414407]
------
Step:18, Action:South
State  208
Old Q Values:  [ 1948.48444998 26207.37374999 -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998 27311.76188327 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7025.95719608 -6442.16912869 -8192.20126966 56098.04127758]
------
Step:19, Action:West
State  288
Old Q Values:  [ 7025.95719608 -6442.16912869 -8192.20126966 56098.04127758]
New Q values:  [ 7025.95719608 -6442.16912869 -8192.20126966 31601.77528763]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 18312.62950215 30543.86258865]
------
Step:20, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 18312.62950215 30543.86258865]
New Q values:  [-2527.46239811 -8521.23367799 16804.98438715 30543.86258865]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7025.95719608 -6442.16912869 -8192.20126966 31601.77528763]
------
Step:21, Action:West
State  288
Old Q Values:  [ 7025.95719608 -6442.16912869 -8192.20126966 31601.77528763]
New Q values:  [ 7025.95719608 -6442.16912869 -8192.20126966 21803.26889165]
Reward: -1  Episode Reward:  29
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 16804.98438715 30543.86258865]
------
Step:22, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946 -1540.84927607]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  5943.73212686]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  849.25458208    26.73544252 21868.90612428   933.11020388]
------
Step:23, Action:East
State  261
Old Q Values:  [  849.25458208    26.73544252 21868.90612428   933.11020388]
New Q values:  [  849.25458208    26.73544252 17910.12122631   933.11020388]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 16804.98438715 30543.86258865]
------
Step:24, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 16804.98438715 30543.86258865]
New Q values:  [-2527.46239811 -8521.23367799 16804.98438715 17589.98140335]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  849.25458208    26.73544252 17910.12122631   933.11020388]
------
Step:25, Action:West
State  261
Old Q Values:  [  849.25458208    26.73544252 17910.12122631   933.11020388]
New Q values:  [  849.25458208    26.73544252 17910.12122631  5565.68044944]
Reward: -301  Episode Reward:  -275
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  849.25458208    26.73544252 17910.12122631  5565.68044944]
------
Step:26, Action:East
State  261
Old Q Values:  [  849.25458208    26.73544252 17910.12122631  5565.68044944]
New Q values:  [ 849.25458208   26.73544252 7460.9897321  5565.68044944]
Reward: -1  Episode Reward:  -276
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 -256.73274642  991.80413858]
------
Step:27, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642  991.80413858]
New Q values:  [ 677.52857079 -168.92307549 -256.73274642 2634.41857506]
Reward: -1  Episode Reward:  -277
xxxxx
x .gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 849.25458208   26.73544252 7460.9897321  5565.68044944]
------
Step:28, Action:East
State  261
Old Q Values:  [ 849.25458208   26.73544252 7460.9897321  5565.68044944]
New Q values:  [ 849.25458208   26.73544252 4766.9155309  5565.68044944]
Reward: -1  Episode Reward:  -278
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946  5943.73212686]
------
Step:29, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 16804.98438715 17589.98140335]
New Q values:  [-2527.46239811 -8521.23367799 16804.98438715  8705.09669617]
Reward: -1  Episode Reward:  -279
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 849.25458208   26.73544252 4766.9155309  5565.68044944]
------
Step:30, Action:West
State  261
Old Q Values:  [ 849.25458208   26.73544252 4766.9155309  5565.68044944]
New Q values:  [ 849.25458208   26.73544252 4766.9155309  3715.37631461]
Reward: -301  Episode Reward:  -580
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 849.25458208   26.73544252 4766.9155309  3715.37631461]
------
Step:31, Action:West
State  260
Old Q Values:  [ 1103.54352259 -2735.46306511  9807.9235926  -6102.86502307]
New Q values:  [ 1103.54352259 -2735.46306511  9807.9235926  -5679.36893145]
Reward: -10301  Episode Reward:  -10881
xxxxx
x ..x
x   x
xg  x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3490.28852548 19409.42711458   790.72804752  2961.44414407]
------
Step:1, Action:South
State  210
Old Q Values:  [ 3490.28852548 19409.42711458   790.72804752  2961.44414407]
New Q values:  [3490.28852548 8310.15151332  790.72804752 2961.44414407]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x.. x
x. gx
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 16804.98438715  8705.09669617]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 16804.98438715  8705.09669617]
New Q values:  [-2527.46239811 -8521.23367799 13268.37442235  8705.09669617]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7025.95719608 -6442.16912869 -8192.20126966 21803.26889165]
------
Step:2, Action:North
State  288
Old Q Values:  [ 7025.95719608 -6442.16912869 -8192.20126966 21803.26889165]
New Q values:  [11009.31144341 -6442.16912869 -8192.20126966 21803.26889165]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998 27311.76188327 -4584.50430574   566.35179234]
------
Step:3, Action:South
State  210
Old Q Values:  [3490.28852548 8310.15151332  790.72804752 2961.44414407]
New Q values:  [3490.28852548 9864.44127282  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11009.31144341 -6442.16912869 -8192.20126966 21803.26889165]
------
Step:4, Action:North
State  288
Old Q Values:  [11009.31144341 -6442.16912869 -8192.20126966 21803.26889165]
New Q values:  [ 7362.45695921 -6442.16912869 -8192.20126966 21803.26889165]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3490.28852548 9864.44127282  790.72804752 2961.44414407]
------
Step:5, Action:South
State  208
Old Q Values:  [ 1948.48444998 27311.76188327 -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998 17465.0854208  -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7362.45695921 -6442.16912869 -8192.20126966 21803.26889165]
------
Step:6, Action:West
State  288
Old Q Values:  [ 7362.45695921 -6442.16912869 -8192.20126966 21803.26889165]
New Q values:  [ 7362.45695921 -6442.16912869 -8192.20126966 12701.21988336]
Reward: -1  Episode Reward:  14
xxxxx
xg..x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 13268.37442235  8705.09669617]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 13268.37442235  8705.09669617]
New Q values:  [-2527.46239811 -8521.23367799  9117.11573395  8705.09669617]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7362.45695921 -6442.16912869 -8192.20126966 12701.21988336]
------
Step:8, Action:West
State  288
Old Q Values:  [ 7362.45695921 -6442.16912869 -8192.20126966 12701.21988336]
New Q values:  [ 7362.45695921 -6442.16912869 -8192.20126966  7815.02267353]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9117.11573395  8705.09669617]
------
Step:9, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 -256.73274642 2634.41857506]
New Q values:  [ 677.52857079 -168.92307549 2241.21370349 2634.41857506]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7362.45695921 -6442.16912869 -8192.20126966  7815.02267353]
------
Step:10, Action:West
State  288
Old Q Values:  [ 7362.45695921 -6442.16912869 -8192.20126966  7815.02267353]
New Q values:  [ 7362.45695921 -6442.16912869 -8192.20126966  3915.73464193]
Reward: -1  Episode Reward:  10
xxxxx
x..gx
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 2241.21370349 2634.41857506]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9117.11573395  8705.09669617]
New Q values:  [-2527.46239811 -8521.23367799  9117.11573395 16576.77793655]
Reward: -1  Episode Reward:  9
xxxxx
x.g.x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[43651.13086027  6367.89256635 16022.11762108  1875.31501677]
------
Step:12, Action:North
State  261
Old Q Values:  [ 849.25458208   26.73544252 4766.9155309  3715.37631461]
New Q values:  [ 627.08913025   26.73544252 4766.9155309  3715.37631461]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  818.91451364   939.95765805 -2062.97736574   262.76946019]
------
Step:13, Action:South
State  181
Old Q Values:  [  818.91451364   939.95765805 -2062.97736574   262.76946019]
New Q values:  [  818.91451364  1805.45772249 -2062.97736574   262.76946019]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 627.08913025   26.73544252 4766.9155309  3715.37631461]
------
Step:14, Action:East
State  261
Old Q Values:  [ 627.08913025   26.73544252 4766.9155309  3715.37631461]
New Q values:  [ 627.08913025   26.73544252 2696.49178488 3715.37631461]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 2241.21370349 2634.41857506]
------
Step:15, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 2241.21370349 2634.41857506]
New Q values:  [ 677.52857079 -168.92307549 2241.21370349 2167.78032441]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 627.08913025   26.73544252 2696.49178488 3715.37631461]
------
Step:16, Action:West
State  261
Old Q Values:  [ 627.08913025   26.73544252 2696.49178488 3715.37631461]
New Q values:  [ 627.08913025   26.73544252 2696.49178488 2420.16342023]
Reward: -301  Episode Reward:  -286
xxxxx
x...x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 627.08913025   26.73544252 2696.49178488 2420.16342023]
------
Step:17, Action:East
State  261
Old Q Values:  [ 627.08913025   26.73544252 2696.49178488 2420.16342023]
New Q values:  [ 627.08913025   26.73544252 1750.360825   2420.16342023]
Reward: -1  Episode Reward:  -287
xxxxx
x...x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 2241.21370349 2167.78032441]
------
Step:18, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 2241.21370349 2167.78032441]
New Q values:  [ 677.52857079 -168.92307549 2241.21370349 1592.56115583]
Reward: -1  Episode Reward:  -288
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 627.08913025   26.73544252 1750.360825   2420.16342023]
------
Step:19, Action:West
State  261
Old Q Values:  [ 627.08913025   26.73544252 1750.360825   2420.16342023]
New Q values:  [ 627.08913025   26.73544252 1750.360825   1513.51439416]
Reward: -301  Episode Reward:  -589
xxxxx
x..gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 627.08913025   26.73544252 1750.360825   1513.51439416]
------
Step:20, Action:East
State  261
Old Q Values:  [ 627.08913025   26.73544252 1750.360825   1513.51439416]
New Q values:  [ 627.08913025   26.73544252 5672.57771096 1513.51439416]
Reward: -1  Episode Reward:  -590
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9117.11573395 16576.77793655]
------
Step:21, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9117.11573395 16576.77793655]
New Q values:  [-2527.46239811 -8521.23367799  9117.11573395  8331.88448791]
Reward: -1  Episode Reward:  -591
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 627.08913025   26.73544252 5672.57771096 1513.51439416]
------
Step:22, Action:East
State  261
Old Q Values:  [ 627.08913025   26.73544252 5672.57771096 1513.51439416]
New Q values:  [ 627.08913025   26.73544252 5003.56580457 1513.51439416]
Reward: -1  Episode Reward:  -592
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9117.11573395  8331.88448791]
------
Step:23, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 2241.21370349 1592.56115583]
New Q values:  [ 677.52857079 -168.92307549 3104.62256916 1592.56115583]
Reward: -1  Episode Reward:  -593
xxxxx
x..gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7362.45695921 -6442.16912869 -8192.20126966  3915.73464193]
------
Step:24, Action:North
State  288
Old Q Values:  [ 7362.45695921 -6442.16912869 -8192.20126966  3915.73464193]
New Q values:  [ 8183.90840993 -6442.16912869 -8192.20126966  3915.73464193]
Reward: -1  Episode Reward:  -594
xxxxx
x.g.x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998 17465.0854208  -4584.50430574   566.35179234]
------
Step:25, Action:South
State  208
Old Q Values:  [ 1948.48444998 17465.0854208  -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998  9440.6066913  -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -595
xxxxx
xg..x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8183.90840993 -6442.16912869 -8192.20126966  3915.73464193]
------
Step:26, Action:North
State  288
Old Q Values:  [ 8183.90840993 -6442.16912869 -8192.20126966  3915.73464193]
New Q values:  [ 6105.14537136 -6442.16912869 -8192.20126966  3915.73464193]
Reward: -1  Episode Reward:  -596
xxxxx
x.g.x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998  9440.6066913  -4584.50430574   566.35179234]
------
Step:27, Action:South
State  208
Old Q Values:  [ 1948.48444998  9440.6066913  -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998  5607.18628793 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -597
xxxxx
xg..x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6105.14537136 -6442.16912869 -8192.20126966  3915.73464193]
------
Step:28, Action:North
State  288
Old Q Values:  [ 6105.14537136 -6442.16912869 -8192.20126966  3915.73464193]
New Q values:  [ 4123.61403492 -6442.16912869 -8192.20126966  3915.73464193]
Reward: -1  Episode Reward:  -598
xxxxx
x.g.x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998  5607.18628793 -4584.50430574   566.35179234]
------
Step:29, Action:South
State  208
Old Q Values:  [ 1948.48444998  5607.18628793 -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998  3479.35872565 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -599
xxxxx
x...x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4123.61403492 -6442.16912869 -8192.20126966  3915.73464193]
------
Step:30, Action:North
State  288
Old Q Values:  [ 4123.61403492 -6442.16912869 -8192.20126966  3915.73464193]
New Q values:  [ 2692.65323166 -6442.16912869 -8192.20126966  3915.73464193]
Reward: -1  Episode Reward:  -600
xxxxx
x.g.x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998  3479.35872565 -4584.50430574   566.35179234]
------
Step:31, Action:South
State  208
Old Q Values:  [ 1948.48444998  3479.35872565 -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998  2565.86388284 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -601
xxxxx
xg..x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2692.65323166 -6442.16912869 -8192.20126966  3915.73464193]
------
Step:32, Action:West
State  288
Old Q Values:  [ 2692.65323166 -6442.16912869 -8192.20126966  3915.73464193]
New Q values:  [ 2692.65323166 -6442.16912869 -8192.20126966  4300.82857696]
Reward: -1  Episode Reward:  -602
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9117.11573395  8331.88448791]
------
Step:33, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 3104.62256916 1592.56115583]
New Q values:  [ 677.52857079 -168.92307549 2531.49760075 1592.56115583]
Reward: -1  Episode Reward:  -603
xxxxx
x..gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2692.65323166 -6442.16912869 -8192.20126966  4300.82857696]
------
Step:34, Action:West
State  288
Old Q Values:  [ 2692.65323166 -6442.16912869 -8192.20126966  4300.82857696]
New Q values:  [ 2692.65323166 -6442.16912869 -8192.20126966  4454.86615097]
Reward: -1  Episode Reward:  -604
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  9117.11573395  8331.88448791]
------
Step:35, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 2531.49760075 1592.56115583]
New Q values:  [ 677.52857079 -168.92307549 2348.45888559 1592.56115583]
Reward: -1  Episode Reward:  -605
xxxxx
x..gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2692.65323166 -6442.16912869 -8192.20126966  4454.86615097]
------
Step:36, Action:West
State  288
Old Q Values:  [ 2692.65323166 -6442.16912869 -8192.20126966  4454.86615097]
New Q values:  [ 2692.65323166 -6442.16912869 -8192.20126966  2485.88412606]
Reward: -1  Episode Reward:  -606
xxxxx
x...x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 2348.45888559 1592.56115583]
------
Step:37, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  9117.11573395  8331.88448791]
New Q values:  [-2527.46239811 -8521.23367799  4454.04226308  8331.88448791]
Reward: -1  Episode Reward:  -607
xxxxx
x...x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2692.65323166 -6442.16912869 -8192.20126966  2485.88412606]
------
Step:38, Action:North
State  288
Old Q Values:  [ 2692.65323166 -6442.16912869 -8192.20126966  2485.88412606]
New Q values:  [ 4035.79367451 -6442.16912869 -8192.20126966  2485.88412606]
Reward: -1  Episode Reward:  -608
xxxxx
x...x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3490.28852548 9864.44127282  790.72804752 2961.44414407]
------
Step:39, Action:South
State  208
Old Q Values:  [ 1948.48444998  2565.86388284 -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998  2236.48365549 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -609
xxxxx
x...x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4035.79367451 -6442.16912869 -8192.20126966  2485.88412606]
------
Step:40, Action:North
State  288
Old Q Values:  [ 4035.79367451 -6442.16912869 -8192.20126966  2485.88412606]
New Q values:  [ 2284.66256645 -6442.16912869 -8192.20126966  2485.88412606]
Reward: -1  Episode Reward:  -610
xxxxx
x...x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998  2236.48365549 -4584.50430574   566.35179234]
------
Step:41, Action:South
State  210
Old Q Values:  [3490.28852548 9864.44127282  790.72804752 2961.44414407]
New Q values:  [3490.28852548 4690.94174695  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -611
xxxxx
x...x
x . x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2284.66256645 -6442.16912869 -8192.20126966  2485.88412606]
------
Step:42, Action:West
State  288
Old Q Values:  [ 2284.66256645 -6442.16912869 -8192.20126966  2485.88412606]
New Q values:  [ 2284.66256645 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -10001  Episode Reward:  -10612
xxxxx
x...x
x . x
x g x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  1.63194774e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  7.80637474e+03  9.58218615e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  7.80637474e+03  4.50921956e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998  2236.48365549 -4584.50430574   566.35179234]
------
Step:2, Action:South
State  208
Old Q Values:  [ 1948.48444998  2236.48365549 -4584.50430574   566.35179234]
New Q values:  [ 1948.48444998  1585.39223213 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x.  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2284.66256645 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:3, Action:North
State  288
Old Q Values:  [ 2284.66256645 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 1497.81036157 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xg ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998  1585.39223213 -4584.50430574   566.35179234]
------
Step:4, Action:North
State  210
Old Q Values:  [3490.28852548 4690.94174695  790.72804752 2961.44414407]
New Q values:  [19882.07341977  4690.94174695   790.72804752  2961.44414407]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 17064.17701373  -180.00807518 61601.86003193]
------
Step:5, Action:West
State  130
Old Q Values:  [46177.80406237 17064.17701373  -180.00807518 61601.86003193]
New Q values:  [46177.80406237 17064.17701373  -180.00807518 45378.94827235]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  28498.32154925 69109.34753192]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  3171.1815055    248.57627421]
New Q values:  [ -281.736      -1150.91067548  3171.1815055    278.86963148]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x.g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  580.13040598  532.59323345 -252.78192178]
------
Step:7, Action:South
State  109
Old Q Values:  [ -241.10880094   517.93781181 -5413.46457526  -180.6       ]
New Q values:  [ -241.10880094   792.29486742 -5413.46457526  -180.6       ]
Reward: 9  Episode Reward:  53
xxxxx
x g x
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1932.39914233  588.19549948  154.04646645]
------
Step:8, Action:South
State  188
Old Q Values:  [-6523.78898263  1892.16995939  1470.30928837     0.        ]
New Q values:  [-6523.78898263  3698.64506154  1470.30928837     0.        ]
Reward: -1  Episode Reward:  52
xxxxx
xg  x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1103.54352259 -2735.46306511  9807.9235926  -5679.36893145]
------
Step:9, Action:East
State  256
Old Q Values:  [33845.23946664 10195.91087381  5576.40109469   644.94785455]
New Q values:  [33845.23946664 10195.91087381 64735.52578425   644.94785455]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
xg  x
x a x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  818.91451364  1805.45772249 -2062.97736574   262.76946019]
------
Step:1, Action:South
State  181
Old Q Values:  [  818.91451364  1805.45772249 -2062.97736574   262.76946019]
New Q values:  [  818.91451364  2228.65283037 -2062.97736574   262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 627.08913025   26.73544252 5003.56580457 1513.51439416]
------
Step:2, Action:East
State  261
Old Q Values:  [ 627.08913025   26.73544252 5003.56580457 1513.51439416]
New Q values:  [ 627.08913025   26.73544252 2711.36398751 1513.51439416]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x .gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 2348.45888559 1592.56115583]
------
Step:3, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 2348.45888559 1592.56115583]
New Q values:  [ 677.52857079 -168.92307549 1394.12666271 1592.56115583]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1497.81036157 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:4, Action:North
State  288
Old Q Values:  [ 1497.81036157 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 1189.06947962 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: 9  Episode Reward:  36
xxxxx
x.g.x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1948.48444998  1585.39223213 -4584.50430574   566.35179234]
------
Step:5, Action:North
State  208
Old Q Values:  [ 1948.48444998  1585.39223213 -4584.50430574   566.35179234]
New Q values:  [14638.1349987   1585.39223213 -4584.50430574   566.35179234]
Reward: 9  Episode Reward:  45
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[46177.80406237 17064.17701373  -180.00807518 45378.94827235]
------
Step:6, Action:North
State  130
Old Q Values:  [46177.80406237 17064.17701373  -180.00807518 45378.94827235]
New Q values:  [32143.86284366 17064.17701373  -180.00807518 45378.94827235]
Reward: -301  Episode Reward:  -256
xxxxx
x. ax
x . x
x g x
xxxxx
Step:7, Action:North
State  130
Old Q Values:  [32143.86284366 17064.17701373  -180.00807518 45378.94827235]
New Q values:  [26290.62961917 17064.17701373  -180.00807518 45378.94827235]
Reward: -301  Episode Reward:  -557
xxxxx
x. ax
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 17064.17701373  -180.00807518 45378.94827235]
------
Step:8, Action:West
State  130
Old Q Values:  [26290.62961917 17064.17701373  -180.00807518 45378.94827235]
New Q values:  [26290.62961917 17064.17701373  -180.00807518 57151.75289165]
Reward: -1  Episode Reward:  -558
xxxxx
x.a x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
------
Step:9, Action:West
State  121
Old Q Values:  [    0.             0.         -7059.48304203   221.5544762 ]
New Q values:  [    0.             0.         -7059.48304203   331.71025071]
Reward: 9  Episode Reward:  -549
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   792.29486742 -5413.46457526  -180.6       ]
------
Step:10, Action:South
State  109
Old Q Values:  [ -241.10880094   792.29486742 -5413.46457526  -180.6       ]
New Q values:  [ -241.10880094   984.91379608 -5413.46457526  -180.6       ]
Reward: -1  Episode Reward:  -550
xxxxx
x g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  818.91451364  2228.65283037 -2062.97736574   262.76946019]
------
Step:11, Action:South
State  180
Old Q Values:  [-2405.85343029   653.13291613  8317.42536224 -4966.32149798]
New Q values:  [-2405.85343029  3203.03024423  8317.42536224 -4966.32149798]
Reward: -1  Episode Reward:  -551
xxxxx
xg  x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1103.54352259 -2735.46306511  9807.9235926  -5679.36893145]
------
Step:12, Action:East
State  260
Old Q Values:  [ 1103.54352259 -2735.46306511  9807.9235926  -5679.36893145]
New Q values:  [ 1103.54352259 -2735.46306511  6422.13478341 -5679.36893145]
Reward: -1  Episode Reward:  -552
xxxxx
x   x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4454.04226308  8331.88448791]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4454.04226308  8331.88448791]
New Q values:  [-2527.46239811 -8521.23367799  4454.04226308  5258.79423019]
Reward: -1  Episode Reward:  -553
xxxxx
xg  x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1103.54352259 -2735.46306511  6422.13478341 -5679.36893145]
------
Step:14, Action:East
State  260
Old Q Values:  [ 1103.54352259 -2735.46306511  6422.13478341 -5679.36893145]
New Q values:  [ 1103.54352259 -2735.46306511  4145.89218242 -5679.36893145]
Reward: -1  Episode Reward:  -554
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4454.04226308  5258.79423019]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4454.04226308  5258.79423019]
New Q values:  [-2527.46239811 -8521.23367799  4454.04226308  2916.32688833]
Reward: -1  Episode Reward:  -555
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 627.08913025   26.73544252 2711.36398751 1513.51439416]
------
Step:16, Action:East
State  260
Old Q Values:  [ 1103.54352259 -2735.46306511  4145.89218242 -5679.36893145]
New Q values:  [ 1103.54352259 -2735.46306511  2993.96955189 -5679.36893145]
Reward: -1  Episode Reward:  -556
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4454.04226308  2916.32688833]
------
Step:17, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4454.04226308  2916.32688833]
New Q values:  [-2527.46239811 -8521.23367799  2137.73774912  2916.32688833]
Reward: -1  Episode Reward:  -557
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1189.06947962 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:18, Action:North
State  288
Old Q Values:  [ 1189.06947962 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 6439.64981778 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  -558
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[19882.07341977  4690.94174695   790.72804752  2961.44414407]
------
Step:19, Action:North
State  208
Old Q Values:  [14638.1349987   1585.39223213 -4584.50430574   566.35179234]
New Q values:  [ 6881.18065423  1585.39223213 -4584.50430574   566.35179234]
Reward: -1  Episode Reward:  -559
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.42175552e+03 -3.22965309e-01  2.26656412e+03]
------
Step:20, Action:South
State  130
Old Q Values:  [26290.62961917 17064.17701373  -180.00807518 57151.75289165]
New Q values:  [26290.62961917  8889.42500176  -180.00807518 57151.75289165]
Reward: -1  Episode Reward:  -560
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6881.18065423  1585.39223213 -4584.50430574   566.35179234]
------
Step:21, Action:North
State  210
Old Q Values:  [19882.07341977  4690.94174695   790.72804752  2961.44414407]
New Q values:  [8978.75602266 4690.94174695  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -561
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.42175552e+03 -3.22965309e-01  2.26656412e+03]
------
Step:22, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.42175552e+03 -3.22965309e-01  2.26656412e+03]
New Q values:  [ 7.64171987e+01  4.06172901e+03 -3.22965309e-01  2.26656412e+03]
Reward: -1  Episode Reward:  -562
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8978.75602266 4690.94174695  790.72804752 2961.44414407]
------
Step:23, Action:North
State  210
Old Q Values:  [8978.75602266 4690.94174695  790.72804752 2961.44414407]
New Q values:  [4809.421113   4690.94174695  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -563
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  4.06172901e+03 -3.22965309e-01  2.26656412e+03]
------
Step:24, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  4.06172901e+03 -3.22965309e-01  2.26656412e+03]
New Q values:  [ 7.64171987e+01  3.06691794e+03 -3.22965309e-01  2.26656412e+03]
Reward: -1  Episode Reward:  -564
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4809.421113   4690.94174695  790.72804752 2961.44414407]
------
Step:25, Action:North
State  210
Old Q Values:  [4809.421113   4690.94174695  790.72804752 2961.44414407]
New Q values:  [2843.24382695 4690.94174695  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -565
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  3.06691794e+03 -3.22965309e-01  2.26656412e+03]
------
Step:26, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  3.06691794e+03 -3.22965309e-01  2.26656412e+03]
New Q values:  [ 7.64171987e+01  2.63344970e+03 -3.22965309e-01  2.26656412e+03]
Reward: -1  Episode Reward:  -566
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2843.24382695 4690.94174695  790.72804752 2961.44414407]
------
Step:27, Action:South
State  210
Old Q Values:  [2843.24382695 4690.94174695  790.72804752 2961.44414407]
New Q values:  [2843.24382695 3807.67164411  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -567
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6439.64981778 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:28, Action:North
State  288
Old Q Values:  [ 6439.64981778 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 3717.56142035 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  -568
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2843.24382695 3807.67164411  790.72804752 2961.44414407]
------
Step:29, Action:West
State  208
Old Q Values:  [ 6881.18065423  1585.39223213 -4584.50430574   566.35179234]
New Q values:  [ 6881.18065423  1585.39223213 -4584.50430574 60882.56097653]
Reward: 100009  Episode Reward:  99441
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3171.1815055    278.86963148]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3171.1815055    278.86963148]
New Q values:  [ -281.736      -1150.91067548  2063.90751212   278.86963148]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.63344970e+03 -3.22965309e-01  2.26656412e+03]
------
Step:2, Action:South
State  136
Old Q Values:  [ -170.77177351  2296.83399502 -2383.80019164    83.05818806]
New Q values:  [ -170.77177351 19188.90189097 -2383.80019164    83.05818806]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6881.18065423  1585.39223213 -4584.50430574 60882.56097653]
------
Step:3, Action:West
State  208
Old Q Values:  [ 6881.18065423  1585.39223213 -4584.50430574 60882.56097653]
New Q values:  [ 6881.18065423  1585.39223213 -4584.50430574 26700.33681241]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  7.80637474e+03  4.50921956e+03 -6.46606132e+03]
------
Step:4, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  7.80637474e+03  4.50921956e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  4.00284796e+03  4.50921956e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  36
xxxxx
x.  x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2137.73774912  2916.32688833]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2137.73774912  2916.32688833]
New Q values:  [-2527.46239811 -8521.23367799  2137.73774912 20586.58849061]
Reward: -1  Episode Reward:  35
xxxxx
x.  x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 10195.91087381 64735.52578425   644.94785455]
------
Step:6, Action:East
State  256
Old Q Values:  [33845.23946664 10195.91087381 64735.52578425   644.94785455]
New Q values:  [33845.23946664 10195.91087381 32069.58686088   644.94785455]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x.  x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2137.73774912 20586.58849061]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2137.73774912 20586.58849061]
New Q values:  [-2527.46239811 -8521.23367799  1975.76352575 20586.58849061]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
x.  x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3717.56142035 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:8, Action:North
State  288
Old Q Values:  [ 3717.56142035 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 2628.72606137 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  42
xxxxx
x.  x
x. ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2843.24382695 3807.67164411  790.72804752 2961.44414407]
------
Step:9, Action:West
State  208
Old Q Values:  [ 6881.18065423  1585.39223213 -4584.50430574 26700.33681241]
New Q values:  [ 6881.18065423  1585.39223213 -4584.50430574 11330.15498456]
Reward: -1  Episode Reward:  41
xxxxx
x.  x
x.agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2168.73419866 -4582.3674281    534.04109446]
------
Step:10, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  4.00284796e+03  4.50921956e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  7.77651573e+03  4.50921956e+03 -6.46606132e+03]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1975.76352575 20586.58849061]
------
Step:11, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1975.76352575 20586.58849061]
New Q values:  [-2527.46239811 -8521.23367799  1975.76352575 18387.60723623]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 10195.91087381 32069.58686088   644.94785455]
------
Step:12, Action:East
State  256
Old Q Values:  [33845.23946664 10195.91087381 32069.58686088   644.94785455]
New Q values:  [33845.23946664 10195.91087381 18343.51691522   644.94785455]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1975.76352575 18387.60723623]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1975.76352575 18387.60723623]
New Q values:  [-2527.46239811 -8521.23367799  1975.76352575 17508.01473449]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 10195.91087381 18343.51691522   644.94785455]
------
Step:14, Action:East
State  257
Old Q Values:  [43651.13086027  6367.89256635 16022.11762108  1875.31501677]
New Q values:  [43651.13086027  6367.89256635 11660.65146878  1875.31501677]
Reward: -1  Episode Reward:  36
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1975.76352575 17508.01473449]
------
Step:15, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1975.76352575 17508.01473449]
New Q values:  [-2527.46239811 -8521.23367799  1975.76352575 17156.17773379]
Reward: -1  Episode Reward:  35
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 10195.91087381 18343.51691522   644.94785455]
------
Step:16, Action:East
State  257
Old Q Values:  [43651.13086027  6367.89256635 11660.65146878  1875.31501677]
New Q values:  [43651.13086027  6367.89256635  9810.51390765  1875.31501677]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1975.76352575 17156.17773379]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1975.76352575 17156.17773379]
New Q values:  [-2527.46239811 -8521.23367799  1975.76352575 17015.44293351]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 10195.91087381 18343.51691522   644.94785455]
------
Step:18, Action:East
State  256
Old Q Values:  [33845.23946664 10195.91087381 18343.51691522   644.94785455]
New Q values:  [33845.23946664 10195.91087381 12441.43964614   644.94785455]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1975.76352575 17015.44293351]
------
Step:19, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1975.76352575 17015.44293351]
New Q values:  [-2527.46239811 -8521.23367799  1975.76352575 16959.1490134 ]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 10195.91087381 12441.43964614   644.94785455]
------
Step:20, Action:East
State  256
Old Q Values:  [33845.23946664 10195.91087381 12441.43964614   644.94785455]
New Q values:  [33845.23946664 10195.91087381 10063.72056248   644.94785455]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1975.76352575 16959.1490134 ]
------
Step:21, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1975.76352575 16959.1490134 ]
New Q values:  [-2527.46239811 -8521.23367799  1578.32322871 16959.1490134 ]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
x.  x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2628.72606137 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:22, Action:North
State  288
Old Q Values:  [ 2628.72606137 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 2193.19191778 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2843.24382695 3807.67164411  790.72804752 2961.44414407]
------
Step:23, Action:South
State  208
Old Q Values:  [ 6881.18065423  1585.39223213 -4584.50430574 11330.15498456]
New Q values:  [ 6881.18065423  1291.51446819 -4584.50430574 11330.15498456]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2193.19191778 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:24, Action:North
State  288
Old Q Values:  [ 2193.19191778 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 4275.72326248 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  26
xxxxx
xg  x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6881.18065423  1291.51446819 -4584.50430574 11330.15498456]
------
Step:25, Action:West
State  208
Old Q Values:  [ 6881.18065423  1291.51446819 -4584.50430574 11330.15498456]
New Q values:  [ 6881.18065423  1291.51446819 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x.a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  7.77651573e+03  4.50921956e+03 -6.46606132e+03]
------
Step:26, Action:South
State  193
Old Q Values:  [-5922.26708831  2168.73419866 -4582.3674281    534.04109446]
New Q values:  [-5922.26708831  1344.66202621 -4582.3674281    534.04109446]
Reward: -1  Episode Reward:  24
xxxxx
x. gx
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1394.12666271 1592.56115583]
------
Step:27, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1578.32322871 16959.1490134 ]
New Q values:  [-2527.46239811 -8521.23367799  1578.32322871 19878.39886344]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[43651.13086027  6367.89256635  9810.51390765  1875.31501677]
------
Step:28, Action:North
State  257
Old Q Values:  [43651.13086027  6367.89256635  9810.51390765  1875.31501677]
New Q values:  [41683.79819939  6367.89256635  9810.51390765  1875.31501677]
Reward: 9  Episode Reward:  32
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[80726.48618428 23344.73803738 10235.04946499     0.        ]
------
Step:29, Action:North
State  177
Old Q Values:  [80726.48618428 23344.73803738 10235.04946499     0.        ]
New Q values:  [109163.23336057  23344.73803738  10235.04946499      0.        ]
Reward: 100009  Episode Reward:  100041
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6881.18065423  1291.51446819 -4584.50430574  6864.41671345]
------
Step:1, Action:West
State  216
Old Q Values:  [ 1971.43838233  2109.58890912 -8896.20691497  2418.4316485 ]
New Q values:  [ 1971.43838233  2109.58890912 -8896.20691497  3305.72737902]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  7.77651573e+03  4.50921956e+03 -6.46606132e+03]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831  1344.66202621 -4582.3674281    534.04109446]
New Q values:  [-5922.26708831  1021.03315723 -4582.3674281    534.04109446]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1394.12666271 1592.56115583]
------
Step:3, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 1394.12666271 1592.56115583]
New Q values:  [  677.52857079  -168.92307549  1394.12666271 13147.56392215]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41683.79819939  6367.89256635  9810.51390765  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [ 627.08913025   26.73544252 2711.36398751 1513.51439416]
New Q values:  [3.30052057e+04 2.67354425e+01 2.71136399e+03 1.51351439e+03]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  10235.04946499      0.        ]
------
Step:5, Action:North
State  181
Old Q Values:  [  818.91451364  2228.65283037 -2062.97736574   262.76946019]
New Q values:  [  656.1279728   2228.65283037 -2062.97736574   262.76946019]
Reward: 9  Episode Reward:  45
xxxxx
xa. x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1077.20722448  238.35800069    0.        ]
------
Step:6, Action:South
State  103
Old Q Values:  [ 221.30610858 1077.20722448  238.35800069    0.        ]
New Q values:  [ 221.30610858 1098.8787389   238.35800069    0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x . x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  656.1279728   2228.65283037 -2062.97736574   262.76946019]
------
Step:7, Action:South
State  183
Old Q Values:  [ 540.75283156 1354.41034513 1753.05719277  358.5166536 ]
New Q values:  [  540.75283156 10442.72583613  1753.05719277   358.5166536 ]
Reward: -1  Episode Reward:  43
xxxxx
x . x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3.30052057e+04 2.67354425e+01 2.71136399e+03 1.51351439e+03]
------
Step:8, Action:North
State  261
Old Q Values:  [3.30052057e+04 2.67354425e+01 2.71136399e+03 1.51351439e+03]
New Q values:  [16334.30001495    26.73544252  2711.36398751  1513.51439416]
Reward: -1  Episode Reward:  42
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  540.75283156 10442.72583613  1753.05719277   358.5166536 ]
------
Step:9, Action:South
State  183
Old Q Values:  [  540.75283156 10442.72583613  1753.05719277   358.5166536 ]
New Q values:  [ 540.75283156 9076.78033894 1753.05719277  358.5166536 ]
Reward: -1  Episode Reward:  41
xxxxx
x . x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[16334.30001495    26.73544252  2711.36398751  1513.51439416]
------
Step:10, Action:North
State  261
Old Q Values:  [16334.30001495    26.73544252  2711.36398751  1513.51439416]
New Q values:  [7201.71585509   26.73544252 2711.36398751 1513.51439416]
Reward: -1  Episode Reward:  40
xxxxx
x . x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  656.1279728   2228.65283037 -2062.97736574   262.76946019]
------
Step:11, Action:South
State  180
Old Q Values:  [-2405.85343029  3203.03024423  8317.42536224 -4966.32149798]
New Q values:  [-2405.85343029  2178.80296326  8317.42536224 -4966.32149798]
Reward: -1  Episode Reward:  39
xxxxx
x . x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1103.54352259 -2735.46306511  2993.96955189 -5679.36893145]
------
Step:12, Action:East
State  261
Old Q Values:  [7201.71585509   26.73544252 2711.36398751 1513.51439416]
New Q values:  [7201.71585509   26.73544252 7047.46525403 1513.51439416]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1578.32322871 19878.39886344]
------
Step:13, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1578.32322871 19878.39886344]
New Q values:  [-2527.46239811 -8521.23367799  1578.32322871  8848.95041094]
Reward: -1  Episode Reward:  37
xxxxx
x . x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1103.54352259 -2735.46306511  2993.96955189 -5679.36893145]
------
Step:14, Action:East
State  260
Old Q Values:  [ 1103.54352259 -2735.46306511  2993.96955189 -5679.36893145]
New Q values:  [ 1103.54352259 -2735.46306511  3851.67294404 -5679.36893145]
Reward: -1  Episode Reward:  36
xxxxx
x . x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1578.32322871  8848.95041094]
------
Step:15, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1578.32322871  8848.95041094]
New Q values:  [-2527.46239811 -8521.23367799  1919.44627023  8848.95041094]
Reward: 9  Episode Reward:  45
xxxxx
x . x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4275.72326248 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:16, Action:North
State  288
Old Q Values:  [ 4275.72326248 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 3774.04350126 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  44
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6881.18065423  1291.51446819 -4584.50430574  6864.41671345]
------
Step:17, Action:North
State  208
Old Q Values:  [ 6881.18065423  1291.51446819 -4584.50430574  6864.41671345]
New Q values:  [19897.39812919  1291.51446819 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  43
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  8889.42500176  -180.00807518 57151.75289165]
------
Step:18, Action:West
State  128
Old Q Values:  [11374.93691792 11563.73606199 -8652.84       44428.56690885]
New Q values:  [11374.93691792 11563.73606199 -8652.84       99080.33825189]
Reward: 100009  Episode Reward:  100052
xxxxx
x agx
x   x
x   x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  391.45858569 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 1098.8787389   238.35800069    0.        ]
New Q values:  [ 221.30610858 3167.98559724  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 9076.78033894 1753.05719277  358.5166536 ]
------
Step:2, Action:South
State  181
Old Q Values:  [  656.1279728   2228.65283037 -2062.97736574   262.76946019]
New Q values:  [  656.1279728   3057.37588867 -2062.97736574   262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[7201.71585509   26.73544252 7047.46525403 1513.51439416]
------
Step:3, Action:North
State  260
Old Q Values:  [ 1103.54352259 -2735.46306511  3851.67294404 -5679.36893145]
New Q values:  [-3063.95498229 -2735.46306511  3851.67294404 -5679.36893145]
Reward: -10001  Episode Reward:  -9983
xxxxx
x ..x
xg .x
x ..x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1919.44627023  8848.95041094]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  5943.73212686]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  4543.40760727]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[7201.71585509   26.73544252 7047.46525403 1513.51439416]
------
Step:2, Action:North
State  261
Old Q Values:  [7201.71585509   26.73544252 7047.46525403 1513.51439416]
New Q values:  [3803.29910864   26.73544252 7047.46525403 1513.51439416]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  656.1279728   3057.37588867 -2062.97736574   262.76946019]
------
Step:3, Action:South
State  183
Old Q Values:  [ 540.75283156 9076.78033894 1753.05719277  358.5166536 ]
New Q values:  [ 540.75283156 5744.35171179 1753.05719277  358.5166536 ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3803.29910864   26.73544252 7047.46525403 1513.51439416]
------
Step:4, Action:North
State  260
Old Q Values:  [-3063.95498229 -2735.46306511  3851.67294404 -5679.36893145]
New Q values:  [ -721.6483708  -2735.46306511  3851.67294404 -5679.36893145]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -7507.54632711  1681.77874037     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 540.75283156 5744.35171179 1753.05719277  358.5166536 ]
New Q values:  [ 540.75283156 5744.35171179 1244.78695138  358.5166536 ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  1.63194774e+03]
------
Step:6, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  7.77651573e+03  4.50921956e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  7.77651573e+03  7.77830726e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19897.39812919  1291.51446819 -4584.50430574  6864.41671345]
------
Step:7, Action:North
State  208
Old Q Values:  [19897.39812919  1291.51446819 -4584.50430574  6864.41671345]
New Q values:  [25109.88511917  1291.51446819 -4584.50430574  6864.41671345]
Reward: 9  Episode Reward:  33
xxxxx
x..ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  8889.42500176  -180.00807518 57151.75289165]
------
Step:8, Action:West
State  130
Old Q Values:  [26290.62961917  8889.42500176  -180.00807518 57151.75289165]
New Q values:  [26290.62961917  8889.42500176  -180.00807518 43598.90541624]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  28498.32154925 69109.34753192]
------
Step:9, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 4292.78893337  963.6944397 ]
New Q values:  [   0.         1166.51141701 4292.78893337  686.3519147 ]
Reward: 9  Episode Reward:  51
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   984.91379608 -5413.46457526  -180.6       ]
------
Step:10, Action:South
State  108
Old Q Values:  [-8463.16477134  2569.03867731   845.00690416     0.        ]
New Q values:  [-8463.16477134  2136.60898938   845.00690416     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  3698.64506154  1470.30928837     0.        ]
------
Step:11, Action:South
State  189
Old Q Values:  [ 275.08817949 1932.39914233  588.19549948  154.04646645]
New Q values:  [ 275.08817949 2886.59923314  588.19549948  154.04646645]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3803.29910864   26.73544252 7047.46525403 1513.51439416]
------
Step:12, Action:East
State  261
Old Q Values:  [3803.29910864   26.73544252 7047.46525403 1513.51439416]
New Q values:  [3803.29910864   26.73544252 4181.40838379 1513.51439416]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946  4543.40760727]
------
Step:13, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  4543.40760727]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  2972.26492612]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -721.6483708  -2735.46306511  3851.67294404 -5679.36893145]
------
Step:14, Action:East
State  260
Old Q Values:  [ -721.6483708  -2735.46306511  3851.67294404 -5679.36893145]
New Q values:  [ -721.6483708  -2735.46306511  2431.74865545 -5679.36893145]
Reward: -1  Episode Reward:  46
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946  2972.26492612]
------
Step:15, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  2972.26492612]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  2442.72848559]
Reward: -1  Episode Reward:  45
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3803.29910864   26.73544252 4181.40838379 1513.51439416]
------
Step:16, Action:East
State  261
Old Q Values:  [3803.29910864   26.73544252 4181.40838379 1513.51439416]
New Q values:  [3803.29910864   26.73544252 2404.78189919 1513.51439416]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   618.89767946  2442.72848559]
------
Step:17, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  2442.72848559]
New Q values:  [   16.82637525 -5807.06396197   618.89767946  2117.48112683]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3803.29910864   26.73544252 2404.78189919 1513.51439416]
------
Step:18, Action:North
State  260
Old Q Values:  [ -721.6483708  -2735.46306511  2431.74865545 -5679.36893145]
New Q values:  [  249.20175939 -2735.46306511  2431.74865545 -5679.36893145]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:19, Action:East
State  191
Old Q Values:  [  3.06655861 970.40507756 302.52728443   0.        ]
New Q values:  [  3.06655861 970.40507756 513.29369084   0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[    0.         -1406.21014518  1309.6092569      0.        ]
------
Step:20, Action:East
State  204
Old Q Values:  [   0.         1387.82535955 2641.67340466  441.58769553]
New Q values:  [   0.         1387.82535955 2047.78757557  441.58769553]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  2109.58890912 -8896.20691497  3305.72737902]
------
Step:21, Action:South
State  208
Old Q Values:  [25109.88511917  1291.51446819 -4584.50430574  6864.41671345]
New Q values:  [25109.88511917 61654.21883765 -4584.50430574  6864.41671345]
Reward: 100009  Episode Reward:  100049
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3774.04350126 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:1, Action:North
State  288
Old Q Values:  [ 3774.04350126 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [20011.2830518  -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25109.88511917 61654.21883765 -4584.50430574  6864.41671345]
------
Step:2, Action:South
State  210
Old Q Values:  [2843.24382695 3807.67164411  790.72804752 2961.44414407]
New Q values:  [2843.24382695 7525.85357319  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.  x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20011.2830518  -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:3, Action:North
State  288
Old Q Values:  [20011.2830518  -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [10261.66929268 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2843.24382695 7525.85357319  790.72804752 2961.44414407]
------
Step:4, Action:South
State  210
Old Q Values:  [2843.24382695 7525.85357319  790.72804752 2961.44414407]
New Q values:  [2843.24382695 6088.24221708  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.  x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10261.66929268 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:5, Action:North
State  288
Old Q Values:  [10261.66929268 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 5930.54038219 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2843.24382695 6088.24221708  790.72804752 2961.44414407]
------
Step:6, Action:South
State  208
Old Q Values:  [25109.88511917 61654.21883765 -4584.50430574  6864.41671345]
New Q values:  [25109.88511917 26440.24964972 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5930.54038219 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:7, Action:North
State  288
Old Q Values:  [ 5930.54038219 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 4303.69104779 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -10001  Episode Reward:  -9997
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3803.29910864   26.73544252 2404.78189919 1513.51439416]
------
Step:1, Action:North
State  261
Old Q Values:  [3803.29910864   26.73544252 2404.78189919 1513.51439416]
New Q values:  [2443.93241006   26.73544252 2404.78189919 1513.51439416]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  656.1279728   3057.37588867 -2062.97736574   262.76946019]
------
Step:2, Action:South
State  181
Old Q Values:  [  656.1279728   3057.37588867 -2062.97736574   262.76946019]
New Q values:  [  656.1279728   1955.53007849 -2062.97736574   262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2443.93241006   26.73544252 2404.78189919 1513.51439416]
------
Step:3, Action:North
State  261
Old Q Values:  [2443.93241006   26.73544252 2404.78189919 1513.51439416]
New Q values:  [1563.63198757   26.73544252 2404.78189919 1513.51439416]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xag x
x ..x
xxxxx
Step:4, Action:East
State  180
Old Q Values:  [-2405.85343029  2178.80296326  8317.42536224 -4966.32149798]
New Q values:  [-2405.85343029  2178.80296326  -178.40224643 -4966.32149798]
Reward: -10001  Episode Reward:  -9994
xxxxx
x...x
xg. x
x ..x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2063.90751212   278.86963148]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2063.90751212   278.86963148]
New Q values:  [ -281.736      -1150.91067548  1620.99791477   278.86963148]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.63344970e+03 -3.22965309e-01  2.26656412e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.63344970e+03 -3.22965309e-01  2.26656412e+03]
New Q values:  [ 7.64171987e+01  2.05049809e+03 -3.22965309e-01  2.26656412e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  2109.58890912 -8896.20691497  3305.72737902]
------
Step:3, Action:West
State  216
Old Q Values:  [ 1971.43838233  2109.58890912 -8896.20691497  3305.72737902]
New Q values:  [ 1971.43838233  2109.58890912 -8896.20691497 -4151.00858992]
Reward: -9991  Episode Reward:  -9973
xxxxx
x.  x
x g x
x...x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  677.52857079  -168.92307549  1394.12666271 13147.56392215]
------
Step:1, Action:West
State  273
Old Q Values:  [  677.52857079  -168.92307549  1394.12666271 13147.56392215]
New Q values:  [ 677.52857079 -168.92307549 1394.12666271 5985.86013862]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 2404.78189919 1513.51439416]
------
Step:2, Action:East
State  261
Old Q Values:  [1563.63198757   26.73544252 2404.78189919 1513.51439416]
New Q values:  [1563.63198757   26.73544252 3615.99788296 1513.51439416]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1919.44627023  8848.95041094]
------
Step:3, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 1394.12666271 5985.86013862]
New Q values:  [ 677.52857079 -168.92307549 1394.12666271 3478.54342034]
Reward: -1  Episode Reward:  7
xxxxx
x..gx
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 3615.99788296 1513.51439416]
------
Step:4, Action:East
State  261
Old Q Values:  [1563.63198757   26.73544252 3615.99788296 1513.51439416]
New Q values:  [1563.63198757   26.73544252 4100.48427647 1513.51439416]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1919.44627023  8848.95041094]
------
Step:5, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1919.44627023  8848.95041094]
New Q values:  [-2527.46239811 -8521.23367799  1919.44627023  4769.12544732]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x.g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 4100.48427647 1513.51439416]
------
Step:6, Action:East
State  260
Old Q Values:  [  249.20175939 -2735.46306511  2431.74865545 -5679.36893145]
New Q values:  [  249.20175939 -2735.46306511  2402.83709638 -5679.36893145]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xg. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1919.44627023  4769.12544732]
------
Step:7, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1919.44627023  4769.12544732]
New Q values:  [-2527.46239811 -8521.23367799  1919.44627023 -3372.09869216]
Reward: -10001  Episode Reward:  -9997
xxxxx
x...x
x.. x
xg .x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 5744.35171179 1244.78695138  358.5166536 ]
------
Step:1, Action:South
State  181
Old Q Values:  [  656.1279728   1955.53007849 -2062.97736574   262.76946019]
New Q values:  [  656.1279728   2017.75731433 -2062.97736574   262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x .gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 4100.48427647 1513.51439416]
------
Step:2, Action:East
State  261
Old Q Values:  [1563.63198757   26.73544252 4100.48427647 1513.51439416]
New Q values:  [1563.63198757   26.73544252 2221.42759166 1513.51439416]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1919.44627023 -3372.09869216]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1919.44627023 -3372.09869216]
New Q values:  [-2527.46239811 -8521.23367799  2058.28582243 -3372.09869216]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4303.69104779 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:4, Action:North
State  288
Old Q Values:  [ 4303.69104779 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [ 9658.95131403 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: 9  Episode Reward:  26
xxxxx
xg..x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25109.88511917 26440.24964972 -4584.50430574  6864.41671345]
------
Step:5, Action:South
State  208
Old Q Values:  [25109.88511917 26440.24964972 -4584.50430574  6864.41671345]
New Q values:  [25109.88511917 13473.1852541  -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  25
xxxxx
x.g.x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9658.95131403 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:6, Action:North
State  288
Old Q Values:  [ 9658.95131403 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [11395.94606136 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  24
xxxxx
xg..x
x .ax
x   x
xxxxx
Step:7, Action:North
State  208
Old Q Values:  [25109.88511917 13473.1852541  -4584.50430574  6864.41671345]
New Q values:  [39773.45552323 13473.1852541  -4584.50430574  6864.41671345]
Reward: 9  Episode Reward:  33
xxxxx
x.gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 11563.73606199 -8652.84       99080.33825189]
------
Step:8, Action:South
State  130
Old Q Values:  [26290.62961917  8889.42500176  -180.00807518 43598.90541624]
New Q values:  [26290.62961917 15487.20665767  -180.00807518 43598.90541624]
Reward: -1  Episode Reward:  32
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[39773.45552323 13473.1852541  -4584.50430574  6864.41671345]
------
Step:9, Action:North
State  208
Old Q Values:  [39773.45552323 13473.1852541  -4584.50430574  6864.41671345]
New Q values:  [28988.45383416 13473.1852541  -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  31
xxxxx
x..ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 15487.20665767  -180.00807518 43598.90541624]
------
Step:10, Action:West
State  128
Old Q Values:  [11374.93691792 11563.73606199 -8652.84       99080.33825189]
New Q values:  [11374.93691792 11563.73606199 -8652.84       60941.0467891 ]
Reward: 9  Episode Reward:  40
xxxxx
x.agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.          2847.11106414 -5999.38454759 71011.70496116]
------
Step:11, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1264.15236623   391.97749867]
New Q values:  [ -253.44886264 -1902.20915811  1264.15236623   477.36095   ]
Reward: 9  Episode Reward:  49
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  391.45858569 -120.29354603]
------
Step:12, Action:South
State  99
Old Q Values:  [    0.         24340.91651165 56224.12962286     0.        ]
New Q values:  [    0.         42484.73661283 56224.12962286     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  10235.04946499      0.        ]
------
Step:13, Action:North
State  181
Old Q Values:  [  656.1279728   2017.75731433 -2062.97736574   262.76946019]
New Q values:  [  577.02113965  2017.75731433 -2062.97736574   262.76946019]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  391.45858569 -120.29354603]
------
Step:14, Action:South
State  109
Old Q Values:  [ -241.10880094   984.91379608 -5413.46457526  -180.6       ]
New Q values:  [ -241.10880094   998.69271273 -5413.46457526  -180.6       ]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  577.02113965  2017.75731433 -2062.97736574   262.76946019]
------
Step:15, Action:South
State  181
Old Q Values:  [  577.02113965  2017.75731433 -2062.97736574   262.76946019]
New Q values:  [  577.02113965  1472.93120323 -2062.97736574   262.76946019]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 2221.42759166 1513.51439416]
------
Step:16, Action:East
State  261
Old Q Values:  [1563.63198757   26.73544252 2221.42759166 1513.51439416]
New Q values:  [1563.63198757   26.73544252 1931.53406276 1513.51439416]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1394.12666271 3478.54342034]
------
Step:17, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2058.28582243 -3372.09869216]
New Q values:  [-2527.46239811 -8521.23367799  2058.28582243  -769.97925804]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 1931.53406276 1513.51439416]
------
Step:18, Action:East
State  260
Old Q Values:  [  249.20175939 -2735.46306511  2402.83709638 -5679.36893145]
New Q values:  [  249.20175939 -2735.46306511  1578.02058528 -5679.36893145]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2058.28582243  -769.97925804]
------
Step:19, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2058.28582243  -769.97925804]
New Q values:  [-2527.46239811 -8521.23367799  4241.49814738  -769.97925804]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11395.94606136 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:20, Action:North
State  288
Old Q Values:  [11395.94606136 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [13254.3145748  -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28988.45383416 13473.1852541  -4584.50430574  6864.41671345]
------
Step:21, Action:South
State  208
Old Q Values:  [28988.45383416 13473.1852541  -4584.50430574  6864.41671345]
New Q values:  [28988.45383416  9364.96847408 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13254.3145748  -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:22, Action:North
State  288
Old Q Values:  [13254.3145748  -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [13997.66198017 -6442.16912869 -8192.20126966 -2506.6810032 ]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28988.45383416  9364.96847408 -4584.50430574  6864.41671345]
------
Step:23, Action:South
State  208
Old Q Values:  [28988.45383416  9364.96847408 -4584.50430574  6864.41671345]
New Q values:  [28988.45383416  7944.68598368 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13997.66198017 -6442.16912869 -8192.20126966 -2506.6810032 ]
------
Step:24, Action:West
State  288
Old Q Values:  [13997.66198017 -6442.16912869 -8192.20126966 -2506.6810032 ]
New Q values:  [13997.66198017 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  36
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1394.12666271 3478.54342034]
------
Step:25, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4241.49814738  -769.97925804]
New Q values:  [-2527.46239811 -8521.23367799  4241.49814738   270.86851561]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 1931.53406276 1513.51439416]
------
Step:26, Action:East
State  261
Old Q Values:  [1563.63198757   26.73544252 1931.53406276 1513.51439416]
New Q values:  [1563.63198757   26.73544252 1815.57665121 1513.51439416]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1394.12666271 3478.54342034]
------
Step:27, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4241.49814738   270.86851561]
New Q values:  [-2527.46239811 -8521.23367799  4241.49814738   652.42040161]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 1815.57665121 1513.51439416]
------
Step:28, Action:East
State  261
Old Q Values:  [1563.63198757   26.73544252 1815.57665121 1513.51439416]
New Q values:  [1563.63198757   26.73544252 1769.19368658 1513.51439416]
Reward: -1  Episode Reward:  32
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1394.12666271 3478.54342034]
------
Step:29, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 1394.12666271 3478.54342034]
New Q values:  [ 677.52857079 -168.92307549 1394.12666271 1921.57547411]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 1769.19368658 1513.51439416]
------
Step:30, Action:East
State  261
Old Q Values:  [1563.63198757   26.73544252 1769.19368658 1513.51439416]
New Q values:  [1563.63198757   26.73544252 1283.55011687 1513.51439416]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1394.12666271 1921.57547411]
------
Step:31, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4241.49814738   652.42040161]
New Q values:  [-2527.46239811 -8521.23367799  4241.49814738   729.45775691]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 1283.55011687 1513.51439416]
------
Step:32, Action:North
State  260
Old Q Values:  [  249.20175939 -2735.46306511  1578.02058528 -5679.36893145]
New Q values:  [  752.72159273 -2735.46306511  1578.02058528 -5679.36893145]
Reward: -1  Episode Reward:  28
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2405.85343029  2178.80296326  -178.40224643 -4966.32149798]
------
Step:33, Action:South
State  180
Old Q Values:  [-2405.85343029  2178.80296326  -178.40224643 -4966.32149798]
New Q values:  [-2405.85343029  1344.32736089  -178.40224643 -4966.32149798]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  752.72159273 -2735.46306511  1578.02058528 -5679.36893145]
------
Step:34, Action:East
State  257
Old Q Values:  [41683.79819939  6367.89256635  9810.51390765  1875.31501677]
New Q values:  [41683.79819939  6367.89256635  5196.05500727  1875.31501677]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  4241.49814738   729.45775691]
------
Step:35, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  4241.49814738   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  5895.297853     729.45775691]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13997.66198017 -6442.16912869 -8192.20126966    40.29062482]
------
Step:36, Action:North
State  288
Old Q Values:  [13997.66198017 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [14295.00094232 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28988.45383416  7944.68598368 -4584.50430574  6864.41671345]
------
Step:37, Action:North
State  208
Old Q Values:  [28988.45383416  7944.68598368 -4584.50430574  6864.41671345]
New Q values:  [12274.75077042  7944.68598368 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.05049809e+03 -3.22965309e-01  2.26656412e+03]
------
Step:38, Action:West
State  136
Old Q Values:  [ -170.77177351 19188.90189097 -2383.80019164    83.05818806]
New Q values:  [ -170.77177351 19188.90189097 -2383.80019164   489.00881499]
Reward: -1  Episode Reward:  22
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1521.28513256 -1157.45542097]
------
Step:39, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1521.28513256 -1157.45542097]
New Q values:  [-9594.56523706 -8069.05606225  6364.58462031 -1157.45542097]
Reward: -1  Episode Reward:  21
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 19188.90189097 -2383.80019164   489.00881499]
------
Step:40, Action:South
State  130
Old Q Values:  [26290.62961917 15487.20665767  -180.00807518 43598.90541624]
New Q values:  [26290.62961917  9876.7078942   -180.00807518 43598.90541624]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12274.75077042  7944.68598368 -4584.50430574  6864.41671345]
------
Step:41, Action:North
State  208
Old Q Values:  [12274.75077042  7944.68598368 -4584.50430574  6864.41671345]
New Q values:  [ 5589.26954493  7944.68598368 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.05049809e+03 -3.22965309e-01  2.26656412e+03]
------
Step:42, Action:West
State  138
Old Q Values:  [ 7.64171987e+01  2.05049809e+03 -3.22965309e-01  2.26656412e+03]
New Q values:  [ 7.64171987e+01  2.05049809e+03 -3.22965309e-01  1.39232502e+03]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1620.99791477   278.86963148]
------
Step:43, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1620.99791477   278.86963148]
New Q values:  [ -281.736      -1150.91067548  1262.94859399   278.86963148]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.05049809e+03 -3.22965309e-01  1.39232502e+03]
------
Step:44, Action:South
State  130
Old Q Values:  [26290.62961917  9876.7078942   -180.00807518 43598.90541624]
New Q values:  [26290.62961917  6333.48895278  -180.00807518 43598.90541624]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5589.26954493  7944.68598368 -4584.50430574  6864.41671345]
------
Step:45, Action:South
State  208
Old Q Values:  [ 5589.26954493  7944.68598368 -4584.50430574  6864.41671345]
New Q values:  [ 5589.26954493  7465.77467617 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14295.00094232 -6442.16912869 -8192.20126966    40.29062482]
------
Step:46, Action:North
State  288
Old Q Values:  [14295.00094232 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [ 7957.13277978 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5589.26954493  7465.77467617 -4584.50430574  6864.41671345]
------
Step:47, Action:South
State  208
Old Q Values:  [ 5589.26954493  7465.77467617 -4584.50430574  6864.41671345]
New Q values:  [ 5589.26954493  5372.8497044  -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  13
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7957.13277978 -6442.16912869 -8192.20126966    40.29062482]
------
Step:48, Action:North
State  288
Old Q Values:  [ 7957.13277978 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [ 5241.57812594 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5589.26954493  5372.8497044  -4584.50430574  6864.41671345]
------
Step:49, Action:North
State  210
Old Q Values:  [2843.24382695 6088.24221708  790.72804752 2961.44414407]
New Q values:  [1751.84695886 6088.24221708  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 7.64171987e+01  2.05049809e+03 -3.22965309e-01  1.39232502e+03]
------
Step:50, Action:South
State  138
Old Q Values:  [ 7.64171987e+01  2.05049809e+03 -3.22965309e-01  1.39232502e+03]
New Q values:  [ 7.64171987e+01  2.64607190e+03 -3.22965309e-01  1.39232502e+03]
Reward: -1  Episode Reward:  10
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1751.84695886 6088.24221708  790.72804752 2961.44414407]
------
Step:51, Action:South
State  208
Old Q Values:  [ 5589.26954493  5372.8497044  -4584.50430574  6864.41671345]
New Q values:  [ 5589.26954493  3721.01331954 -4584.50430574  6864.41671345]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5241.57812594 -6442.16912869 -8192.20126966    40.29062482]
------
Step:52, Action:North
State  288
Old Q Values:  [ 5241.57812594 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [ 4155.35626441 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  8
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5589.26954493  3721.01331954 -4584.50430574  6864.41671345]
------
Step:53, Action:West
State  208
Old Q Values:  [ 5589.26954493  3721.01331954 -4584.50430574  6864.41671345]
New Q values:  [ 5589.26954493  3721.01331954 -4584.50430574 65084.65886401]
Reward: 100009  Episode Reward:  100017
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5589.26954493  3721.01331954 -4584.50430574 65084.65886401]
------
Step:1, Action:North
State  210
Old Q Values:  [1751.84695886 6088.24221708  790.72804752 2961.44414407]
New Q values:  [1499.96035431 6088.24221708  790.72804752 2961.44414407]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.  x
x.g.x
xxxxx
Step:2, Action:North
State  138
Old Q Values:  [ 7.64171987e+01  2.64607190e+03 -3.22965309e-01  1.39232502e+03]
New Q values:  [ 6.43788450e+02  2.64607190e+03 -3.22965309e-01  1.39232502e+03]
Reward: -301  Episode Reward:  -292
xxxxx
x..ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  2.64607190e+03 -3.22965309e-01  1.39232502e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  2.64607190e+03 -3.22965309e-01  1.39232502e+03]
New Q values:  [ 6.43788450e+02  1.45832264e+04 -3.22965309e-01  1.39232502e+03]
Reward: -10001  Episode Reward:  -10293
xxxxx
x.. x
x. gx
x...x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1021.03315723 -4582.3674281    534.04109446]
------
Step:1, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  7.77651573e+03  7.77830726e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  4.88459565e+03  7.77830726e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  5895.297853     729.45775691]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  5895.297853     729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  3610.12602052   729.45775691]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4155.35626441 -6442.16912869 -8192.20126966    40.29062482]
------
Step:3, Action:North
State  288
Old Q Values:  [ 4155.35626441 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [ 3488.01517089 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1499.96035431 6088.24221708  790.72804752 2961.44414407]
------
Step:4, Action:South
State  210
Old Q Values:  [1499.96035431 6088.24221708  790.72804752 2961.44414407]
New Q values:  [1499.96035431 3481.1014381   790.72804752 2961.44414407]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3488.01517089 -6442.16912869 -8192.20126966    40.29062482]
------
Step:5, Action:North
State  288
Old Q Values:  [ 3488.01517089 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [20920.00372756 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5589.26954493  3721.01331954 -4584.50430574 65084.65886401]
------
Step:6, Action:North
State  208
Old Q Values:  [ 5589.26954493  3721.01331954 -4584.50430574 65084.65886401]
New Q values:  [ 7997.77838526  3721.01331954 -4584.50430574 65084.65886401]
Reward: 9  Episode Reward:  24
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 19188.90189097 -2383.80019164   489.00881499]
------
Step:7, Action:South
State  130
Old Q Values:  [26290.62961917  6333.48895278  -180.00807518 43598.90541624]
New Q values:  [26290.62961917 22058.19324032  -180.00807518 43598.90541624]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7997.77838526  3721.01331954 -4584.50430574 65084.65886401]
------
Step:8, Action:North
State  208
Old Q Values:  [ 7997.77838526  3721.01331954 -4584.50430574 65084.65886401]
New Q values:  [16278.18297898  3721.01331954 -4584.50430574 65084.65886401]
Reward: -1  Episode Reward:  22
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 22058.19324032  -180.00807518 43598.90541624]
------
Step:9, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  1.45832264e+04 -3.22965309e-01  1.39232502e+03]
New Q values:  [ 6.43788450e+02  1.45832264e+04 -3.22965309e-01  9.41214588e+02]
Reward: 9  Episode Reward:  31
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1262.94859399   278.86963148]
------
Step:10, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  6364.58462031 -1157.45542097]
New Q values:  [-9594.56523706 -8069.05606225  8301.90441542 -1157.45542097]
Reward: -1  Episode Reward:  30
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 19188.90189097 -2383.80019164   489.00881499]
------
Step:11, Action:South
State  136
Old Q Values:  [ -170.77177351 19188.90189097 -2383.80019164   489.00881499]
New Q values:  [ -170.77177351 27200.35841559 -2383.80019164   489.00881499]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16278.18297898  3721.01331954 -4584.50430574 65084.65886401]
------
Step:12, Action:West
State  208
Old Q Values:  [16278.18297898  3721.01331954 -4584.50430574 65084.65886401]
New Q values:  [16278.18297898  3721.01331954 -4584.50430574 28366.75572424]
Reward: -1  Episode Reward:  28
xxxxx
x.g x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  4.88459565e+03  7.77830726e+03 -6.46606132e+03]
------
Step:13, Action:East
State  193
Old Q Values:  [-5922.26708831  1021.03315723 -4582.3674281    534.04109446]
New Q values:  [-5922.26708831  1021.03315723  6676.47974603   534.04109446]
Reward: -1  Episode Reward:  27
xxxxx
x. gx
x. ax
x.  x
xxxxx
Step:14, Action:North
State  208
Old Q Values:  [16278.18297898  3721.01331954 -4584.50430574 28366.75572424]
New Q values:  [ 9020.69990886  3721.01331954 -4584.50430574 28366.75572424]
Reward: -10001  Episode Reward:  -9974
xxxxx
x.  x
x. gx
x.  x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  577.02113965  1472.93120323 -2062.97736574   262.76946019]
------
Step:1, Action:South
State  181
Old Q Values:  [  577.02113965  1472.93120323 -2062.97736574   262.76946019]
New Q values:  [  577.02113965  1063.66207756 -2062.97736574   262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1563.63198757   26.73544252 1283.55011687 1513.51439416]
------
Step:2, Action:North
State  261
Old Q Values:  [1563.63198757   26.73544252 1283.55011687 1513.51439416]
New Q values:  [2348.15830856   26.73544252 1283.55011687 1513.51439416]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa. x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 5744.35171179 1244.78695138  358.5166536 ]
------
Step:3, Action:South
State  183
Old Q Values:  [ 540.75283156 5744.35171179 1244.78695138  358.5166536 ]
New Q values:  [ 540.75283156 3001.58817728 1244.78695138  358.5166536 ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2348.15830856   26.73544252 1283.55011687 1513.51439416]
------
Step:4, Action:North
State  261
Old Q Values:  [2348.15830856   26.73544252 1283.55011687 1513.51439416]
New Q values:  [1257.76194669   26.73544252 1283.55011687 1513.51439416]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  577.02113965  1063.66207756 -2062.97736574   262.76946019]
------
Step:5, Action:South
State  183
Old Q Values:  [ 540.75283156 3001.58817728 1244.78695138  358.5166536 ]
New Q values:  [ 540.75283156 1654.08958916 1244.78695138  358.5166536 ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1257.76194669   26.73544252 1283.55011687 1513.51439416]
------
Step:6, Action:West
State  261
Old Q Values:  [1257.76194669   26.73544252 1283.55011687 1513.51439416]
New Q values:  [1257.76194669   26.73544252 1283.55011687  878.86007591]
Reward: -301  Episode Reward:  -296
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1257.76194669   26.73544252 1283.55011687  878.86007591]
------
Step:7, Action:East
State  261
Old Q Values:  [1257.76194669   26.73544252 1283.55011687  878.86007591]
New Q values:  [ 1257.76194669    26.73544252 -4398.1421471    878.86007591]
Reward: -9991  Episode Reward:  -10287
xxxxx
x...x
x . x
x g.x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.45832264e+04 -3.22965309e-01  9.41214588e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  1.45832264e+04 -3.22965309e-01  9.41214588e+02]
New Q values:  [ 6.43788450e+02  6.88302100e+03 -3.22965309e-01  9.41214588e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1499.96035431 3481.1014381   790.72804752 2961.44414407]
------
Step:2, Action:South
State  210
Old Q Values:  [1499.96035431 3481.1014381   790.72804752 2961.44414407]
New Q values:  [1499.96035431 7673.84169351  790.72804752 2961.44414407]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20920.00372756 -6442.16912869 -8192.20126966    40.29062482]
------
Step:3, Action:North
State  288
Old Q Values:  [20920.00372756 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [16877.4282083  -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9020.69990886  3721.01331954 -4584.50430574 28366.75572424]
------
Step:4, Action:North
State  208
Old Q Values:  [ 9020.69990886  3721.01331954 -4584.50430574 28366.75572424]
New Q values:  [ 5672.5862634   3721.01331954 -4584.50430574 28366.75572424]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  6.88302100e+03 -3.22965309e-01  9.41214588e+02]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351 27200.35841559 -2383.80019164   489.00881499]
New Q values:  [ -170.77177351 19389.57008351 -2383.80019164   489.00881499]
Reward: -1  Episode Reward:  15
xxxxx
xg. x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5672.5862634   3721.01331954 -4584.50430574 28366.75572424]
------
Step:6, Action:West
State  208
Old Q Values:  [ 5672.5862634   3721.01331954 -4584.50430574 28366.75572424]
New Q values:  [ 5672.5862634   3721.01331954 -4584.50430574 13685.59446833]
Reward: 9  Episode Reward:  24
xxxxx
x.. x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  4.88459565e+03  7.77830726e+03 -6.46606132e+03]
------
Step:7, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  1381.66096879 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  1184.94106025 1141.49622464]
Reward: -1  Episode Reward:  23
xxxxx
xg. x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  2109.58890912 -8896.20691497 -4151.00858992]
------
Step:8, Action:South
State  216
Old Q Values:  [ 1971.43838233  2109.58890912 -8896.20691497 -4151.00858992]
New Q values:  [ 1971.43838233  5906.46402614 -8896.20691497 -4151.00858992]
Reward: -1  Episode Reward:  22
xxxxx
xg. x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16877.4282083  -6442.16912869 -8192.20126966    40.29062482]
------
Step:9, Action:North
State  288
Old Q Values:  [16877.4282083  -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [10856.04962382 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5672.5862634   3721.01331954 -4584.50430574 13685.59446833]
------
Step:10, Action:West
State  216
Old Q Values:  [ 1971.43838233  5906.46402614 -8896.20691497 -4151.00858992]
New Q values:  [ 1971.43838233  5906.46402614 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  20
xxxxx
xg. x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1737.6681949  1184.94106025 1141.49622464]
------
Step:11, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  4.88459565e+03  7.77830726e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  3.04227607e+03  7.77830726e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  29
xxxxx
x.g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3610.12602052   729.45775691]
------
Step:12, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 1394.12666271 1921.57547411]
New Q values:  [ 677.52857079 -168.92307549 3813.86555223 1921.57547411]
Reward: -1  Episode Reward:  28
xxxxx
x..gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10856.04962382 -6442.16912869 -8192.20126966    40.29062482]
------
Step:13, Action:North
State  288
Old Q Values:  [10856.04962382 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [ 8447.49819003 -6442.16912869 -8192.20126966    40.29062482]
Reward: -1  Episode Reward:  27
xxxxx
x.g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5672.5862634   3721.01331954 -4584.50430574 13685.59446833]
------
Step:14, Action:West
State  208
Old Q Values:  [ 5672.5862634   3721.01331954 -4584.50430574 13685.59446833]
New Q values:  [ 5672.5862634   3721.01331954 -4584.50430574  7476.58171114]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1021.03315723  6676.47974603   534.04109446]
------
Step:15, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  3.04227607e+03  7.77830726e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  3.04227607e+03  5.35369742e+03 -6.46606132e+03]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5672.5862634   3721.01331954 -4584.50430574  7476.58171114]
------
Step:16, Action:West
State  208
Old Q Values:  [ 5672.5862634   3721.01331954 -4584.50430574  7476.58171114]
New Q values:  [ 5672.5862634   3721.01331954 -4584.50430574  4596.14190991]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  3.04227607e+03  5.35369742e+03 -6.46606132e+03]
------
Step:17, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  3.04227607e+03  5.35369742e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  3.04227607e+03  3.84265485e+03 -6.46606132e+03]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5672.5862634   3721.01331954 -4584.50430574  4596.14190991]
------
Step:18, Action:North
State  208
Old Q Values:  [ 5672.5862634   3721.01331954 -4584.50430574  4596.14190991]
New Q values:  [14550.74854209  3721.01331954 -4584.50430574  4596.14190991]
Reward: -10001  Episode Reward:  -9978
xxxxx
x..gx
x   x
x.  x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.81388025e+03  1.63194774e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  3.04227607e+03  3.84265485e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  3.04227607e+03  5.90768650e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14550.74854209  3721.01331954 -4584.50430574  4596.14190991]
------
Step:2, Action:North
State  208
Old Q Values:  [14550.74854209  3721.01331954 -4584.50430574  4596.14190991]
New Q values:  [18905.37104171  3721.01331954 -4584.50430574  4596.14190991]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 22058.19324032  -180.00807518 43598.90541624]
------
Step:3, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  6.88302100e+03 -3.22965309e-01  9.41214588e+02]
New Q values:  [ 6.43788450e+02  6.88302100e+03 -3.22965309e-01  7.60770413e+02]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1262.94859399   278.86963148]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1262.94859399   278.86963148]
New Q values:  [ -281.736      -1150.91067548  2569.48573745   278.86963148]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  6.88302100e+03 -3.22965309e-01  7.60770413e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  6.88302100e+03 -3.22965309e-01  7.60770413e+02]
New Q values:  [ 6.43788450e+02  5.05476091e+03 -3.22965309e-01  7.60770413e+02]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1499.96035431 7673.84169351  790.72804752 2961.44414407]
------
Step:6, Action:West
State  208
Old Q Values:  [18905.37104171  3721.01331954 -4584.50430574  4596.14190991]
New Q values:  [18905.37104171  3721.01331954 -4584.50430574  3840.80068778]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x.agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1021.03315723  6676.47974603   534.04109446]
------
Step:7, Action:South
State  192
Old Q Values:  [ 3.89777037e-01  3.04227607e+03  5.90768650e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  2.29934823e+03  5.90768650e+03 -6.46606132e+03]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x.g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3610.12602052   729.45775691]
------
Step:8, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 3813.86555223 1921.57547411]
New Q values:  [ 677.52857079 -168.92307549 4065.1956779  1921.57547411]
Reward: 9  Episode Reward:  32
xxxxx
x.  x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8447.49819003 -6442.16912869 -8192.20126966    40.29062482]
------
Step:9, Action:West
State  288
Old Q Values:  [ 8447.49819003 -6442.16912869 -8192.20126966    40.29062482]
New Q values:  [ 8447.49819003 -6442.16912869 -8192.20126966  1098.55405609]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3610.12602052   729.45775691]
------
Step:10, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3610.12602052   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  3977.69986522   729.45775691]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8447.49819003 -6442.16912869 -8192.20126966  1098.55405609]
------
Step:11, Action:North
State  288
Old Q Values:  [ 8447.49819003 -6442.16912869 -8192.20126966  1098.55405609]
New Q values:  [ 9050.01058852 -6442.16912869 -8192.20126966  1098.55405609]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18905.37104171  3721.01331954 -4584.50430574  3840.80068778]
------
Step:12, Action:North
State  208
Old Q Values:  [18905.37104171  3721.01331954 -4584.50430574  3840.80068778]
New Q values:  [ 9077.97668904  3721.01331954 -4584.50430574  3840.80068778]
Reward: -1  Episode Reward:  28
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  5.05476091e+03 -3.22965309e-01  7.60770413e+02]
------
Step:13, Action:West
State  136
Old Q Values:  [ -170.77177351 19389.57008351 -2383.80019164   489.00881499]
New Q values:  [ -170.77177351 19389.57008351 -2383.80019164   294.51660121]
Reward: -1  Episode Reward:  27
xxxxx
x.agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   331.71025071]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1264.15236623   477.36095   ]
New Q values:  [ -253.44886264 -1902.20915811  1264.15236623   370.38350179]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  580.13040598  532.59323345 -252.78192178]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558  580.13040598  532.59323345 -252.78192178]
New Q values:  [-252.35169558  380.26082847  532.59323345 -252.78192178]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 476.02888691    0.           29.66548264 -178.98      ]
------
Step:16, Action:North
State  185
Old Q Values:  [ 476.02888691    0.           29.66548264 -178.98      ]
New Q values:  [ 349.5895248     0.           29.66548264 -178.98      ]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  380.26082847  532.59323345 -252.78192178]
------
Step:17, Action:East
State  105
Old Q Values:  [ -180.6         1827.67773756 -7438.53829696     0.        ]
New Q values:  [ -180.6         1827.67773756 -2876.50224357     0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   331.71025071]
------
Step:18, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1264.15236623   370.38350179]
New Q values:  [ -253.44886264 -1902.20915811  1264.15236623   307.33137075]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  380.26082847  532.59323345 -252.78192178]
------
Step:19, Action:East
State  107
Old Q Values:  [-252.35169558  380.26082847  532.59323345 -252.78192178]
New Q values:  [-252.35169558  380.26082847  591.68300325 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1264.15236623   307.33137075]
------
Step:20, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2569.48573745   278.86963148]
New Q values:  [ -281.736      -1150.91067548  2543.62256734   278.86963148]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  5.05476091e+03 -3.22965309e-01  7.60770413e+02]
------
Step:21, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  5.05476091e+03 -3.22965309e-01  7.60770413e+02]
New Q values:  [ 6.43788450e+02  5.13247767e+03 -3.22965309e-01  7.60770413e+02]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  292.53315687 10370.57767862     0.          1039.23717408]
------
Step:22, Action:West
State  218
Old Q Values:  [  292.53315687 10370.57767862     0.          1039.23717408]
New Q values:  [  292.53315687 10370.57767862     0.          1627.78019493]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  4042.28441766     0.        ]
------
Step:23, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.22476467e+04 1.95105832e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 1.22476467e+04 3.89099663e+03 9.06816004e+03]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[  292.53315687 10370.57767862     0.          1627.78019493]
------
Step:24, Action:West
State  218
Old Q Values:  [  292.53315687 10370.57767862     0.          1627.78019493]
New Q values:  [  292.53315687 10370.57767862     0.          1863.19740327]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  4042.28441766     0.        ]
------
Step:25, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  1184.94106025 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  2245.31563194 1141.49622464]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  5906.46402614 -8896.20691497 -1139.7029775 ]
------
Step:26, Action:South
State  218
Old Q Values:  [  292.53315687 10370.57767862     0.          1863.19740327]
New Q values:  [ 292.53315687 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9050.01058852 -6442.16912869 -8192.20126966  1098.55405609]
------
Step:27, Action:North
State  288
Old Q Values:  [ 9050.01058852 -6442.16912869 -8192.20126966  1098.55405609]
New Q values:  [ 5391.34344325 -6442.16912869 -8192.20126966  1098.55405609]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  5906.46402614 -8896.20691497 -1139.7029775 ]
------
Step:28, Action:South
State  216
Old Q Values:  [ 1971.43838233  5906.46402614 -8896.20691497 -1139.7029775 ]
New Q values:  [ 1971.43838233  3979.38864343 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5391.34344325 -6442.16912869 -8192.20126966  1098.55405609]
------
Step:29, Action:North
State  288
Old Q Values:  [ 5391.34344325 -6442.16912869 -8192.20126966  1098.55405609]
New Q values:  [ 3349.75397033 -6442.16912869 -8192.20126966  1098.55405609]
Reward: -1  Episode Reward:  31
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  3979.38864343 -8896.20691497 -1139.7029775 ]
------
Step:30, Action:South
State  216
Old Q Values:  [ 1971.43838233  3979.38864343 -8896.20691497 -1139.7029775 ]
New Q values:  [ 1971.43838233  2596.08164847 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3349.75397033 -6442.16912869 -8192.20126966  1098.55405609]
------
Step:31, Action:North
State  288
Old Q Values:  [ 3349.75397033 -6442.16912869 -8192.20126966  1098.55405609]
New Q values:  [ 2118.12608267 -6442.16912869 -8192.20126966  1098.55405609]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  2596.08164847 -8896.20691497 -1139.7029775 ]
------
Step:32, Action:South
State  216
Old Q Values:  [ 1971.43838233  2596.08164847 -8896.20691497 -1139.7029775 ]
New Q values:  [ 1971.43838233  1673.27048419 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2118.12608267 -6442.16912869 -8192.20126966  1098.55405609]
------
Step:33, Action:West
State  288
Old Q Values:  [ 2118.12608267 -6442.16912869 -8192.20126966  1098.55405609]
New Q values:  [ 2118.12608267 -6442.16912869 -8192.20126966  1632.131582  ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3977.69986522   729.45775691]
------
Step:34, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3977.69986522   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  2225.91777089   729.45775691]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2118.12608267 -6442.16912869 -8192.20126966  1632.131582  ]
------
Step:35, Action:North
State  288
Old Q Values:  [ 2118.12608267 -6442.16912869 -8192.20126966  1632.131582  ]
New Q values:  [ 1438.08194777 -6442.16912869 -8192.20126966  1632.131582  ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1971.43838233  1673.27048419 -8896.20691497 -1139.7029775 ]
------
Step:36, Action:North
State  216
Old Q Values:  [ 1971.43838233  1673.27048419 -8896.20691497 -1139.7029775 ]
New Q values:  [ 2327.71865295  1673.27048419 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  5.13247767e+03 -3.22965309e-01  7.60770413e+02]
------
Step:37, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  5.13247767e+03 -3.22965309e-01  7.60770413e+02]
New Q values:  [ 6.43788450e+02  5.13247767e+03 -3.22965309e-01  1.06679494e+03]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2543.62256734   278.86963148]
------
Step:38, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2543.62256734   278.86963148]
New Q values:  [ -281.736      -1150.91067548  2556.59232695   278.86963148]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  5.13247767e+03 -3.22965309e-01  1.06679494e+03]
------
Step:39, Action:South
State  130
Old Q Values:  [26290.62961917 22058.19324032  -180.00807518 43598.90541624]
New Q values:  [26290.62961917 11124.82980418  -180.00807518 43598.90541624]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1499.96035431 7673.84169351  790.72804752 2961.44414407]
------
Step:40, Action:South
State  216
Old Q Values:  [ 2327.71865295  1673.27048419 -8896.20691497 -1139.7029775 ]
New Q values:  [ 2327.71865295  1158.34766828 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  20
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1438.08194777 -6442.16912869 -8192.20126966  1632.131582  ]
------
Step:41, Action:West
State  288
Old Q Values:  [ 1438.08194777 -6442.16912869 -8192.20126966  1632.131582  ]
New Q values:  [ 1438.08194777 -6442.16912869 -8192.20126966  1320.02796407]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2225.91777089   729.45775691]
------
Step:42, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2225.91777089   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  1321.19169269   729.45775691]
Reward: -1  Episode Reward:  18
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1438.08194777 -6442.16912869 -8192.20126966  1320.02796407]
------
Step:43, Action:North
State  288
Old Q Values:  [ 1438.08194777 -6442.16912869 -8192.20126966  1320.02796407]
New Q values:  [ 1272.94837499 -6442.16912869 -8192.20126966  1320.02796407]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2327.71865295  1158.34766828 -8896.20691497 -1139.7029775 ]
------
Step:44, Action:North
State  216
Old Q Values:  [ 2327.71865295  1158.34766828 -8896.20691497 -1139.7029775 ]
New Q values:  [ 6747.35848623  1158.34766828 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  16
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 19389.57008351 -2383.80019164   294.51660121]
------
Step:45, Action:South
State  136
Old Q Values:  [ -170.77177351 19389.57008351 -2383.80019164   294.51660121]
New Q values:  [ -170.77177351  9779.43557927 -2383.80019164   294.51660121]
Reward: -1  Episode Reward:  15
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6747.35848623  1158.34766828 -8896.20691497 -1139.7029775 ]
------
Step:46, Action:South
State  216
Old Q Values:  [ 6747.35848623  1158.34766828 -8896.20691497 -1139.7029775 ]
New Q values:  [ 6747.35848623   858.74745653 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1272.94837499 -6442.16912869 -8192.20126966  1320.02796407]
------
Step:47, Action:West
State  288
Old Q Values:  [ 1272.94837499 -6442.16912869 -8192.20126966  1320.02796407]
New Q values:  [ 1272.94837499 -6442.16912869 -8192.20126966  1746.969889  ]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 4065.1956779  1921.57547411]
------
Step:48, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1321.19169269   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  1051.96764377   729.45775691]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1272.94837499 -6442.16912869 -8192.20126966  1746.969889  ]
------
Step:49, Action:West
State  288
Old Q Values:  [ 1272.94837499 -6442.16912869 -8192.20126966  1746.969889  ]
New Q values:  [ 1272.94837499 -6442.16912869 -8192.20126966  1013.77824873]
Reward: -1  Episode Reward:  11
xxxxx
x   x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1051.96764377   729.45775691]
------
Step:50, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1051.96764377   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799   802.07157001   729.45775691]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1272.94837499 -6442.16912869 -8192.20126966  1013.77824873]
------
Step:51, Action:North
State  288
Old Q Values:  [ 1272.94837499 -6442.16912869 -8192.20126966  1013.77824873]
New Q values:  [ 2532.78689587 -6442.16912869 -8192.20126966  1013.77824873]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6747.35848623   858.74745653 -8896.20691497 -1139.7029775 ]
------
Step:52, Action:North
State  216
Old Q Values:  [ 6747.35848623   858.74745653 -8896.20691497 -1139.7029775 ]
New Q values:  [ 5632.17406827   858.74745653 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  8
xxxxx
xg ax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  9779.43557927 -2383.80019164   294.51660121]
------
Step:53, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  5.13247767e+03 -3.22965309e-01  1.06679494e+03]
New Q values:  [ 6.43788450e+02  3.74204329e+03 -3.22965309e-01  1.06679494e+03]
Reward: -1  Episode Reward:  7
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5632.17406827   858.74745653 -8896.20691497 -1139.7029775 ]
------
Step:54, Action:North
State  210
Old Q Values:  [1499.96035431 7673.84169351  790.72804752 2961.44414407]
New Q values:  [13679.0557666   7673.84169351   790.72804752  2961.44414407]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 11124.82980418  -180.00807518 43598.90541624]
------
Step:55, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  3.74204329e+03 -3.22965309e-01  1.06679494e+03]
New Q values:  [ 6.43788450e+02  3.74204329e+03 -3.22965309e-01  1.19309567e+03]
Reward: -1  Episode Reward:  5
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2556.59232695   278.86963148]
------
Step:56, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1264.15236623   307.33137075]
New Q values:  [ -253.44886264 -1902.20915811  1627.67393265   307.33137075]
Reward: -1  Episode Reward:  4
xxxxx
x  ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  3.74204329e+03 -3.22965309e-01  1.19309567e+03]
------
Step:57, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  3.74204329e+03 -3.22965309e-01  1.19309567e+03]
New Q values:  [ 6.43788450e+02 -2.81413046e+03 -3.22965309e-01  1.19309567e+03]
Reward: -10001  Episode Reward:  -9997
xxxxx
x   x
x  gx
x.  x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  391.45858569 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3167.98559724  238.35800069    0.        ]
New Q values:  [ 221.30610858 1768.82111565  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 540.75283156 1654.08958916 1244.78695138  358.5166536 ]
------
Step:2, Action:South
State  183
Old Q Values:  [ 540.75283156 1654.08958916 1244.78695138  358.5166536 ]
New Q values:  [ 540.75283156 1044.36441967 1244.78695138  358.5166536 ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1257.76194669    26.73544252 -4398.1421471    878.86007591]
------
Step:3, Action:North
State  261
Old Q Values:  [ 1257.76194669    26.73544252 -4398.1421471    878.86007591]
New Q values:  [  821.60340195    26.73544252 -4398.1421471    878.86007591]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  577.02113965  1063.66207756 -2062.97736574   262.76946019]
------
Step:4, Action:South
State  183
Old Q Values:  [ 540.75283156 1044.36441967 1244.78695138  358.5166536 ]
New Q values:  [ 540.75283156  680.80379064 1244.78695138  358.5166536 ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  821.60340195    26.73544252 -4398.1421471    878.86007591]
------
Step:5, Action:West
State  261
Old Q Values:  [  821.60340195    26.73544252 -4398.1421471    878.86007591]
New Q values:  [  821.60340195    26.73544252 -4398.1421471    434.60205314]
Reward: -301  Episode Reward:  -285
xxxxx
x ..x
x .gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  821.60340195    26.73544252 -4398.1421471    434.60205314]
------
Step:6, Action:North
State  261
Old Q Values:  [  821.60340195    26.73544252 -4398.1421471    434.60205314]
New Q values:  [  647.13998405    26.73544252 -4398.1421471    434.60205314]
Reward: -1  Episode Reward:  -286
xxxxx
x ..x
xag.x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  577.02113965  1063.66207756 -2062.97736574   262.76946019]
------
Step:7, Action:South
State  180
Old Q Values:  [-2405.85343029  1344.32736089  -178.40224643 -4966.32149798]
New Q values:  [-2405.85343029  1010.53711994  -178.40224643 -4966.32149798]
Reward: -1  Episode Reward:  -287
xxxxx
x ..x
xg..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  752.72159273 -2735.46306511  1578.02058528 -5679.36893145]
------
Step:8, Action:East
State  260
Old Q Values:  [  752.72159273 -2735.46306511  1578.02058528 -5679.36893145]
New Q values:  [  752.72159273 -2735.46306511   877.22970511 -5679.36893145]
Reward: 9  Episode Reward:  -278
xxxxx
x ..x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   802.07157001   729.45775691]
------
Step:9, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   802.07157001   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  1080.06469676   729.45775691]
Reward: -1  Episode Reward:  -279
xxxxx
x ..x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2532.78689587 -6442.16912869 -8192.20126966  1013.77824873]
------
Step:10, Action:North
State  288
Old Q Values:  [ 2532.78689587 -6442.16912869 -8192.20126966  1013.77824873]
New Q values:  [ 5122.23148833 -6442.16912869 -8192.20126966  1013.77824873]
Reward: 9  Episode Reward:  -270
xxxxx
x ..x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13679.0557666   7673.84169351   790.72804752  2961.44414407]
------
Step:11, Action:North
State  210
Old Q Values:  [13679.0557666   7673.84169351   790.72804752  2961.44414407]
New Q values:  [18556.69393151  7673.84169351   790.72804752  2961.44414407]
Reward: 9  Episode Reward:  -261
xxxxx
x .ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 11124.82980418  -180.00807518 43598.90541624]
------
Step:12, Action:West
State  130
Old Q Values:  [26290.62961917 11124.82980418  -180.00807518 43598.90541624]
New Q values:  [26290.62961917 11124.82980418  -180.00807518 38177.76642607]
Reward: 9  Episode Reward:  -252
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  28498.32154925 69109.34753192]
------
Step:13, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  8301.90441542 -1157.45542097]
New Q values:  [-9594.56523706 -8069.05606225  8301.90441542  -163.97435457]
Reward: -1  Episode Reward:  -253
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   998.69271273 -5413.46457526  -180.6       ]
------
Step:14, Action:South
State  109
Old Q Values:  [ -241.10880094   998.69271273 -5413.46457526  -180.6       ]
New Q values:  [ -241.10880094   717.97570836 -5413.46457526  -180.6       ]
Reward: -1  Episode Reward:  -254
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  577.02113965  1063.66207756 -2062.97736574   262.76946019]
------
Step:15, Action:South
State  181
Old Q Values:  [  577.02113965  1063.66207756 -2062.97736574   262.76946019]
New Q values:  [  577.02113965   619.00682624 -2062.97736574   262.76946019]
Reward: -1  Episode Reward:  -255
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  647.13998405    26.73544252 -4398.1421471    434.60205314]
------
Step:16, Action:North
State  257
Old Q Values:  [41683.79819939  6367.89256635  5196.05500727  1875.31501677]
New Q values:  [49421.88928793  6367.89256635  5196.05500727  1875.31501677]
Reward: -1  Episode Reward:  -256
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  10235.04946499      0.        ]
------
Step:17, Action:North
State  181
Old Q Values:  [  577.02113965   619.00682624 -2062.97736574   262.76946019]
New Q values:  [  445.60116837   619.00682624 -2062.97736574   262.76946019]
Reward: -1  Episode Reward:  -257
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   717.97570836 -5413.46457526  -180.6       ]
------
Step:18, Action:South
State  99
Old Q Values:  [    0.         42484.73661283 56224.12962286     0.        ]
New Q values:  [    0.         49742.2646533  56224.12962286     0.        ]
Reward: -1  Episode Reward:  -258
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  10235.04946499      0.        ]
------
Step:19, Action:North
State  180
Old Q Values:  [-2405.85343029  1010.53711994  -178.40224643 -4966.32149798]
New Q values:  [ -820.10444135  1010.53711994  -178.40224643 -4966.32149798]
Reward: -1  Episode Reward:  -259
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  476.12310254  429.94531313 -180.6       ]
------
Step:20, Action:East
State  110
Old Q Values:  [-239.29051573  476.12310254  429.94531313 -180.6       ]
New Q values:  [-239.29051573  476.12310254  938.35582334 -180.6       ]
Reward: -1  Episode Reward:  -260
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2556.59232695   278.86963148]
------
Step:21, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2556.59232695   278.86963148]
New Q values:  [ -281.736      -1150.91067548  1379.96563247   278.86963148]
Reward: -1  Episode Reward:  -261
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02 -2.81413046e+03 -3.22965309e-01  1.19309567e+03]
------
Step:22, Action:West
State  138
Old Q Values:  [ 6.43788450e+02 -2.81413046e+03 -3.22965309e-01  1.19309567e+03]
New Q values:  [ 6.43788450e+02 -2.81413046e+03 -3.22965309e-01  8.90627959e+02]
Reward: -1  Episode Reward:  -262
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1379.96563247   278.86963148]
------
Step:23, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  8301.90441542  -163.97435457]
New Q values:  [-9594.56523706 -8069.05606225  6253.99243995  -163.97435457]
Reward: -1  Episode Reward:  -263
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  9779.43557927 -2383.80019164   294.51660121]
------
Step:24, Action:South
State  138
Old Q Values:  [ 6.43788450e+02 -2.81413046e+03 -3.22965309e-01  8.90627959e+02]
New Q values:  [ 6.43788450e+02  1.59714082e+03 -3.22965309e-01  8.90627959e+02]
Reward: -1  Episode Reward:  -264
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9077.97668904  3721.01331954 -4584.50430574  3840.80068778]
------
Step:25, Action:North
State  210
Old Q Values:  [18556.69393151  7673.84169351   790.72804752  2961.44414407]
New Q values:  [7901.21981886 7673.84169351  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -265
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.59714082e+03 -3.22965309e-01  8.90627959e+02]
------
Step:26, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  1.59714082e+03 -3.22965309e-01  8.90627959e+02]
New Q values:  [ 6.43788450e+02  3.00862227e+03 -3.22965309e-01  8.90627959e+02]
Reward: -1  Episode Reward:  -266
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7901.21981886 7673.84169351  790.72804752 2961.44414407]
------
Step:27, Action:North
State  210
Old Q Values:  [7901.21981886 7673.84169351  790.72804752 2961.44414407]
New Q values:  [4062.47460974 7673.84169351  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  -267
xxxxx
x  ax
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  3.00862227e+03 -3.22965309e-01  8.90627959e+02]
------
Step:28, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  3.00862227e+03 -3.22965309e-01  8.90627959e+02]
New Q values:  [ 6.43788450e+02  3.92624192e+03 -3.22965309e-01  8.90627959e+02]
Reward: -1  Episode Reward:  -268
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9077.97668904  3721.01331954 -4584.50430574  3840.80068778]
------
Step:29, Action:North
State  208
Old Q Values:  [ 9077.97668904  3721.01331954 -4584.50430574  3840.80068778]
New Q values:  [15083.92060344  3721.01331954 -4584.50430574  3840.80068778]
Reward: -1  Episode Reward:  -269
xxxxx
x  ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 11124.82980418  -180.00807518 38177.76642607]
------
Step:30, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  3.92624192e+03 -3.22965309e-01  8.90627959e+02]
New Q values:  [ 6.43788450e+02  3.92624192e+03 -3.22965309e-01  8.43953363e+02]
Reward: -1  Episode Reward:  -270
xxxxx
x a x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1627.67393265   307.33137075]
------
Step:31, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1627.67393265   307.33137075]
New Q values:  [ -253.44886264 -1902.20915811  1828.34214795   307.33137075]
Reward: -1  Episode Reward:  -271
xxxxx
x  ax
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  3.92624192e+03 -3.22965309e-01  8.43953363e+02]
------
Step:32, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  3.92624192e+03 -3.22965309e-01  8.43953363e+02]
New Q values:  [ 6.43788450e+02  3.87204927e+03 -3.22965309e-01  8.43953363e+02]
Reward: -1  Episode Reward:  -272
xxxxx
x   x
x .ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4062.47460974 7673.84169351  790.72804752 2961.44414407]
------
Step:33, Action:South
State  210
Old Q Values:  [4062.47460974 7673.84169351  790.72804752 2961.44414407]
New Q values:  [ 4062.47460974 -1394.3938761    790.72804752  2961.44414407]
Reward: -10001  Episode Reward:  -10273
xxxxx
x   x
x . x
x  gx
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  391.45858569 -120.29354603]
------
Step:1, Action:South
State  110
Old Q Values:  [-239.29051573  476.12310254  938.35582334 -180.6       ]
New Q values:  [-239.29051573  700.38286313  938.35582334 -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -7507.54632711  1681.77874037     0.        ]
------
Step:2, Action:East
State  180
Old Q Values:  [ -820.10444135  1010.53711994  -178.40224643 -4966.32149798]
New Q values:  [ -820.10444135  1010.53711994  6743.532972   -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
xga.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   764.61907621 22698.31290191   239.04887894]
------
Step:3, Action:East
State  192
Old Q Values:  [ 3.89777037e-01  2.29934823e+03  5.90768650e+03 -6.46606132e+03]
New Q values:  [ 3.89777037e-01  2.29934823e+03  6.89365078e+03 -6.46606132e+03]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15083.92060344  3721.01331954 -4584.50430574  3840.80068778]
------
Step:4, Action:North
State  216
Old Q Values:  [ 5632.17406827   858.74745653 -8896.20691497 -1139.7029775 ]
New Q values:  [ 5192.10030109   858.74745653 -8896.20691497 -1139.7029775 ]
Reward: 9  Episode Reward:  36
xxxxx
xg.ax
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  9779.43557927 -2383.80019164   294.51660121]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351  9779.43557927 -2383.80019164   294.51660121]
New Q values:  [ -170.77177351  5468.80432204 -2383.80019164   294.51660121]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5192.10030109   858.74745653 -8896.20691497 -1139.7029775 ]
------
Step:6, Action:North
State  216
Old Q Values:  [ 5192.10030109   858.74745653 -8896.20691497 -1139.7029775 ]
New Q values:  [-2283.11858295   858.74745653 -8896.20691497 -1139.7029775 ]
Reward: -10001  Episode Reward:  -9966
xxxxx
x .gx
x   x
x. .x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1080.06469676   729.45775691]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1080.06469676   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  1974.0953252    729.45775691]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5122.23148833 -6442.16912869 -8192.20126966  1013.77824873]
------
Step:2, Action:North
State  288
Old Q Values:  [ 5122.23148833 -6442.16912869 -8192.20126966  1013.77824873]
New Q values:  [ 6579.46877636 -6442.16912869 -8192.20126966  1013.77824873]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15083.92060344  3721.01331954 -4584.50430574  3840.80068778]
------
Step:3, Action:North
State  208
Old Q Values:  [15083.92060344  3721.01331954 -4584.50430574  3840.80068778]
New Q values:  [17492.2981692   3721.01331954 -4584.50430574  3840.80068778]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 11124.82980418  -180.00807518 38177.76642607]
------
Step:4, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  3.87204927e+03 -3.22965309e-01  8.43953363e+02]
New Q values:  [ 6.43788450e+02  3.87204927e+03 -3.22965309e-01  7.56971035e+02]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1379.96563247   278.86963148]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1379.96563247   278.86963148]
New Q values:  [ -281.736      -1150.91067548  1713.00103536   278.86963148]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  3.87204927e+03 -3.22965309e-01  7.56971035e+02]
------
Step:6, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  3.87204927e+03 -3.22965309e-01  7.56971035e+02]
New Q values:  [ 6.43788450e+02  2.76696209e+03 -3.22965309e-01  7.56971035e+02]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4062.47460974 -1394.3938761    790.72804752  2961.44414407]
------
Step:7, Action:North
State  210
Old Q Values:  [ 4062.47460974 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [ 2454.47847172 -1394.3938761    790.72804752  2961.44414407]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  2.76696209e+03 -3.22965309e-01  7.56971035e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  2.76696209e+03 -3.22965309e-01  7.56971035e+02]
New Q values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  7.56971035e+02]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2454.47847172 -1394.3938761    790.72804752  2961.44414407]
------
Step:9, Action:West
State  208
Old Q Values:  [17492.2981692   3721.01331954 -4584.50430574  3840.80068778]
New Q values:  [17492.2981692   3721.01331954 -4584.50430574  3544.66419892]
Reward: 9  Episode Reward:  41
xxxxx
x.  x
x.agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1021.03315723  6676.47974603   534.04109446]
------
Step:10, Action:South
State  193
Old Q Values:  [-5922.26708831  1021.03315723  6676.47974603   534.04109446]
New Q values:  [-5922.26708831  1627.37196626  6676.47974603   534.04109446]
Reward: -1  Episode Reward:  40
xxxxx
x. gx
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 4065.1956779  1921.57547411]
------
Step:11, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 4065.1956779  1921.57547411]
New Q values:  [ 677.52857079 -168.92307549 3599.31890407 1921.57547411]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6579.46877636 -6442.16912869 -8192.20126966  1013.77824873]
------
Step:12, Action:West
State  288
Old Q Values:  [ 6579.46877636 -6442.16912869 -8192.20126966  1013.77824873]
New Q values:  [ 6579.46877636 -6442.16912869 -8192.20126966   997.13989705]
Reward: -1  Episode Reward:  38
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1974.0953252    729.45775691]
------
Step:13, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1974.0953252    729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  2762.87876299   729.45775691]
Reward: -1  Episode Reward:  37
xxxxx
x.g x
x.  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6579.46877636 -6442.16912869 -8192.20126966   997.13989705]
------
Step:14, Action:North
State  288
Old Q Values:  [ 6579.46877636 -6442.16912869 -8192.20126966   997.13989705]
New Q values:  [ 7878.8769613  -6442.16912869 -8192.20126966   997.13989705]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17492.2981692   3721.01331954 -4584.50430574  3544.66419892]
------
Step:15, Action:North
State  208
Old Q Values:  [17492.2981692   3721.01331954 -4584.50430574  3544.66419892]
New Q values:  [25278.63330441  3721.01331954 -4584.50430574  3544.66419892]
Reward: -1  Episode Reward:  35
xxxxx
x.gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 11563.73606199 -8652.84       60941.0467891 ]
------
Step:16, Action:South
State  130
Old Q Values:  [26290.62961917 11124.82980418  -180.00807518 38177.76642607]
New Q values:  [26290.62961917 12032.92191299  -180.00807518 38177.76642607]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25278.63330441  3721.01331954 -4584.50430574  3544.66419892]
------
Step:17, Action:North
State  210
Old Q Values:  [ 2454.47847172 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [12434.52131651 -1394.3938761    790.72804752  2961.44414407]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 12032.92191299  -180.00807518 38177.76642607]
------
Step:18, Action:West
State  130
Old Q Values:  [26290.62961917 12032.92191299  -180.00807518 38177.76642607]
New Q values:  [26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
Reward: -1  Episode Reward:  32
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  28498.32154925 69109.34753192]
------
Step:19, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1713.00103536   278.86963148]
New Q values:  [ -281.736      -1150.91067548  1713.00103536   294.45275357]
Reward: 9  Episode Reward:  41
xxxxx
xa  x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  380.26082847  591.68300325 -252.78192178]
------
Step:20, Action:East
State  106
Old Q Values:  [ -180.6        -5363.03361968   385.72226087  -180.6       ]
New Q values:  [ -180.6        -5363.03361968   667.58921495  -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x.  x
xg  x
xxxxx
Step:21, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  28498.32154925 69109.34753192]
New Q values:  [ -180.6         3557.6642036  22199.7218687  69109.34753192]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
------
Step:22, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  7.56971035e+02]
New Q values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.16088725e+02]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1713.00103536   294.45275357]
------
Step:23, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  22199.7218687  69109.34753192]
New Q values:  [ -180.6         3557.6642036  19680.28199648 69109.34753192]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
------
Step:24, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.16088725e+02]
New Q values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.39735800e+02]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1713.00103536   294.45275357]
------
Step:25, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  19680.28199648 69109.34753192]
New Q values:  [ -180.6         3557.6642036  18672.50604759 69109.34753192]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
------
Step:26, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.39735800e+02]
New Q values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.49194631e+02]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1713.00103536   294.45275357]
------
Step:27, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  18672.50604759 69109.34753192]
New Q values:  [ -180.6         3557.6642036  18269.39566804 69109.34753192]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
------
Step:28, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.49194631e+02]
New Q values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.52978163e+02]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1713.00103536   294.45275357]
------
Step:29, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1713.00103536   294.45275357]
New Q values:  [ -281.736      -1150.91067548  1282.98583824   294.45275357]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.52978163e+02]
------
Step:30, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  1.99461808e+03 -3.22965309e-01  8.52978163e+02]
New Q values:  [ 6.43788450e+02  8.38083722e+03 -3.22965309e-01  8.52978163e+02]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25278.63330441  3721.01331954 -4584.50430574  3544.66419892]
------
Step:31, Action:North
State  210
Old Q Values:  [12434.52131651 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [ 7487.45969364 -1394.3938761    790.72804752  2961.44414407]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  8.38083722e+03 -3.22965309e-01  8.52978163e+02]
------
Step:32, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  8.38083722e+03 -3.22965309e-01  8.52978163e+02]
New Q values:  [ 6.43788450e+02  5.59797280e+03 -3.22965309e-01  8.52978163e+02]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x. ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 7487.45969364 -1394.3938761    790.72804752  2961.44414407]
------
Step:33, Action:North
State  210
Old Q Values:  [ 7487.45969364 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [ 4673.7757167  -1394.3938761    790.72804752  2961.44414407]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  5.59797280e+03 -3.22965309e-01  8.52978163e+02]
------
Step:34, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  5.59797280e+03 -3.22965309e-01  8.52978163e+02]
New Q values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  8.52978163e+02]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25278.63330441  3721.01331954 -4584.50430574  3544.66419892]
------
Step:35, Action:North
State  208
Old Q Values:  [25278.63330441  3721.01331954 -4584.50430574  3544.66419892]
New Q values:  [11751.49461837  3721.01331954 -4584.50430574  3544.66419892]
Reward: -1  Episode Reward:  25
xxxxx
x gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5468.80432204 -2383.80019164   294.51660121]
------
Step:36, Action:South
State  136
Old Q Values:  [ -170.77177351  5468.80432204 -2383.80019164   294.51660121]
New Q values:  [ -170.77177351  5712.37011433 -2383.80019164   294.51660121]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11751.49461837  3721.01331954 -4584.50430574  3544.66419892]
------
Step:37, Action:North
State  208
Old Q Values:  [11751.49461837  3721.01331954 -4584.50430574  3544.66419892]
New Q values:  [15500.99109635  3721.01331954 -4584.50430574  3544.66419892]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
------
Step:38, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  8.52978163e+02]
New Q values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  7.25487017e+02]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1282.98583824   294.45275357]
------
Step:39, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1828.34214795   307.33137075]
New Q values:  [ -253.44886264 -1902.20915811  3677.39059227   307.33137075]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x. gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  7.25487017e+02]
------
Step:40, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  7.25487017e+02]
New Q values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  1.39281198e+03]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3677.39059227   307.33137075]
------
Step:41, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3677.39059227   307.33137075]
New Q values:  [ -253.44886264 -1902.20915811  4417.00997      307.33137075]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
x. gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  1.39281198e+03]
------
Step:42, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  1.39281198e+03]
New Q values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  1.88162778e+03]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  4417.00997      307.33137075]
------
Step:43, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  4417.00997      307.33137075]
New Q values:  [ -253.44886264 -1902.20915811  4712.85772109   307.33137075]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
x. gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  1.88162778e+03]
------
Step:44, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  1.88162778e+03]
New Q values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  2.16590843e+03]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  4712.85772109   307.33137075]
------
Step:45, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1282.98583824   294.45275357]
New Q values:  [ -281.736      -1150.91067548  3459.24806839   294.45275357]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  2.16590843e+03]
------
Step:46, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  9.82217911e+03 -3.22965309e-01  2.16590843e+03]
New Q values:  [ 6.43788450e+02  8.57856897e+03 -3.22965309e-01  2.16590843e+03]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15500.99109635  3721.01331954 -4584.50430574  3544.66419892]
------
Step:47, Action:North
State  208
Old Q Values:  [15500.99109635  3721.01331954 -4584.50430574  3544.66419892]
New Q values:  [ 7913.50747284  3721.01331954 -4584.50430574  3544.66419892]
Reward: -1  Episode Reward:  13
xxxxx
x gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5712.37011433 -2383.80019164   294.51660121]
------
Step:48, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  8.57856897e+03 -3.22965309e-01  2.16590843e+03]
New Q values:  [ 6.43788450e+02  5.80487983e+03 -3.22965309e-01  2.16590843e+03]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7913.50747284  3721.01331954 -4584.50430574  3544.66419892]
------
Step:49, Action:North
State  208
Old Q Values:  [ 7913.50747284  3721.01331954 -4584.50430574  3544.66419892]
New Q values:  [13965.79623814  3721.01331954 -4584.50430574  3544.66419892]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
------
Step:50, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  5.80487983e+03 -3.22965309e-01  2.16590843e+03]
New Q values:  [ 6.43788450e+02  5.80487983e+03 -3.22965309e-01  1.90353779e+03]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3459.24806839   294.45275357]
------
Step:51, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3459.24806839   294.45275357]
New Q values:  [ -281.736      -1150.91067548  3124.56317668   294.45275357]
Reward: -1  Episode Reward:  9
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  5.80487983e+03 -3.22965309e-01  1.90353779e+03]
------
Step:52, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  5.80487983e+03 -3.22965309e-01  1.90353779e+03]
New Q values:  [ 6.43788450e+02  3.72348465e+03 -3.22965309e-01  1.90353779e+03]
Reward: -1  Episode Reward:  8
xxxxx
x   x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4673.7757167  -1394.3938761    790.72804752  2961.44414407]
------
Step:53, Action:North
State  210
Old Q Values:  [ 4673.7757167  -1394.3938761    790.72804752  2961.44414407]
New Q values:  [ 2985.95568091 -1394.3938761    790.72804752  2961.44414407]
Reward: -1  Episode Reward:  7
xxxxx
x  ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  3.72348465e+03 -3.22965309e-01  1.90353779e+03]
------
Step:54, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  3.72348465e+03 -3.22965309e-01  1.90353779e+03]
New Q values:  [ 6.43788450e+02  2.38458056e+03 -3.22965309e-01  1.90353779e+03]
Reward: -1  Episode Reward:  6
xxxxx
x   x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2985.95568091 -1394.3938761    790.72804752  2961.44414407]
------
Step:55, Action:North
State  210
Old Q Values:  [ 2985.95568091 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [ 1909.15644134 -1394.3938761    790.72804752  2961.44414407]
Reward: -1  Episode Reward:  5
xxxxx
x  ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  2.38458056e+03 -3.22965309e-01  1.90353779e+03]
------
Step:56, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  2.38458056e+03 -3.22965309e-01  1.90353779e+03]
New Q values:  [ 6.43788450e+02  1.84166547e+03 -3.22965309e-01  1.90353779e+03]
Reward: -1  Episode Reward:  4
xxxxx
x   x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1909.15644134 -1394.3938761    790.72804752  2961.44414407]
------
Step:57, Action:West
State  208
Old Q Values:  [13965.79623814  3721.01331954 -4584.50430574  3544.66419892]
New Q values:  [13965.79623814  3721.01331954 -4584.50430574 -2514.63908599]
Reward: -10001  Episode Reward:  -9997
xxxxx
x   x
x.g x
x   x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  2762.87876299   729.45775691]
------
Step:1, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2762.87876299   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  3474.21459359   729.45775691]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7878.8769613  -6442.16912869 -8192.20126966   997.13989705]
------
Step:2, Action:North
State  288
Old Q Values:  [ 7878.8769613  -6442.16912869 -8192.20126966   997.13989705]
New Q values:  [ 7346.68965596 -6442.16912869 -8192.20126966   997.13989705]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13965.79623814  3721.01331954 -4584.50430574 -2514.63908599]
------
Step:3, Action:North
State  210
Old Q Values:  [ 1909.15644134 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [11570.05582554 -1394.3938761    790.72804752  2961.44414407]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
------
Step:4, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  1.84166547e+03 -3.22965309e-01  1.90353779e+03]
New Q values:  [ 6.43788450e+02  1.84166547e+03 -3.22965309e-01  1.70418407e+03]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3124.56317668   294.45275357]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3124.56317668   294.45275357]
New Q values:  [ -281.736      -1150.91067548  1801.72491123   294.45275357]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.84166547e+03 -3.22965309e-01  1.70418407e+03]
------
Step:6, Action:South
State  130
Old Q Values:  [26290.62961917 12032.92191299  -180.00807518 36003.31083   ]
New Q values:  [26290.62961917  9002.30763664  -180.00807518 36003.31083   ]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13965.79623814  3721.01331954 -4584.50430574 -2514.63908599]
------
Step:7, Action:North
State  210
Old Q Values:  [11570.05582554 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [ 5179.92197077 -1394.3938761    790.72804752  2961.44414407]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.84166547e+03 -3.22965309e-01  1.70418407e+03]
------
Step:8, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  1.84166547e+03 -3.22965309e-01  1.70418407e+03]
New Q values:  [ 6.43788450e+02  2.29004278e+03 -3.22965309e-01  1.70418407e+03]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5179.92197077 -1394.3938761    790.72804752  2961.44414407]
------
Step:9, Action:North
State  208
Old Q Values:  [13965.79623814  3721.01331954 -4584.50430574 -2514.63908599]
New Q values:  [ 6272.73132885  3721.01331954 -4584.50430574 -2514.63908599]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  2.29004278e+03 -3.22965309e-01  1.70418407e+03]
------
Step:10, Action:West
State  136
Old Q Values:  [ -170.77177351  5712.37011433 -2383.80019164   294.51660121]
New Q values:  [ -170.77177351  5712.37011433 -2383.80019164   216.7197157 ]
Reward: -1  Episode Reward:  30
xxxxx
x.agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   331.71025071]
------
Step:11, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  4712.85772109   307.33137075]
New Q values:  [ -253.44886264 -1902.20915811  4712.85772109   305.83744928]
Reward: 9  Episode Reward:  39
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  380.26082847  591.68300325 -252.78192178]
------
Step:12, Action:East
State  105
Old Q Values:  [ -180.6         1827.67773756 -2876.50224357     0.        ]
New Q values:  [ -180.6         1827.67773756 -1051.68782222     0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   331.71025071]
------
Step:13, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  4712.85772109   305.83744928]
New Q values:  [ -253.44886264 -1902.20915811  4712.85772109   299.23988069]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  380.26082847  591.68300325 -252.78192178]
------
Step:14, Action:East
State  105
Old Q Values:  [ -180.6         1827.67773756 -1051.68782222     0.        ]
New Q values:  [-180.6        1827.67773756 -321.76205367    0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   331.71025071]
------
Step:15, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  4712.85772109   299.23988069]
New Q values:  [ -253.44886264 -1902.20915811  4712.85772109   296.60085325]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  380.26082847  591.68300325 -252.78192178]
------
Step:16, Action:East
State  105
Old Q Values:  [-180.6        1827.67773756 -321.76205367    0.        ]
New Q values:  [-180.6        1827.67773756  -29.79174626    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   331.71025071]
------
Step:17, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  4712.85772109   296.60085325]
New Q values:  [ -253.44886264 -1902.20915811  4712.85772109   295.54524228]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  380.26082847  591.68300325 -252.78192178]
------
Step:18, Action:East
State  105
Old Q Values:  [-180.6        1827.67773756  -29.79174626    0.        ]
New Q values:  [-180.6        1827.67773756   86.99637671    0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -7059.48304203   331.71025071]
------
Step:19, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  6253.99243995  -163.97435457]
New Q values:  [-9594.56523706 -8069.05606225  6253.99243995   482.11357944]
Reward: -1  Episode Reward:  31
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1827.67773756   86.99637671    0.        ]
------
Step:20, Action:South
State  109
Old Q Values:  [ -241.10880094   717.97570836 -5413.46457526  -180.6       ]
New Q values:  [ -241.10880094   478.29233122 -5413.46457526  -180.6       ]
Reward: 9  Episode Reward:  40
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  445.60116837   619.00682624 -2062.97736574   262.76946019]
------
Step:21, Action:South
State  181
Old Q Values:  [  445.60116837   619.00682624 -2062.97736574   262.76946019]
New Q values:  [  445.60116837   441.14472571 -2062.97736574   262.76946019]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  647.13998405    26.73544252 -4398.1421471    434.60205314]
------
Step:22, Action:North
State  257
Old Q Values:  [49421.88928793  6367.89256635  5196.05500727  1875.31501677]
New Q values:  [52517.12572334  6367.89256635  5196.05500727  1875.31501677]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  10235.04946499      0.        ]
------
Step:23, Action:North
State  183
Old Q Values:  [ 540.75283156  680.80379064 1244.78695138  358.5166536 ]
New Q values:  [ 530.87108315  680.80379064 1244.78695138  358.5166536 ]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  391.45858569 -120.29354603]
------
Step:24, Action:South
State  110
Old Q Values:  [-239.29051573  700.38286313  938.35582334 -180.6       ]
New Q values:  [-239.29051573  784.08676736  938.35582334 -180.6       ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -7507.54632711  1681.77874037     0.        ]
------
Step:25, Action:East
State  176
Old Q Values:  [ 76485.61294353   1621.55095326 103233.27724906      0.        ]
New Q values:  [ 76485.61294353   1621.55095326 103366.80613407      0.        ]
Reward: 100009  Episode Reward:  100045
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1801.72491123   294.45275357]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1801.72491123   294.45275357]
New Q values:  [ -281.736      -1150.91067548  1413.10279808   294.45275357]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  2.29004278e+03 -3.22965309e-01  1.70418407e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  2.29004278e+03 -3.22965309e-01  1.70418407e+03]
New Q values:  [ 6.43788450e+02  2.47539370e+03 -3.22965309e-01  1.70418407e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5179.92197077 -1394.3938761    790.72804752  2961.44414407]
------
Step:3, Action:North
State  210
Old Q Values:  [ 5179.92197077 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [ 2813.98689911 -1394.3938761    790.72804752  2961.44414407]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  2.47539370e+03 -3.22965309e-01  1.70418407e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  2.47539370e+03 -3.22965309e-01  1.70418407e+03]
New Q values:  [ 6.43788450e+02  1.87799072e+03 -3.22965309e-01  1.70418407e+03]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..ax
xg..x
xxxxx
Step:5, Action:South
State  208
Old Q Values:  [ 6272.73132885  3721.01331954 -4584.50430574 -2514.63908599]
New Q values:  [ 6272.73132885  3697.81222461 -4584.50430574 -2514.63908599]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7346.68965596 -6442.16912869 -8192.20126966   997.13989705]
------
Step:6, Action:North
State  288
Old Q Values:  [ 7346.68965596 -6442.16912869 -8192.20126966   997.13989705]
New Q values:  [ 4819.89526104 -6442.16912869 -8192.20126966   997.13989705]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6272.73132885  3697.81222461 -4584.50430574 -2514.63908599]
------
Step:7, Action:North
State  208
Old Q Values:  [ 6272.73132885  3697.81222461 -4584.50430574 -2514.63908599]
New Q values:  [ 3071.88974883  3697.81222461 -4584.50430574 -2514.63908599]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.87799072e+03 -3.22965309e-01  1.70418407e+03]
------
Step:8, Action:South
State  136
Old Q Values:  [ -170.77177351  5712.37011433 -2383.80019164   216.7197157 ]
New Q values:  [ -170.77177351  3393.69171311 -2383.80019164   216.7197157 ]
Reward: -1  Episode Reward:  22
xxxxx
xg  x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3071.88974883  3697.81222461 -4584.50430574 -2514.63908599]
------
Step:9, Action:South
State  208
Old Q Values:  [ 3071.88974883  3697.81222461 -4584.50430574 -2514.63908599]
New Q values:  [ 3071.88974883  2924.49346815 -4584.50430574 -2514.63908599]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4819.89526104 -6442.16912869 -8192.20126966   997.13989705]
------
Step:10, Action:North
State  288
Old Q Values:  [ 4819.89526104 -6442.16912869 -8192.20126966   997.13989705]
New Q values:  [ 2848.92502906 -6442.16912869 -8192.20126966   997.13989705]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3071.88974883  2924.49346815 -4584.50430574 -2514.63908599]
------
Step:11, Action:North
State  208
Old Q Values:  [ 3071.88974883  2924.49346815 -4584.50430574 -2514.63908599]
New Q values:  [ 2246.26341346  2924.49346815 -4584.50430574 -2514.63908599]
Reward: -1  Episode Reward:  19
xxxxx
x.gax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3393.69171311 -2383.80019164   216.7197157 ]
------
Step:12, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  1.87799072e+03 -3.22965309e-01  1.70418407e+03]
New Q values:  [ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  1.70418407e+03]
Reward: -1  Episode Reward:  18
xxxxx
x.  x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2246.26341346  2924.49346815 -4584.50430574 -2514.63908599]
------
Step:13, Action:South
State  210
Old Q Values:  [ 2813.98689911 -1394.3938761    790.72804752  2961.44414407]
New Q values:  [2813.98689911  296.31995828  790.72804752 2961.44414407]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2848.92502906 -6442.16912869 -8192.20126966   997.13989705]
------
Step:14, Action:North
State  288
Old Q Values:  [ 2848.92502906 -6442.16912869 -8192.20126966   997.13989705]
New Q values:  [ 2027.40325485 -6442.16912869 -8192.20126966   997.13989705]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2813.98689911  296.31995828  790.72804752 2961.44414407]
------
Step:15, Action:West
State  208
Old Q Values:  [ 2246.26341346  2924.49346815 -4584.50430574 -2514.63908599]
New Q values:  [ 2246.26341346  2924.49346815 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
xga x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3.89777037e-01  2.29934823e+03  6.89365078e+03 -6.46606132e+03]
------
Step:16, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.81388025e+03  1.63194774e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.61338534e+03  1.63194774e+03]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2813.98689911  296.31995828  790.72804752 2961.44414407]
------
Step:17, Action:West
State  210
Old Q Values:  [2813.98689911  296.31995828  790.72804752 2961.44414407]
New Q values:  [2813.98689911  296.31995828  790.72804752 1673.56197941]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x.a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.61338534e+03  1.63194774e+03]
------
Step:18, Action:West
State  192
Old Q Values:  [ 3.89777037e-01  2.29934823e+03  6.89365078e+03 -6.46606132e+03]
New Q values:  [3.89777037e-01 2.29934823e+03 6.89365078e+03 3.01679455e+04]
Reward: 9  Episode Reward:  32
xxxxx
x.  x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  10235.04946499      0.        ]
------
Step:19, Action:North
State  179
Old Q Values:  [    0.         16101.90751562  6535.7653539      0.        ]
New Q values:  [16872.63888686 16101.90751562  6535.7653539      0.        ]
Reward: 9  Episode Reward:  41
xxxxx
xa  x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[    0.         49742.2646533  56224.12962286     0.        ]
------
Step:20, Action:East
State  111
Old Q Values:  [-177.44732869 1050.56650176  391.45858569 -120.29354603]
New Q values:  [-177.44732869 1050.56650176 1569.8407506  -120.29354603]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  4712.85772109   295.54524228]
------
Step:21, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  4712.85772109   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  2395.79830945   295.54524228]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  1.70418407e+03]
------
Step:22, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  1.70418407e+03]
New Q values:  [ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  1.39981312e+03]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2395.79830945   295.54524228]
------
Step:23, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  18269.39566804 69109.34753192]
New Q values:  [ -180.6         3557.6642036  18108.15151622 69109.34753192]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  9002.30763664  -180.00807518 36003.31083   ]
------
Step:24, Action:West
State  138
Old Q Values:  [ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  1.39981312e+03]
New Q values:  [ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  9.83256088e+02]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1413.10279808   294.45275357]
------
Step:25, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1413.10279808   294.45275357]
New Q values:  [ -281.736      -1150.91067548  1053.02441828   294.45275357]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  9.83256088e+02]
------
Step:26, Action:South
State  130
Old Q Values:  [26290.62961917  9002.30763664  -180.00807518 36003.31083   ]
New Q values:  [26290.62961917  4444.51912439  -180.00807518 36003.31083   ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2813.98689911  296.31995828  790.72804752 1673.56197941]
------
Step:27, Action:North
State  218
Old Q Values:  [ 292.53315687 6862.634248      0.         1863.19740327]
New Q values:  [ 604.7965618  6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  9.83256088e+02]
------
Step:28, Action:South
State  130
Old Q Values:  [26290.62961917  4444.51912439  -180.00807518 36003.31083   ]
New Q values:  [26290.62961917  2621.40371949  -180.00807518 36003.31083   ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2813.98689911  296.31995828  790.72804752 1673.56197941]
------
Step:29, Action:North
State  216
Old Q Values:  [-2283.11858295   858.74745653 -8896.20691497 -1139.7029775 ]
New Q values:  [ -425.46413413   858.74745653 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  9.83256088e+02]
------
Step:30, Action:South
State  138
Old Q Values:  [ 6.43788450e+02  1.62794433e+03 -3.22965309e-01  9.83256088e+02]
New Q values:  [ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  9.83256088e+02]
Reward: -10001  Episode Reward:  -9970
xxxxx
x   x
x  gx
x . x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  647.13998405    26.73544252 -4398.1421471    434.60205314]
------
Step:1, Action:North
State  261
Old Q Values:  [  647.13998405    26.73544252 -4398.1421471    434.60205314]
New Q values:  [  397.93634413    26.73544252 -4398.1421471    434.60205314]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  445.60116837   441.14472571 -2062.97736574   262.76946019]
------
Step:2, Action:North
State  181
Old Q Values:  [  445.60116837   441.14472571 -2062.97736574   262.76946019]
New Q values:  [  327.12816671   441.14472571 -2062.97736574   262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
xag.x
x . x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   478.29233122 -5413.46457526  -180.6       ]
------
Step:3, Action:South
State  108
Old Q Values:  [-8463.16477134  2136.60898938   845.00690416     0.        ]
New Q values:  [-8463.16477134  2877.10348735   845.00690416     0.        ]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -820.10444135  1010.53711994  6743.532972   -4966.32149798]
------
Step:4, Action:East
State  189
Old Q Values:  [ 275.08817949 2886.59923314  588.19549948  154.04646645]
New Q values:  [ 275.08817949 2886.59923314 7050.17207037  154.04646645]
Reward: 9  Episode Reward:  26
xxxxx
x g.x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   764.61907621 22698.31290191   239.04887894]
------
Step:5, Action:East
State  193
Old Q Values:  [-5922.26708831  1627.37196626  6676.47974603   534.04109446]
New Q values:  [-5922.26708831  1627.37196626  2927.61613537   534.04109446]
Reward: -1  Episode Reward:  25
xxxxx
x .gx
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -425.46413413   858.74745653 -8896.20691497 -1139.7029775 ]
------
Step:6, Action:South
State  208
Old Q Values:  [ 2246.26341346  2924.49346815 -4584.50430574  1067.63960005]
New Q values:  [ 2246.26341346  1783.41836372 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x  gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2027.40325485 -6442.16912869 -8192.20126966   997.13989705]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2027.40325485 -6442.16912869 -8192.20126966   997.13989705]
New Q values:  [ 2027.40325485 -6442.16912869 -8192.20126966  1484.05163004]
Reward: 9  Episode Reward:  43
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 3599.31890407 1921.57547411]
------
Step:8, Action:East
State  276
Old Q Values:  [   16.82637525 -5807.06396197   618.89767946  2117.48112683]
New Q values:  [   16.82637525 -5807.06396197   855.18004824  2117.48112683]
Reward: -1  Episode Reward:  42
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2027.40325485 -6442.16912869 -8192.20126966  1484.05163004]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2027.40325485 -6442.16912869 -8192.20126966  1484.05163004]
New Q values:  [ 1484.24032598 -6442.16912869 -8192.20126966  1484.05163004]
Reward: -1  Episode Reward:  41
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2246.26341346  1783.41836372 -4584.50430574  1067.63960005]
------
Step:10, Action:North
State  208
Old Q Values:  [ 2246.26341346  1783.41836372 -4584.50430574  1067.63960005]
New Q values:  [19186.21940212  1783.41836372 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  50
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792 11563.73606199 -8652.84       60941.0467891 ]
------
Step:11, Action:South
State  128
Old Q Values:  [11374.93691792 11563.73606199 -8652.84       60941.0467891 ]
New Q values:  [11374.93691792 10380.76024543 -8652.84       60941.0467891 ]
Reward: -1  Episode Reward:  49
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19186.21940212  1783.41836372 -4584.50430574  1067.63960005]
------
Step:12, Action:South
State  208
Old Q Values:  [19186.21940212  1783.41836372 -4584.50430574  1067.63960005]
New Q values:  [19186.21940212  1158.03944328 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1484.24032598 -6442.16912869 -8192.20126966  1484.05163004]
------
Step:13, Action:North
State  288
Old Q Values:  [ 1484.24032598 -6442.16912869 -8192.20126966  1484.05163004]
New Q values:  [ 6348.96195103 -6442.16912869 -8192.20126966  1484.05163004]
Reward: -1  Episode Reward:  47
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19186.21940212  1158.03944328 -4584.50430574  1067.63960005]
------
Step:14, Action:North
State  208
Old Q Values:  [19186.21940212  1158.03944328 -4584.50430574  1067.63960005]
New Q values:  [18474.88100985  1158.03944328 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  46
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2621.40371949  -180.00807518 36003.31083   ]
------
Step:15, Action:West
State  128
Old Q Values:  [11374.93691792 10380.76024543 -8652.84       60941.0467891 ]
New Q values:  [ 11374.93691792  10380.76024543  -8652.84       117666.74763452]
Reward: 100009  Episode Reward:  100055
xxxxx
xga x
x   x
x   x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3474.21459359   729.45775691]
------
Step:1, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 3599.31890407 1921.57547411]
New Q values:  [ 677.52857079 -168.92307549 3349.81614694 1921.57547411]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6348.96195103 -6442.16912869 -8192.20126966  1484.05163004]
------
Step:2, Action:West
State  288
Old Q Values:  [ 6348.96195103 -6442.16912869 -8192.20126966  1484.05163004]
New Q values:  [ 6348.96195103 -6442.16912869 -8192.20126966  1597.9654961 ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 3349.81614694 1921.57547411]
------
Step:3, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3474.21459359   729.45775691]
New Q values:  [-2527.46239811 -8521.23367799  3474.21459359   427.56371871]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x. .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  397.93634413    26.73544252 -4398.1421471    434.60205314]
------
Step:4, Action:West
State  261
Old Q Values:  [  397.93634413    26.73544252 -4398.1421471    434.60205314]
New Q values:  [  397.93634413    26.73544252 -4398.1421471    123.6214372 ]
Reward: -301  Episode Reward:  -284
xxxxx
x...x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  397.93634413    26.73544252 -4398.1421471    123.6214372 ]
------
Step:5, Action:North
State  261
Old Q Values:  [  397.93634413    26.73544252 -4398.1421471    123.6214372 ]
New Q values:  [  538.01062307    26.73544252 -4398.1421471    123.6214372 ]
Reward: 9  Episode Reward:  -275
xxxxx
x...x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 530.87108315  680.80379064 1244.78695138  358.5166536 ]
------
Step:6, Action:East
State  181
Old Q Values:  [  327.12816671   441.14472571 -2062.97736574   262.76946019]
New Q values:  [ 327.12816671  441.14472571 2224.59269733  262.76946019]
Reward: -10001  Episode Reward:  -10276
xxxxx
x...x
x g.x
x   x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  441.14472571 2224.59269733  262.76946019]
------
Step:1, Action:South
State  181
Old Q Values:  [ 327.12816671  441.14472571 2224.59269733  262.76946019]
New Q values:  [ 327.12816671  343.2610772  2224.59269733  262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  538.01062307    26.73544252 -4398.1421471    123.6214372 ]
------
Step:2, Action:North
State  261
Old Q Values:  [  538.01062307    26.73544252 -4398.1421471    123.6214372 ]
New Q values:  [  881.98205842    26.73544252 -4398.1421471    123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  343.2610772  2224.59269733  262.76946019]
------
Step:3, Action:East
State  181
Old Q Values:  [ 327.12816671  343.2610772  2224.59269733  262.76946019]
New Q values:  [ 327.12816671  343.2610772  1767.52191954  262.76946019]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1627.37196626  2927.61613537   534.04109446]
------
Step:4, Action:South
State  195
Old Q Values:  [  38.85388605 7930.86155831 1849.21327227 1169.39963074]
New Q values:  [  38.85388605 4182.6894674  1849.21327227 1169.39963074]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 3349.81614694 1921.57547411]
------
Step:5, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 3349.81614694 1921.57547411]
New Q values:  [ 677.52857079 -168.92307549 3349.81614694 1032.62480717]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  881.98205842    26.73544252 -4398.1421471    123.6214372 ]
------
Step:6, Action:North
State  261
Old Q Values:  [  881.98205842    26.73544252 -4398.1421471    123.6214372 ]
New Q values:  [  882.44939923    26.73544252 -4398.1421471    123.6214372 ]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  343.2610772  1767.52191954  262.76946019]
------
Step:7, Action:East
State  181
Old Q Values:  [ 327.12816671  343.2610772  1767.52191954  262.76946019]
New Q values:  [ 327.12816671  343.2610772  7515.90263839  262.76946019]
Reward: -1  Episode Reward:  13
xxxxx
x.g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   764.61907621 22698.31290191   239.04887894]
------
Step:8, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.29934823e+03 6.89365078e+03 3.01679455e+04]
New Q values:  [3.89777037e-01 2.29934823e+03 8.30532462e+03 3.01679455e+04]
Reward: 9  Episode Reward:  22
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18474.88100985  1158.03944328 -4584.50430574  1067.63960005]
------
Step:9, Action:North
State  208
Old Q Values:  [18474.88100985  1158.03944328 -4584.50430574  1067.63960005]
New Q values:  [18196.34565294  1158.03944328 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  31
xxxxx
x..ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2621.40371949  -180.00807518 36003.31083   ]
------
Step:10, Action:West
State  130
Old Q Values:  [26290.62961917  2621.40371949  -180.00807518 36003.31083   ]
New Q values:  [26290.62961917  2621.40371949  -180.00807518 53407.49791471]
Reward: 9  Episode Reward:  40
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
------
Step:11, Action:West
State  127
Old Q Values:  [   0.            1.67014986  384.73306724 1064.01799158]
New Q values:  [  0.           1.67014986 384.73306724 901.95942181]
Reward: 9  Episode Reward:  49
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176 1569.8407506  -120.29354603]
------
Step:12, Action:East
State  99
Old Q Values:  [    0.         49742.2646533  56224.12962286     0.        ]
New Q values:  [    0.         49742.2646533  61489.82543186     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
------
Step:13, Action:West
State  126
Old Q Values:  [   0.          331.64678262 2273.64105687 1060.34671274]
New Q values:  [   0.          331.64678262 2273.64105687  894.49091028]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176 1569.8407506  -120.29354603]
------
Step:14, Action:East
State  110
Old Q Values:  [-239.29051573  784.08676736  938.35582334 -180.6       ]
New Q values:  [-239.29051573  784.08676736 1056.8346464  -180.6       ]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 2273.64105687  894.49091028]
------
Step:15, Action:East
State  126
Old Q Values:  [   0.          331.64678262 2273.64105687  894.49091028]
New Q values:  [   0.          331.64678262 1203.83324908  894.49091028]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  9.83256088e+02]
------
Step:16, Action:West
State  136
Old Q Values:  [ -170.77177351  3393.69171311 -2383.80019164   216.7197157 ]
New Q values:  [ -170.77177351  3393.69171311 -2383.80019164  1373.92456629]
Reward: -1  Episode Reward:  44
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 4292.78893337  686.3519147 ]
------
Step:17, Action:East
State  126
Old Q Values:  [   0.          331.64678262 1203.83324908  894.49091028]
New Q values:  [  0.         331.64678262 775.91012596 894.49091028]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  9.83256088e+02]
------
Step:18, Action:West
State  138
Old Q Values:  [ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  9.83256088e+02]
New Q values:  [ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  6.61049708e+02]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 775.91012596 894.49091028]
------
Step:19, Action:West
State  127
Old Q Values:  [  0.           1.67014986 384.73306724 901.95942181]
New Q values:  [  0.           1.67014986 384.73306724 831.13599391]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176 1569.8407506  -120.29354603]
------
Step:20, Action:East
State  109
Old Q Values:  [ -241.10880094   478.29233122 -5413.46457526  -180.6       ]
New Q values:  [ -241.10880094   478.29233122 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  40
xxxxx
x agx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         1.08147795]
------
Step:21, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 4292.78893337  686.3519147 ]
New Q values:  [   0.         1166.51141701 4292.78893337  417.42846525]
Reward: -1  Episode Reward:  39
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   478.29233122 -2165.66138672  -180.6       ]
------
Step:22, Action:South
State  109
Old Q Values:  [ -241.10880094   478.29233122 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  2305.7685536  -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 2886.59923314 7050.17207037  154.04646645]
------
Step:23, Action:East
State  189
Old Q Values:  [ 275.08817949 2886.59923314 7050.17207037  154.04646645]
New Q values:  [ 275.08817949 2886.59923314 3433.80510082  154.04646645]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         1387.82535955 2047.78757557  441.58769553]
------
Step:24, Action:East
State  204
Old Q Values:  [   0.         1387.82535955 2047.78757557  441.58769553]
New Q values:  [   0.         1387.82535955 1076.13926719  441.58769553]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -425.46413413   858.74745653 -8896.20691497 -1139.7029775 ]
------
Step:25, Action:South
State  210
Old Q Values:  [2813.98689911  296.31995828  790.72804752 1673.56197941]
New Q values:  [ 2813.98689911 62028.61656862   790.72804752  1673.56197941]
Reward: 100009  Episode Reward:  100045
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  752.72159273 -2735.46306511   877.22970511 -5679.36893145]
------
Step:1, Action:East
State  260
Old Q Values:  [  752.72159273 -2735.46306511   877.22970511 -5679.36893145]
New Q values:  [  752.72159273 -2735.46306511   991.53622009 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824  2117.48112683]
------
Step:2, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3474.21459359   427.56371871]
New Q values:  [-2527.46239811 -8521.23367799  3474.21459359   467.88635351]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  752.72159273 -2735.46306511   991.53622009 -5679.36893145]
------
Step:3, Action:East
State  261
Old Q Values:  [  882.44939923    26.73544252 -4398.1421471    123.6214372 ]
New Q values:  [  882.44939923    26.73544252 -1124.61252079   123.6214372 ]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824  2117.48112683]
------
Step:4, Action:West
State  273
Old Q Values:  [ 677.52857079 -168.92307549 3349.81614694 1032.62480717]
New Q values:  [ 677.52857079 -168.92307549 3349.81614694  677.18474264]
Reward: -1  Episode Reward:  6
xxxxx
x .gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  882.44939923    26.73544252 -1124.61252079   123.6214372 ]
------
Step:5, Action:North
State  261
Old Q Values:  [  882.44939923    26.73544252 -1124.61252079   123.6214372 ]
New Q values:  [ 2613.15055121    26.73544252 -1124.61252079   123.6214372 ]
Reward: 9  Episode Reward:  15
xxxxx
x ..x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  343.2610772  7515.90263839  262.76946019]
------
Step:6, Action:East
State  181
Old Q Values:  [ 327.12816671  343.2610772  7515.90263839  262.76946019]
New Q values:  [ 327.12816671  343.2610772  3890.04589597  262.76946019]
Reward: 9  Episode Reward:  24
xxxxx
x .gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1627.37196626  2927.61613537   534.04109446]
------
Step:7, Action:East
State  193
Old Q Values:  [-5922.26708831  1627.37196626  2927.61613537   534.04109446]
New Q values:  [-5922.26708831  1627.37196626   635.35015003   534.04109446]
Reward: -9991  Episode Reward:  -9967
xxxxx
x ..x
x  gx
x  .x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  6.61049708e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  6.61049708e+02]
New Q values:  [ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  5.85727209e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1053.02441828   294.45275357]
------
Step:2, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2395.79830945   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  1150.85585886   295.54524228]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x...x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  5.85727209e+02]
------
Step:3, Action:North
State  138
Old Q Values:  [ 6.43788450e+02 -5.09179803e+03 -3.22965309e-01  5.85727209e+02]
New Q values:  [ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  5.85727209e+02]
Reward: -301  Episode Reward:  -293
xxxxx
x. ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  5.85727209e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  5.85727209e+02]
New Q values:  [ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  5.49598209e+02]
Reward: -1  Episode Reward:  -294
xxxxx
x.a x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1053.02441828   294.45275357]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1053.02441828   294.45275357]
New Q values:  [ -281.736      -1150.91067548   585.48923001   294.45275357]
Reward: -1  Episode Reward:  -295
xxxxx
x. ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  5.49598209e+02]
------
Step:6, Action:West
State  136
Old Q Values:  [ -170.77177351  3393.69171311 -2383.80019164  1373.92456629]
New Q values:  [ -170.77177351  3393.69171311 -2383.80019164  2425.1675585 ]
Reward: -1  Episode Reward:  -296
xxxxx
xga x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  6253.99243995   482.11357944]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   585.48923001   294.45275357]
New Q values:  [ -281.736      -1150.91067548   398.4751547    294.45275357]
Reward: -1  Episode Reward:  -297
xxxxx
x. ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  5.49598209e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  5.49598209e+02]
New Q values:  [ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  3.38781830e+02]
Reward: -1  Episode Reward:  -298
xxxxx
x.a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   398.4751547    294.45275357]
------
Step:9, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  6253.99243995   482.11357944]
New Q values:  [-9594.56523706 -8069.05606225  3519.10448991   482.11357944]
Reward: -1  Episode Reward:  -299
xxxxx
x.gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3393.69171311 -2383.80019164  2425.1675585 ]
------
Step:10, Action:South
State  138
Old Q Values:  [ 2.70051915e+02 -5.09179803e+03 -3.22965309e-01  3.38781830e+02]
New Q values:  [ 2.70051915e+02  3.42758448e+03 -3.22965309e-01  3.38781830e+02]
Reward: 9  Episode Reward:  -290
xxxxx
x.  x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18196.34565294  1158.03944328 -4584.50430574  1067.63960005]
------
Step:11, Action:North
State  208
Old Q Values:  [18196.34565294  1158.03944328 -4584.50430574  1067.63960005]
New Q values:  [ 8306.21360622  1158.03944328 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  -291
xxxxx
x. ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  3.42758448e+03 -3.22965309e-01  3.38781830e+02]
------
Step:12, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  3.42758448e+03 -3.22965309e-01  3.38781830e+02]
New Q values:  [ 2.70051915e+02  3.42758448e+03 -3.22965309e-01  4.80169490e+02]
Reward: -1  Episode Reward:  -292
xxxxx
x.a x
x.. x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1150.85585886   295.54524228]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   398.4751547    294.45275357]
New Q values:  [ -281.736      -1150.91067548  1187.06540693   294.45275357]
Reward: -1  Episode Reward:  -293
xxxxx
x. ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  3.42758448e+03 -3.22965309e-01  4.80169490e+02]
------
Step:14, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  3.42758448e+03 -3.22965309e-01  4.80169490e+02]
New Q values:  [ 2.70051915e+02  1.99790188e+04 -3.22965309e-01  4.80169490e+02]
Reward: -1  Episode Reward:  -294
xxxxx
x.  x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2813.98689911 62028.61656862   790.72804752  1673.56197941]
------
Step:15, Action:South
State  210
Old Q Values:  [ 2813.98689911 62028.61656862   790.72804752  1673.56197941]
New Q values:  [ 2813.98689911 26721.53521276   790.72804752  1673.56197941]
Reward: 9  Episode Reward:  -285
xxxxx
x.  x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6348.96195103 -6442.16912869 -8192.20126966  1597.9654961 ]
------
Step:16, Action:North
State  288
Old Q Values:  [ 6348.96195103 -6442.16912869 -8192.20126966  1597.9654961 ]
New Q values:  [10555.44534424 -6442.16912869 -8192.20126966  1597.9654961 ]
Reward: -1  Episode Reward:  -286
xxxxx
x.  x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2813.98689911 26721.53521276   790.72804752  1673.56197941]
------
Step:17, Action:North
State  208
Old Q Values:  [ 8306.21360622  1158.03944328 -4584.50430574  1067.63960005]
New Q values:  [ 9315.59107168  1158.03944328 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  -287
xxxxx
x. ax
x..gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  1.99790188e+04 -3.22965309e-01  4.80169490e+02]
------
Step:18, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  1.99790188e+04 -3.22965309e-01  4.80169490e+02]
New Q values:  [ 2.70051915e+02  1.99790188e+04 -3.22965309e-01  5.47587418e+02]
Reward: -1  Episode Reward:  -288
xxxxx
x.a x
x.g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1187.06540693   294.45275357]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1187.06540693   294.45275357]
New Q values:  [ -281.736      -1150.91067548  6467.93179197   294.45275357]
Reward: -1  Episode Reward:  -289
xxxxx
x. ax
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  1.99790188e+04 -3.22965309e-01  5.47587418e+02]
------
Step:20, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  1.99790188e+04 -3.22965309e-01  5.47587418e+02]
New Q values:  [ 2.70051915e+02  1.60074681e+04 -3.22965309e-01  5.47587418e+02]
Reward: -1  Episode Reward:  -290
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2813.98689911 26721.53521276   790.72804752  1673.56197941]
------
Step:21, Action:South
State  210
Old Q Values:  [ 2813.98689911 26721.53521276   790.72804752  1673.56197941]
New Q values:  [ 2813.98689911 13854.64768837   790.72804752  1673.56197941]
Reward: -1  Episode Reward:  -291
xxxxx
x.  x
x.. x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10555.44534424 -6442.16912869 -8192.20126966  1597.9654961 ]
------
Step:22, Action:North
State  288
Old Q Values:  [10555.44534424 -6442.16912869 -8192.20126966  1597.9654961 ]
New Q values:  [ 7016.2554592  -6442.16912869 -8192.20126966  1597.9654961 ]
Reward: -1  Episode Reward:  -292
xxxxx
x.  x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9315.59107168  1158.03944328 -4584.50430574  1067.63960005]
------
Step:23, Action:North
State  208
Old Q Values:  [ 9315.59107168  1158.03944328 -4584.50430574  1067.63960005]
New Q values:  [ 4743.74394261  1158.03944328 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  -293
xxxxx
x.gax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3393.69171311 -2383.80019164  2425.1675585 ]
------
Step:24, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  1.60074681e+04 -3.22965309e-01  5.47587418e+02]
New Q values:  [ 2.70051915e+02  7.82551041e+03 -3.22965309e-01  5.47587418e+02]
Reward: -1  Episode Reward:  -294
xxxxx
x.  x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4743.74394261  1158.03944328 -4584.50430574  1067.63960005]
------
Step:25, Action:North
State  210
Old Q Values:  [ 2813.98689911 13854.64768837   790.72804752  1673.56197941]
New Q values:  [ 3472.64788281 13854.64768837   790.72804752  1673.56197941]
Reward: -1  Episode Reward:  -295
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  7.82551041e+03 -3.22965309e-01  5.47587418e+02]
------
Step:26, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  7.82551041e+03 -3.22965309e-01  5.47587418e+02]
New Q values:  [ 2.70051915e+02  7.28599847e+03 -3.22965309e-01  5.47587418e+02]
Reward: -1  Episode Reward:  -296
xxxxx
x.  x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3472.64788281 13854.64768837   790.72804752  1673.56197941]
------
Step:27, Action:North
State  210
Old Q Values:  [ 3472.64788281 13854.64768837   790.72804752  1673.56197941]
New Q values:  [ 3574.25869434 13854.64768837   790.72804752  1673.56197941]
Reward: -1  Episode Reward:  -297
xxxxx
x. ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  7.28599847e+03 -3.22965309e-01  5.47587418e+02]
------
Step:28, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  7.28599847e+03 -3.22965309e-01  5.47587418e+02]
New Q values:  [ 2.70051915e+02  7.07019369e+03 -3.22965309e-01  5.47587418e+02]
Reward: -1  Episode Reward:  -298
xxxxx
x.  x
x..ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3574.25869434 13854.64768837   790.72804752  1673.56197941]
------
Step:29, Action:North
State  208
Old Q Values:  [ 4743.74394261  1158.03944328 -4584.50430574  1067.63960005]
New Q values:  [ 4017.95568548  1158.03944328 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  -299
xxxxx
x. ax
x..gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  7.07019369e+03 -3.22965309e-01  5.47587418e+02]
------
Step:30, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  7.07019369e+03 -3.22965309e-01  5.47587418e+02]
New Q values:  [ 2.70051915e+02  7.07019369e+03 -3.22965309e-01  5.63691725e+02]
Reward: -1  Episode Reward:  -300
xxxxx
x.a x
x.. x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1150.85585886   295.54524228]
------
Step:31, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1150.85585886   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  2580.80045198   295.54524228]
Reward: -1  Episode Reward:  -301
xxxxx
x. ax
x.. x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  7.07019369e+03 -3.22965309e-01  5.63691725e+02]
------
Step:32, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  7.07019369e+03 -3.22965309e-01  5.63691725e+02]
New Q values:  [ 2.70051915e+02  6.98387178e+03 -3.22965309e-01  5.63691725e+02]
Reward: -1  Episode Reward:  -302
xxxxx
x.  x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3574.25869434 13854.64768837   790.72804752  1673.56197941]
------
Step:33, Action:South
State  208
Old Q Values:  [ 4017.95568548  1158.03944328 -4584.50430574  1067.63960005]
New Q values:  [ 4017.95568548  2567.49241507 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  -303
xxxxx
x.  x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7016.2554592  -6442.16912869 -8192.20126966  1597.9654961 ]
------
Step:34, Action:North
State  288
Old Q Values:  [ 7016.2554592  -6442.16912869 -8192.20126966  1597.9654961 ]
New Q values:  [-1988.71111067 -6442.16912869 -8192.20126966  1597.9654961 ]
Reward: -10001  Episode Reward:  -10304
xxxxx
x.  x
x..gx
x . x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.61338534e+03  1.63194774e+03]
------
Step:1, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.61338534e+03  1.63194774e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.61338534e+03  1.03161518e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 530.87108315  680.80379064 1244.78695138  358.5166536 ]
------
Step:2, Action:East
State  181
Old Q Values:  [ 327.12816671  343.2610772  3890.04589597  262.76946019]
New Q values:  [ 327.12816671  343.2610772  4605.80200201  262.76946019]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x g.x
x ..x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 530.87108315  680.80379064 1244.78695138  358.5166536 ]
------
Step:1, Action:East
State  181
Old Q Values:  [ 327.12816671  343.2610772  4605.80200201  262.76946019]
New Q values:  [ 327.12816671  343.2610772  2335.93239068  262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1627.37196626   635.35015003   534.04109446]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831  1627.37196626   635.35015003   534.04109446]
New Q values:  [-5922.26708831  1661.29363059   635.35015003   534.04109446]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 3349.81614694  677.18474264]
------
Step:3, Action:East
State  273
Old Q Values:  [ 677.52857079 -168.92307549 3349.81614694  677.18474264]
New Q values:  [ 677.52857079 -168.92307549 1818.7161076   677.18474264]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1988.71111067 -6442.16912869 -8192.20126966  1597.9654961 ]
------
Step:4, Action:West
State  288
Old Q Values:  [-1988.71111067 -6442.16912869 -8192.20126966  1597.9654961 ]
New Q values:  [-1988.71111067 -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3474.21459359   467.88635351]
------
Step:5, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3474.21459359   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799  1893.34101039   467.88635351]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1988.71111067 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:6, Action:North
State  288
Old Q Values:  [-1988.71111067 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 3366.30986224 -6442.16912869 -8192.20126966  1680.85057651]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3574.25869434 13854.64768837   790.72804752  1673.56197941]
------
Step:7, Action:South
State  210
Old Q Values:  [ 3574.25869434 13854.64768837   790.72804752  1673.56197941]
New Q values:  [3574.25869434 6551.15203402  790.72804752 1673.56197941]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3366.30986224 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:8, Action:North
State  288
Old Q Values:  [ 3366.30986224 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 3311.2695551  -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3574.25869434 6551.15203402  790.72804752 1673.56197941]
------
Step:9, Action:North
State  208
Old Q Values:  [ 4017.95568548  2567.49241507 -4584.50430574  1067.63960005]
New Q values:  [17634.83164861  2567.49241507 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  31
xxxxx
x..ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2621.40371949  -180.00807518 53407.49791471]
------
Step:10, Action:West
State  128
Old Q Values:  [ 11374.93691792  10380.76024543  -8652.84       117666.74763452]
New Q values:  [11374.93691792 10380.76024543 -8652.84       68375.61054215]
Reward: 9  Episode Reward:  40
xxxxx
x.agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.          2847.11106414 -5999.38454759 71011.70496116]
------
Step:11, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3519.10448991   482.11357944]
New Q values:  [-9594.56523706 -8069.05606225  3519.10448991   746.54875304]
Reward: 9  Episode Reward:  49
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1827.67773756   86.99637671    0.        ]
------
Step:12, Action:South
State  107
Old Q Values:  [-252.35169558  380.26082847  591.68300325 -252.78192178]
New Q values:  [-252.35169558  256.38118883  591.68300325 -252.78192178]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 349.5895248     0.           29.66548264 -178.98      ]
------
Step:13, Action:North
State  187
Old Q Values:  [ 429.84695601    0.         4039.55692722    0.        ]
New Q values:  [ 348.84368338    0.         4039.55692722    0.        ]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883  591.68300325 -252.78192178]
------
Step:14, Action:East
State  107
Old Q Values:  [-252.35169558  256.38118883  591.68300325 -252.78192178]
New Q values:  [-252.35169558  256.38118883 1010.3133369  -252.78192178]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2580.80045198   295.54524228]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  6467.93179197   294.45275357]
New Q values:  [ -281.736      -1150.91067548  4681.73425212   294.45275357]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  6.98387178e+03 -3.22965309e-01  5.63691725e+02]
------
Step:16, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  6.98387178e+03 -3.22965309e-01  5.63691725e+02]
New Q values:  [ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  5.63691725e+02]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  ax
x. gx
xxxxx
Step:17, Action:South
State  216
Old Q Values:  [ -425.46413413   858.74745653 -8896.20691497 -1139.7029775 ]
New Q values:  [ -425.46413413 -5399.47678043 -8896.20691497 -1139.7029775 ]
Reward: -10001  Episode Reward:  -9957
xxxxx
x   x
x  gx
x.  x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  343.2610772  2335.93239068  262.76946019]
------
Step:1, Action:East
State  189
Old Q Values:  [ 275.08817949 2886.59923314 3433.80510082  154.04646645]
New Q values:  [ 275.08817949 2886.59923314 1877.3101295   154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1661.29363059   635.35015003   534.04109446]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831  1661.29363059   635.35015003   534.04109446]
New Q values:  [-5922.26708831  1215.53228452   635.35015003   534.04109446]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1818.7161076   677.18474264]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1893.34101039   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799  1756.11727069   467.88635351]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3311.2695551  -6442.16912869 -8192.20126966  1680.85057651]
------
Step:4, Action:North
State  288
Old Q Values:  [ 3311.2695551  -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 6620.35731662 -6442.16912869 -8192.20126966  1680.85057651]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17634.83164861  2567.49241507 -4584.50430574  1067.63960005]
------
Step:5, Action:South
State  208
Old Q Values:  [17634.83164861  2567.49241507 -4584.50430574  1067.63960005]
New Q values:  [17634.83164861  3012.50416102 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  35
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6620.35731662 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:6, Action:North
State  288
Old Q Values:  [ 6620.35731662 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 7937.99242123 -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  34
xxxxx
x.. x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17634.83164861  3012.50416102 -4584.50430574  1067.63960005]
------
Step:7, Action:North
State  208
Old Q Values:  [17634.83164861  3012.50416102 -4584.50430574  1067.63960005]
New Q values:  [23075.58203386  3012.50416102 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  33
xxxxx
x..ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2621.40371949  -180.00807518 53407.49791471]
------
Step:8, Action:West
State  130
Old Q Values:  [26290.62961917  2621.40371949  -180.00807518 53407.49791471]
New Q values:  [26290.62961917  2621.40371949  -180.00807518 60369.1727486 ]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  4681.73425212   294.45275357]
New Q values:  [ -281.736      -1150.91067548  4681.73425212   426.2751025 ]
Reward: 9  Episode Reward:  51
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883 1010.3133369  -252.78192178]
------
Step:10, Action:East
State  98
Old Q Values:  [    0.         43483.63171141 69821.51860553     0.        ]
New Q values:  [    0.         43483.63171141 48660.81170179     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18108.15151622 69109.34753192]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  4681.73425212   426.2751025 ]
New Q values:  [ -281.736      -1150.91067548  4681.73425212   473.00404207]
Reward: -1  Episode Reward:  49
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883 1010.3133369  -252.78192178]
------
Step:12, Action:East
State  107
Old Q Values:  [-252.35169558  256.38118883 1010.3133369  -252.78192178]
New Q values:  [-252.35169558  256.38118883 1177.76547035 -252.78192178]
Reward: -1  Episode Reward:  48
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2580.80045198   295.54524228]
------
Step:13, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2580.80045198   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  2487.24187725   295.54524228]
Reward: -1  Episode Reward:  47
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  5.63691725e+02]
------
Step:14, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  5.63691725e+02]
New Q values:  [ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  9.71049253e+02]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2487.24187725   295.54524228]
------
Step:15, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2487.24187725   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  2449.81844735   295.54524228]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  9.71049253e+02]
------
Step:16, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  9.71049253e+02]
New Q values:  [ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  1.12276524e+03]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2449.81844735   295.54524228]
------
Step:17, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2449.81844735   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  2434.84907539   295.54524228]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  1.12276524e+03]
------
Step:18, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  1.12276524e+03]
New Q values:  [ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  1.17896082e+03]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2434.84907539   295.54524228]
------
Step:19, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2434.84907539   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  2428.86132661   295.54524228]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x  gx
x.  x
xxxxx
Step:20, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  4.85173899e+03 -3.22965309e-01  1.17896082e+03]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.17896082e+03]
Reward: -10001  Episode Reward:  -9960
xxxxx
x  ax
x   x
x. gx
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3574.25869434 6551.15203402  790.72804752 1673.56197941]
------
Step:1, Action:South
State  208
Old Q Values:  [23075.58203386  3012.50416102 -4584.50430574  1067.63960005]
New Q values:  [23075.58203386  3591.79939078 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7937.99242123 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:2, Action:North
State  288
Old Q Values:  [ 7937.99242123 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 5139.9425787  -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3574.25869434 6551.15203402  790.72804752 1673.56197941]
------
Step:3, Action:South
State  210
Old Q Values:  [3574.25869434 6551.15203402  790.72804752 1673.56197941]
New Q values:  [ 3574.25869434 -1838.15641278   790.72804752  1673.56197941]
Reward: -10001  Episode Reward:  -9993
xxxxx
x...x
x.. x
x. gx
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
Step:1, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.17896082e+03]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.88150460e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4681.73425212   473.00404207]
------
Step:2, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2428.86132661   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  1535.39591135   295.54524228]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.88150460e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.88150460e+03]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.15652212e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4681.73425212   473.00404207]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1535.39591135   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  1260.51499951   295.54524228]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.15652212e+03]
------
Step:5, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.15652212e+03]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.26652912e+03]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4681.73425212   473.00404207]
------
Step:6, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1260.51499951   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  1183.56473648   295.54524228]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.26652912e+03]
------
Step:7, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.26652912e+03]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.31053192e+03]
Reward: -1  Episode Reward:  3
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4681.73425212   473.00404207]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4681.73425212   473.00404207]
New Q values:  [ -281.736      -1150.91067548  2565.25327821   473.00404207]
Reward: -1  Episode Reward:  2
xxxxx
x. ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.31053192e+03]
------
Step:9, Action:West
State  136
Old Q Values:  [ -170.77177351  3393.69171311 -2383.80019164  2425.1675585 ]
New Q values:  [ -170.77177351  3393.69171311 -2383.80019164 -3974.80162963]
Reward: -10001  Episode Reward:  -9999
xxxxx
x.g x
x. .x
x...x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2613.15055121    26.73544252 -1124.61252079   123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [ 2613.15055121    26.73544252 -1124.61252079   123.6214372 ]
New Q values:  [ 1751.43993769    26.73544252 -1124.61252079   123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  343.2610772  2335.93239068  262.76946019]
------
Step:2, Action:East
State  189
Old Q Values:  [ 275.08817949 2886.59923314 1877.3101295   154.04646645]
New Q values:  [ 275.08817949 2886.59923314 9806.70769542  154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.29934823e+03 8.30532462e+03 3.01679455e+04]
------
Step:3, Action:West
State  192
Old Q Values:  [3.89777037e-01 2.29934823e+03 8.30532462e+03 3.01679455e+04]
New Q values:  [3.89777037e-01 2.29934823e+03 8.30532462e+03 1.27673579e+04]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  343.2610772  2335.93239068  262.76946019]
------
Step:4, Action:South
State  183
Old Q Values:  [ 530.87108315  680.80379064 1244.78695138  358.5166536 ]
New Q values:  [ 530.87108315  797.15349756 1244.78695138  358.5166536 ]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1751.43993769    26.73544252 -1124.61252079   123.6214372 ]
------
Step:5, Action:North
State  261
Old Q Values:  [ 1751.43993769    26.73544252 -1124.61252079   123.6214372 ]
New Q values:  [ 1400.75569228    26.73544252 -1124.61252079   123.6214372 ]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  343.2610772  2335.93239068  262.76946019]
------
Step:6, Action:South
State  181
Old Q Values:  [ 327.12816671  343.2610772  2335.93239068  262.76946019]
New Q values:  [ 327.12816671  556.93113857 2335.93239068  262.76946019]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1400.75569228    26.73544252 -1124.61252079   123.6214372 ]
------
Step:7, Action:North
State  261
Old Q Values:  [ 1400.75569228    26.73544252 -1124.61252079   123.6214372 ]
New Q values:  [  933.13836233    26.73544252 -1124.61252079   123.6214372 ]
Reward: -1  Episode Reward:  13
xxxxx
x...x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 530.87108315  797.15349756 1244.78695138  358.5166536 ]
------
Step:8, Action:East
State  183
Old Q Values:  [ 530.87108315  797.15349756 1244.78695138  358.5166536 ]
New Q values:  [530.87108315 797.15349756 981.33038323 358.5166536 ]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.61338534e+03  1.03161518e+03]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.61338534e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.32169573e+02  1.71703175e+03  1.03161518e+03]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3574.25869434 -1838.15641278   790.72804752  1673.56197941]
------
Step:10, Action:North
State  210
Old Q Values:  [ 3574.25869434 -1838.15641278   790.72804752  1673.56197941]
New Q values:  [19545.85530232 -1838.15641278   790.72804752  1673.56197941]
Reward: 9  Episode Reward:  20
xxxxx
x..ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2621.40371949  -180.00807518 60369.1727486 ]
------
Step:11, Action:West
State  130
Old Q Values:  [26290.62961917  2621.40371949  -180.00807518 60369.1727486 ]
New Q values:  [26290.62961917  2621.40371949  -180.00807518 44885.87335901]
Reward: 9  Episode Reward:  29
xxxxx
x.a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18108.15151622 69109.34753192]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  2565.25327821   473.00404207]
New Q values:  [ -281.736      -1150.91067548  2565.25327821   665.55384201]
Reward: 9  Episode Reward:  38
xxxxx
xa  x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176 1569.8407506  -120.29354603]
------
Step:13, Action:East
State  110
Old Q Values:  [-239.29051573  784.08676736 1056.8346464  -180.6       ]
New Q values:  [-239.29051573  784.08676736 1191.70984202 -180.6       ]
Reward: -1  Episode Reward:  37
xxxxx
x a x
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2565.25327821   665.55384201]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2565.25327821   665.55384201]
New Q values:  [ -281.736      -1150.91067548  1718.66088865   665.55384201]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.31053192e+03]
------
Step:15, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  2.31053192e+03]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.19196004e+03]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 775.91012596 894.49091028]
------
Step:16, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1718.66088865   665.55384201]
New Q values:  [ -281.736      -1150.91067548  1718.66088865   736.57376198]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176 1569.8407506  -120.29354603]
------
Step:17, Action:East
State  111
Old Q Values:  [-177.44732869 1050.56650176 1569.8407506  -120.29354603]
New Q values:  [-177.44732869 1050.56650176  982.40572119 -120.29354603]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1183.56473648   295.54524228]
------
Step:18, Action:East
State  121
Old Q Values:  [    0.             0.         -7059.48304203   331.71025071]
New Q values:  [    0.             0.         -7806.28570288   331.71025071]
Reward: -10001  Episode Reward:  -9968
xxxxx
x  gx
x   x
x ..x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[530.87108315 797.15349756 981.33038323 358.5166536 ]
------
Step:1, Action:East
State  181
Old Q Values:  [ 327.12816671  556.93113857 2335.93239068  262.76946019]
New Q values:  [ 327.12816671  556.93113857 1304.43264163  262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1215.53228452   635.35015003   534.04109446]
------
Step:2, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.29934823e+03 8.30532462e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 1.45197447e+03 8.30532462e+03 1.27673579e+04]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1756.11727069   467.88635351]
------
Step:3, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1756.11727069   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799  2243.82968188   467.88635351]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5139.9425787  -6442.16912869 -8192.20126966  1680.85057651]
------
Step:4, Action:North
State  288
Old Q Values:  [ 5139.9425787  -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 7925.13362217 -6442.16912869 -8192.20126966  1680.85057651]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[19545.85530232 -1838.15641278   790.72804752  1673.56197941]
------
Step:5, Action:North
State  210
Old Q Values:  [19545.85530232 -1838.15641278   790.72804752  1673.56197941]
New Q values:  [21289.50412863 -1838.15641278   790.72804752  1673.56197941]
Reward: 9  Episode Reward:  35
xxxxx
x..ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2621.40371949  -180.00807518 44885.87335901]
------
Step:6, Action:West
State  130
Old Q Values:  [26290.62961917  2621.40371949  -180.00807518 44885.87335901]
New Q values:  [26290.62961917  2621.40371949  -180.00807518 38692.55360318]
Reward: 9  Episode Reward:  44
xxxxx
x.a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18108.15151622 69109.34753192]
------
Step:7, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1183.56473648   295.54524228]
New Q values:  [ -253.44886264 -1902.20915811  1183.56473648   476.94773802]
Reward: 9  Episode Reward:  53
xxxxx
xa  x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883 1177.76547035 -252.78192178]
------
Step:8, Action:East
State  107
Old Q Values:  [-252.35169558  256.38118883 1177.76547035 -252.78192178]
New Q values:  [-252.35169558  256.38118883  825.57560909 -252.78192178]
Reward: -1  Episode Reward:  52
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1183.56473648   476.94773802]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1718.66088865   736.57376198]
New Q values:  [ -281.736      -1150.91067548  1044.45236833   736.57376198]
Reward: -1  Episode Reward:  51
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.19196004e+03]
------
Step:10, Action:West
State  136
Old Q Values:  [ -170.77177351  3393.69171311 -2383.80019164 -3974.80162963]
New Q values:  [ -170.77177351  3393.69171311 -2383.80019164 -6534.78930488]
Reward: -10001  Episode Reward:  -9950
xxxxx
x g x
x   x
x.  x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1215.53228452   635.35015003   534.04109446]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831  1215.53228452   635.35015003   534.04109446]
New Q values:  [-5922.26708831  1037.22774609   635.35015003   534.04109446]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1818.7161076   677.18474264]
------
Step:2, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  2243.82968188   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799  3280.47195941   467.88635351]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7925.13362217 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7925.13362217 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [10098.12805903 -6442.16912869 -8192.20126966  1680.85057651]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23075.58203386  3591.79939078 -4584.50430574  1067.63960005]
------
Step:4, Action:North
State  208
Old Q Values:  [23075.58203386  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [10247.74032748  3591.79939078 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  26
xxxxx
xg.ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3393.69171311 -2383.80019164 -6534.78930488]
------
Step:5, Action:South
State  130
Old Q Values:  [26290.62961917  2621.40371949  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  4122.28358604  -180.00807518 38692.55360318]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10247.74032748  3591.79939078 -4584.50430574  1067.63960005]
------
Step:6, Action:North
State  208
Old Q Values:  [10247.74032748  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [ 5116.60364492  3591.79939078 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  24
xxxxx
xg.ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3393.69171311 -2383.80019164 -6534.78930488]
------
Step:7, Action:South
State  136
Old Q Values:  [ -170.77177351  3393.69171311 -2383.80019164 -6534.78930488]
New Q values:  [ -170.77177351  2891.85777872 -2383.80019164 -6534.78930488]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5116.60364492  3591.79939078 -4584.50430574  1067.63960005]
------
Step:8, Action:North
State  208
Old Q Values:  [ 5116.60364492  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [16558.72462062  3591.79939078 -4584.50430574  1067.63960005]
Reward: -10001  Episode Reward:  -9978
xxxxx
x..gx
x.  x
x.  x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21289.50412863 -1838.15641278   790.72804752  1673.56197941]
------
Step:1, Action:North
State  210
Old Q Values:  [21289.50412863 -1838.15641278   790.72804752  1673.56197941]
New Q values:  [ 8878.78966432 -1838.15641278   790.72804752  1673.56197941]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.19196004e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  1.19196004e+03]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  7.95519728e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1044.45236833   736.57376198]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1044.45236833   736.57376198]
New Q values:  [ -281.736      -1150.91067548   655.83686563   736.57376198]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  7.95519728e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  7.95519728e+02]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  5.38580020e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   655.83686563   736.57376198]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   655.83686563   736.57376198]
New Q values:  [ -281.736      -1150.91067548   655.83686563   657.5424574 ]
Reward: 9  Episode Reward:  25
xxxxx
xa  x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  784.08676736 1191.70984202 -180.6       ]
------
Step:6, Action:East
State  110
Old Q Values:  [-239.29051573  784.08676736 1191.70984202 -180.6       ]
New Q values:  [-239.29051573  784.08676736  673.34667403 -180.6       ]
Reward: -1  Episode Reward:  24
xxxxx
x a x
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   655.83686563   657.5424574 ]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3519.10448991   746.54875304]
New Q values:  [-9594.56523706 -8069.05606225  3519.10448991 -4838.84945258]
Reward: -10001  Episode Reward:  -9977
xxxxx
xg  x
x.. x
x ..x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  5.38580020e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  5.38580020e+02]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  5.75901429e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1183.56473648   476.94773802]
------
Step:2, Action:East
State  121
Old Q Values:  [    0.             0.         -7806.28570288   331.71025071]
New Q values:  [    0.             0.         -8255.55694753   331.71025071]
Reward: -10001  Episode Reward:  -9992
xxxxx
x. gx
x. .x
x...x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.45197447e+03 8.30532462e+03 1.27673579e+04]
------
Step:1, Action:West
State  196
Old Q Values:  [-2469.90645144   764.61907621 22698.31290191   239.04887894]
New Q values:  [-2469.90645144   764.61907621 22698.31290191   492.34934406]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  556.93113857 1304.43264163  262.76946019]
------
Step:2, Action:South
State  183
Old Q Values:  [530.87108315 797.15349756 981.33038323 358.5166536 ]
New Q values:  [530.87108315 604.20290772 981.33038323 358.5166536 ]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  933.13836233    26.73544252 -1124.61252079   123.6214372 ]
------
Step:3, Action:North
State  261
Old Q Values:  [  933.13836233    26.73544252 -1124.61252079   123.6214372 ]
New Q values:  [  763.98513742    26.73544252 -1124.61252079   123.6214372 ]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 327.12816671  556.93113857 1304.43264163  262.76946019]
------
Step:4, Action:South
State  183
Old Q Values:  [530.87108315 604.20290772 981.33038323 358.5166536 ]
New Q values:  [530.87108315 470.27670432 981.33038323 358.5166536 ]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  763.98513742    26.73544252 -1124.61252079   123.6214372 ]
------
Step:5, Action:North
State  261
Old Q Values:  [  763.98513742    26.73544252 -1124.61252079   123.6214372 ]
New Q values:  [  599.39316994    26.73544252 -1124.61252079   123.6214372 ]
Reward: -1  Episode Reward:  15
xxxxx
x. .x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[530.87108315 470.27670432 981.33038323 358.5166536 ]
------
Step:6, Action:East
State  189
Old Q Values:  [ 275.08817949 2886.59923314 9806.70769542  154.04646645]
New Q values:  [ 275.08817949 2886.59923314 4233.251402    154.04646645]
Reward: -1  Episode Reward:  14
xxxxx
x. .x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1037.22774609   635.35015003   534.04109446]
------
Step:7, Action:South
State  196
Old Q Values:  [-2469.90645144   764.61907621 22698.31290191   492.34934406]
New Q values:  [-2469.90645144   946.49196853 22698.31290191   492.34934406]
Reward: 9  Episode Reward:  23
xxxxx
x. .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824  2117.48112683]
------
Step:8, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   855.18004824  2117.48112683]
New Q values:  [   16.82637525 -5807.06396197   855.18004824  1026.21040171]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  599.39316994    26.73544252 -1124.61252079   123.6214372 ]
------
Step:9, Action:North
State  261
Old Q Values:  [  599.39316994    26.73544252 -1124.61252079   123.6214372 ]
New Q values:  [ 3.29881273e+04  2.67354425e+01 -1.12461252e+03  1.23621437e+02]
Reward: -1  Episode Reward:  21
xxxxx
x. gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  10235.04946499      0.        ]
------
Step:10, Action:North
State  181
Old Q Values:  [ 327.12816671  556.93113857 1304.43264163  262.76946019]
New Q values:  [ 666.89760138  556.93113857 1304.43264163  262.76946019]
Reward: 9  Episode Reward:  30
xxxxx
xa .x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1768.82111565  238.35800069    0.        ]
------
Step:11, Action:South
State  103
Old Q Values:  [ 221.30610858 1768.82111565  238.35800069    0.        ]
New Q values:  [ 221.30610858 1001.32756123  238.35800069    0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x  .x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[530.87108315 470.27670432 981.33038323 358.5166536 ]
------
Step:12, Action:East
State  183
Old Q Values:  [530.87108315 470.27670432 981.33038323 358.5166536 ]
New Q values:  [ 530.87108315  470.27670432 5033.33960352  358.5166536 ]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:13, Action:East
State  199
Old Q Values:  [  14.86214194  479.07551978 1337.73675385 1915.70494401]
New Q values:  [  14.86214194  479.07551978 3204.13160084 1915.70494401]
Reward: 9  Episode Reward:  37
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 8878.78966432 -1838.15641278   790.72804752  1673.56197941]
------
Step:14, Action:North
State  218
Old Q Values:  [ 604.7965618  6862.634248      0.         1863.19740327]
New Q values:  [ 420.08905336 6862.634248      0.         1863.19740327]
Reward: 9  Episode Reward:  46
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  5.75901429e+02]
------
Step:15, Action:West
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  5.75901429e+02]
New Q values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  4.98107845e+02]
Reward: -1  Episode Reward:  45
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 775.91012596 894.49091028]
------
Step:16, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 4292.78893337  417.42846525]
New Q values:  [   0.         1166.51141701 4292.78893337  858.10195218]
Reward: -1  Episode Reward:  44
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2305.7685536  -2165.66138672  -180.6       ]
------
Step:17, Action:South
State  108
Old Q Values:  [-8463.16477134  2877.10348735   845.00690416     0.        ]
New Q values:  [-8463.16477134  2259.8349134    845.00690416     0.        ]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  3698.64506154  1470.30928837     0.        ]
------
Step:18, Action:South
State  188
Old Q Values:  [-6523.78898263  3698.64506154  1470.30928837     0.        ]
New Q values:  [-6523.78898263  1776.31889064  1470.30928837     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  752.72159273 -2735.46306511   991.53622009 -5679.36893145]
------
Step:19, Action:East
State  261
Old Q Values:  [ 3.29881273e+04  2.67354425e+01 -1.12461252e+03  1.23621437e+02]
New Q values:  [ 3.29881273e+04  2.67354425e+01 -1.42581888e+02  1.23621437e+02]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824  1026.21040171]
------
Step:20, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   855.18004824  1026.21040171]
New Q values:  [   16.82637525 -5807.06396197   855.18004824   707.34502671]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  752.72159273 -2735.46306511   991.53622009 -5679.36893145]
------
Step:21, Action:East
State  260
Old Q Values:  [  752.72159273 -2735.46306511   991.53622009 -5679.36893145]
New Q values:  [  752.72159273 -2735.46306511   652.56850251 -5679.36893145]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   707.34502671]
------
Step:22, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3280.47195941   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799 64347.02720147   467.88635351]
Reward: 100009  Episode Reward:  100048
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16558.72462062  3591.79939078 -4584.50430574  1067.63960005]
------
Step:1, Action:North
State  216
Old Q Values:  [ -425.46413413 -5399.47678043 -8896.20691497 -1139.7029775 ]
New Q values:  [  702.77167996 -5399.47678043 -8896.20691497 -1139.7029775 ]
Reward: 9  Episode Reward:  9
xxxxx
xg.ax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2891.85777872 -2383.80019164 -6534.78930488]
------
Step:2, Action:South
State  138
Old Q Values:  [ 2.70051915e+02 -2.60438271e+03 -3.22965309e-01  4.98107845e+02]
New Q values:  [ 2.70051915e+02  3.92526430e+03 -3.22965309e-01  4.98107845e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16558.72462062  3591.79939078 -4584.50430574  1067.63960005]
------
Step:3, Action:North
State  216
Old Q Values:  [  702.77167996 -5399.47678043 -8896.20691497 -1139.7029775 ]
New Q values:  [ 1458.08796285 -5399.47678043 -8896.20691497 -1139.7029775 ]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  3.92526430e+03 -3.22965309e-01  4.98107845e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  3.92526430e+03 -3.22965309e-01  4.98107845e+02]
New Q values:  [ 2.70051915e+02  5.37123107e+02 -3.22965309e-01  4.98107845e+02]
Reward: -10001  Episode Reward:  -9994
xxxxx
x.. x
x .gx
x...x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16558.72462062  3591.79939078 -4584.50430574  1067.63960005]
------
Step:1, Action:North
State  216
Old Q Values:  [ 1458.08796285 -5399.47678043 -8896.20691497 -1139.7029775 ]
New Q values:  [  749.77211734 -5399.47678043 -8896.20691497 -1139.7029775 ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  5.37123107e+02 -3.22965309e-01  4.98107845e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  5.37123107e+02 -3.22965309e-01  4.98107845e+02]
New Q values:  [ 2.70051915e+02  2.87788614e+03 -3.22965309e-01  4.98107845e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 8878.78966432 -1838.15641278   790.72804752  1673.56197941]
------
Step:3, Action:North
State  210
Old Q Values:  [ 8878.78966432 -1838.15641278   790.72804752  1673.56197941]
New Q values:  [ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  2.87788614e+03 -3.22965309e-01  4.98107845e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 2.70051915e+02  2.87788614e+03 -3.22965309e-01  4.98107845e+02]
New Q values:  [ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  4.98107845e+02]
Reward: -10001  Episode Reward:  -9994
xxxxx
x.  x
x..gx
x...x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.32169573e+02  1.71703175e+03  1.03161518e+03]
------
Step:1, Action:East
State  195
Old Q Values:  [  38.85388605 4182.6894674  1849.21327227 1169.39963074]
New Q values:  [  38.85388605 4182.6894674  2069.36982143 1169.39963074]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
------
Step:2, Action:North
State  208
Old Q Values:  [16558.72462062  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [ 6778.32220163  3591.79939078 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  4.98107845e+02]
------
Step:3, Action:West
State  136
Old Q Values:  [ -170.77177351  2891.85777872 -2383.80019164 -6534.78930488]
New Q values:  [ -170.77177351  2891.85777872 -2383.80019164 -2509.00264674]
Reward: 9  Episode Reward:  27
xxxxx
x.agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   331.71025071]
------
Step:4, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1183.56473648   476.94773802]
New Q values:  [ -253.44886264 -1902.20915811  1183.56473648   443.85177793]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883  825.57560909 -252.78192178]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558  256.38118883  825.57560909 -252.78192178]
New Q values:  [-252.35169558  256.38118883  684.69966458 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1183.56473648   443.85177793]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   655.83686563   657.5424574 ]
New Q values:  [ -281.736      -1150.91067548   411.16709963   657.5424574 ]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  4.98107845e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  4.98107845e+02]
New Q values:  [ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  5.53712559e+02]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1183.56473648   443.85177793]
------
Step:8, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1183.56473648   443.85177793]
New Q values:  [ -253.44886264 -1902.20915811   638.93966223   443.85177793]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  5.53712559e+02]
------
Step:9, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  5.53712559e+02]
New Q values:  [ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  4.18147761e+02]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   411.16709963   657.5424574 ]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   411.16709963   657.5424574 ]
New Q values:  [ -281.736      -1150.91067548   411.16709963   462.69374745]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -5363.03361968   667.58921495  -180.6       ]
------
Step:11, Action:East
State  107
Old Q Values:  [-252.35169558  256.38118883  684.69966458 -252.78192178]
New Q values:  [-252.35169558  256.38118883  412.08799007 -252.78192178]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   411.16709963   462.69374745]
------
Step:12, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3519.10448991 -4838.84945258]
New Q values:  [-9594.56523706 -8069.05606225  3519.10448991 -1387.83645976]
Reward: -1  Episode Reward:  28
xxxxx
xag x
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1827.67773756   86.99637671    0.        ]
------
Step:13, Action:South
State  105
Old Q Values:  [-180.6        1827.67773756   86.99637671    0.        ]
New Q values:  [-180.6         841.34795246   86.99637671    0.        ]
Reward: 9  Episode Reward:  37
xxxxx
x  gx
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 349.5895248     0.           29.66548264 -178.98      ]
------
Step:14, Action:North
State  185
Old Q Values:  [ 349.5895248     0.           29.66548264 -178.98      ]
New Q values:  [ 262.86220694    0.           29.66548264 -178.98      ]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883  412.08799007 -252.78192178]
------
Step:15, Action:East
State  107
Old Q Values:  [-252.35169558  256.38118883  412.08799007 -252.78192178]
New Q values:  [-252.35169558  256.38118883  303.04332026 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   411.16709963   462.69374745]
------
Step:16, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   411.16709963   462.69374745]
New Q values:  [ -281.736      -1150.91067548   411.16709963   275.39049506]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883  303.04332026 -252.78192178]
------
Step:17, Action:East
State  107
Old Q Values:  [-252.35169558  256.38118883  303.04332026 -252.78192178]
New Q values:  [-252.35169558  256.38118883  312.29922677 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   638.93966223   443.85177793]
------
Step:18, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   638.93966223   443.85177793]
New Q values:  [ -253.44886264 -1902.20915811   380.42019311   443.85177793]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  4.18147761e+02]
------
Step:19, Action:West
State  138
Old Q Values:  [ 2.70051915e+02  1.18171843e+02 -3.22965309e-01  4.18147761e+02]
New Q values:  [270.05191518 118.17184308  -0.32296531 299.81463767]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   380.42019311   443.85177793]
------
Step:20, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   411.16709963   275.39049506]
New Q values:  [ -281.736      -1150.91067548   411.16709963   203.24596605]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883  312.29922677 -252.78192178]
------
Step:21, Action:East
State  107
Old Q Values:  [-252.35169558  256.38118883  312.29922677 -252.78192178]
New Q values:  [-252.35169558  256.38118883  247.6698206  -252.78192178]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   411.16709963   203.24596605]
------
Step:22, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   411.16709963   203.24596605]
New Q values:  [ -281.736      -1150.91067548   253.81123116   203.24596605]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[270.05191518 118.17184308  -0.32296531 299.81463767]
------
Step:23, Action:West
State  138
Old Q Values:  [270.05191518 118.17184308  -0.32296531 299.81463767]
New Q values:  [270.05191518 118.17184308  -0.32296531 195.46922442]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   253.81123116   203.24596605]
------
Step:24, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3519.10448991 -1387.83645976]
New Q values:  [-9594.56523706 -8069.05606225  2274.59912958 -1387.83645976]
Reward: -1  Episode Reward:  26
xxxxx
x gax
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  2891.85777872 -2383.80019164 -2509.00264674]
------
Step:25, Action:South
State  136
Old Q Values:  [ -170.77177351  2891.85777872 -2383.80019164 -2509.00264674]
New Q values:  [ -170.77177351  1381.07474669 -2383.80019164 -2509.00264674]
Reward: -1  Episode Reward:  25
xxxxx
x  gx
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  749.77211734 -5399.47678043 -8896.20691497 -1139.7029775 ]
------
Step:26, Action:West
State  216
Old Q Values:  [  749.77211734 -5399.47678043 -8896.20691497 -1139.7029775 ]
New Q values:  [  749.77211734 -5399.47678043 -8896.20691497   217.11349858]
Reward: -1  Episode Reward:  24
xxxxx
x g x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1737.6681949  2245.31563194 1141.49622464]
------
Step:27, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  2245.31563194 1141.49622464]
New Q values:  [  62.8218634  1737.6681949  1122.45788798 1141.49622464]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  749.77211734 -5399.47678043 -8896.20691497   217.11349858]
------
Step:28, Action:North
State  216
Old Q Values:  [  749.77211734 -5399.47678043 -8896.20691497   217.11349858]
New Q values:  [  380.32442149 -5399.47678043 -8896.20691497   217.11349858]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[270.05191518 118.17184308  -0.32296531 195.46922442]
------
Step:29, Action:North
State  138
Old Q Values:  [270.05191518 118.17184308  -0.32296531 195.46922442]
New Q values:  [  8.43634063 118.17184308  -0.32296531 195.46922442]
Reward: -301  Episode Reward:  -279
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 118.17184308  -0.32296531 195.46922442]
------
Step:30, Action:West
State  138
Old Q Values:  [  8.43634063 118.17184308  -0.32296531 195.46922442]
New Q values:  [  8.43634063 118.17184308  -0.32296531 153.73105911]
Reward: -1  Episode Reward:  -280
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   253.81123116   203.24596605]
------
Step:31, Action:East
State  126
Old Q Values:  [  0.         331.64678262 775.91012596 894.49091028]
New Q values:  [  0.         331.64678262 355.88336812 894.49091028]
Reward: -1  Episode Reward:  -281
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 118.17184308  -0.32296531 153.73105911]
------
Step:32, Action:West
State  138
Old Q Values:  [  8.43634063 118.17184308  -0.32296531 153.73105911]
New Q values:  [  8.43634063 118.17184308  -0.32296531 137.03579299]
Reward: -1  Episode Reward:  -282
xxxxx
x a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   253.81123116   203.24596605]
------
Step:33, Action:East
State  126
Old Q Values:  [  0.         331.64678262 355.88336812 894.49091028]
New Q values:  [  0.         331.64678262 182.86408515 894.49091028]
Reward: -1  Episode Reward:  -283
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 118.17184308  -0.32296531 137.03579299]
------
Step:34, Action:West
State  138
Old Q Values:  [  8.43634063 118.17184308  -0.32296531 137.03579299]
New Q values:  [  8.43634063 118.17184308  -0.32296531 130.35768654]
Reward: -1  Episode Reward:  -284
xxxxx
x a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   253.81123116   203.24596605]
------
Step:35, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2274.59912958 -1387.83645976]
New Q values:  [-9594.56523706 -8069.05606225  1323.56207584 -1387.83645976]
Reward: -1  Episode Reward:  -285
xxxxx
xg ax
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1381.07474669 -2383.80019164 -2509.00264674]
------
Step:36, Action:South
State  136
Old Q Values:  [ -170.77177351  1381.07474669 -2383.80019164 -2509.00264674]
New Q values:  [ -170.77177351   665.92722512 -2383.80019164 -2509.00264674]
Reward: -1  Episode Reward:  -286
xxxxx
x g x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  380.32442149 -5399.47678043 -8896.20691497   217.11349858]
------
Step:37, Action:North
State  216
Old Q Values:  [  380.32442149 -5399.47678043 -8896.20691497   217.11349858]
New Q values:  [-5648.69206387 -5399.47678043 -8896.20691497   217.11349858]
Reward: -10001  Episode Reward:  -10287
xxxxx
x  gx
x   x
x. .x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   380.42019311   443.85177793]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   253.81123116   203.24596605]
New Q values:  [ -281.736      -1150.91067548   253.81123116   163.61274307]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883  247.6698206  -252.78192178]
------
Step:2, Action:South
State  110
Old Q Values:  [-239.29051573  784.08676736  673.34667403 -180.6       ]
New Q values:  [-239.29051573  823.56832906  673.34667403 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa..x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -7507.54632711  1681.77874037     0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 530.87108315  470.27670432 5033.33960352  358.5166536 ]
New Q values:  [ 530.87108315  470.27670432 6660.14329163  358.5166536 ]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:4, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.22476467e+04 3.89099663e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 1.22476467e+04 3.62058893e+03 9.06816004e+03]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 420.08905336 6862.634248      0.         1863.19740327]
------
Step:5, Action:West
State  216
Old Q Values:  [-5648.69206387 -5399.47678043 -8896.20691497   217.11349858]
New Q values:  [-5648.69206387 -5399.47678043 -8896.20691497   581.38588397]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.65046828e+03 -5.74006972e+03  2.00341972e+02]
------
Step:6, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.65046828e+03 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.21120214e+03 -5.74006972e+03  2.00341972e+02]
Reward: 9  Episode Reward:  44
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1818.7161076   677.18474264]
------
Step:7, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 64347.02720147   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799 28767.6492983    467.88635351]
Reward: -1  Episode Reward:  43
xxxxx
x g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10098.12805903 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:8, Action:North
State  288
Old Q Values:  [10098.12805903 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 6072.1478841  -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6778.32220163  3591.79939078 -4584.50430574  1067.63960005]
------
Step:9, Action:North
State  216
Old Q Values:  [-5648.69206387 -5399.47678043 -8896.20691497   581.38588397]
New Q values:  [-2214.96951958 -5399.47678043 -8896.20691497   581.38588397]
Reward: 9  Episode Reward:  51
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 118.17184308  -0.32296531 130.35768654]
------
Step:10, Action:West
State  138
Old Q Values:  [  8.43634063 118.17184308  -0.32296531 130.35768654]
New Q values:  [  8.43634063 118.17184308  -0.32296531 184.698608  ]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   380.42019311   443.85177793]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   253.81123116   163.61274307]
New Q values:  [ -281.736      -1150.91067548   253.81123116   141.75945388]
Reward: -1  Episode Reward:  49
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  256.38118883  247.6698206  -252.78192178]
------
Step:12, Action:South
State  107
Old Q Values:  [-252.35169558  256.38118883  247.6698206  -252.78192178]
New Q values:  [-252.35169558 1313.8195537   247.6698206  -252.78192178]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 348.84368338    0.         4039.55692722    0.        ]
------
Step:13, Action:East
State  187
Old Q Values:  [ 348.84368338    0.         4039.55692722    0.        ]
New Q values:  [ 348.84368338    0.         2827.90809619    0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  4042.28441766     0.        ]
------
Step:14, Action:East
State  200
Old Q Values:  [  62.8218634  1737.6681949  1122.45788798 1141.49622464]
New Q values:  [  62.8218634  1737.6681949   622.79892038 1141.49622464]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-2214.96951958 -5399.47678043 -8896.20691497   581.38588397]
------
Step:15, Action:North
State  218
Old Q Values:  [ 420.08905336 6862.634248      0.         1863.19740327]
New Q values:  [ 222.84520374 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 118.17184308  -0.32296531 184.698608  ]
------
Step:16, Action:West
State  138
Old Q Values:  [  8.43634063 118.17184308  -0.32296531 184.698608  ]
New Q values:  [  8.43634063 118.17184308  -0.32296531 206.43497658]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   380.42019311   443.85177793]
------
Step:17, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   380.42019311   443.85177793]
New Q values:  [ -253.44886264 -1902.20915811   380.42019311   571.08657728]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1313.8195537   247.6698206  -252.78192178]
------
Step:18, Action:South
State  107
Old Q Values:  [-252.35169558 1313.8195537   247.6698206  -252.78192178]
New Q values:  [-252.35169558  603.78648356  247.6698206  -252.78192178]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 262.86220694    0.           29.66548264 -178.98      ]
------
Step:19, Action:North
State  187
Old Q Values:  [ 348.84368338    0.         2827.90809619    0.        ]
New Q values:  [ 320.07341842    0.         2827.90809619    0.        ]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  603.78648356  247.6698206  -252.78192178]
------
Step:20, Action:South
State  98
Old Q Values:  [    0.         43483.63171141 48660.81170179     0.        ]
New Q values:  [    0.         43751.82428687 48660.81170179     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:21, Action:East
State  184
Old Q Values:  [ 162.71210215    0.         6289.0510124     0.        ]
New Q values:  [ 162.71210215    0.         3036.32086343    0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1737.6681949   622.79892038 1141.49622464]
------
Step:22, Action:South
State  200
Old Q Values:  [  62.8218634  1737.6681949   622.79892038 1141.49622464]
New Q values:  [  62.8218634  9324.76206745  622.79892038 1141.49622464]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 28767.6492983    467.88635351]
------
Step:23, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 28767.6492983    467.88635351]
New Q values:  [-2527.46239811 -8521.23367799 13328.10408455   467.88635351]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6072.1478841  -6442.16912869 -8192.20126966  1680.85057651]
------
Step:24, Action:North
State  288
Old Q Values:  [ 6072.1478841  -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 2602.67491883 -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-2214.96951958 -5399.47678043 -8896.20691497   581.38588397]
------
Step:25, Action:North
State  216
Old Q Values:  [-2214.96951958 -5399.47678043 -8896.20691497   581.38588397]
New Q values:  [ -824.65731486 -5399.47678043 -8896.20691497   581.38588397]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 118.17184308  -0.32296531 206.43497658]
------
Step:26, Action:West
State  136
Old Q Values:  [ -170.77177351   665.92722512 -2383.80019164 -2509.00264674]
New Q values:  [ -170.77177351   665.92722512 -2383.80019164  -904.68798348]
Reward: -1  Episode Reward:  34
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   331.71025071]
------
Step:27, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   380.42019311   571.08657728]
New Q values:  [ -253.44886264 -1902.20915811   380.42019311   408.97057598]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  603.78648356  247.6698206  -252.78192178]
------
Step:28, Action:South
State  107
Old Q Values:  [-252.35169558  603.78648356  247.6698206  -252.78192178]
New Q values:  [-252.35169558  319.77325551  247.6698206  -252.78192178]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 262.86220694    0.           29.66548264 -178.98      ]
------
Step:29, Action:North
State  184
Old Q Values:  [ 162.71210215    0.         3036.32086343    0.        ]
New Q values:  [ 264.76160535    0.         3036.32086343    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -5363.03361968   667.58921495  -180.6       ]
------
Step:30, Action:East
State  107
Old Q Values:  [-252.35169558  319.77325551  247.6698206  -252.78192178]
New Q values:  [-252.35169558  319.77325551  174.61129759 -252.78192178]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   253.81123116   141.75945388]
------
Step:31, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1323.56207584 -1387.83645976]
New Q values:  [-9594.56523706 -8069.05606225   728.60299787 -1387.83645976]
Reward: -1  Episode Reward:  29
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   665.92722512 -2383.80019164  -904.68798348]
------
Step:32, Action:South
State  138
Old Q Values:  [  8.43634063 118.17184308  -0.32296531 206.43497658]
New Q values:  [  8.43634063 221.08450242  -0.32296531 206.43497658]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -824.65731486 -5399.47678043 -8896.20691497   581.38588397]
------
Step:33, Action:North
State  218
Old Q Values:  [ 222.84520374 6862.634248      0.         1863.19740327]
New Q values:  [ 154.86343222 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 221.08450242  -0.32296531 206.43497658]
------
Step:34, Action:South
State  130
Old Q Values:  [26290.62961917  4122.28358604  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  2972.59794694  -180.00807518 38692.55360318]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
------
Step:35, Action:North
State  216
Old Q Values:  [ -824.65731486 -5399.47678043 -8896.20691497   581.38588397]
New Q values:  [ -264.13757522 -5399.47678043 -8896.20691497   581.38588397]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 221.08450242  -0.32296531 206.43497658]
------
Step:36, Action:South
State  138
Old Q Values:  [  8.43634063 221.08450242  -0.32296531 206.43497658]
New Q values:  [  8.43634063 262.24956616  -0.32296531 206.43497658]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -264.13757522 -5399.47678043 -8896.20691497   581.38588397]
------
Step:37, Action:North
State  216
Old Q Values:  [ -264.13757522 -5399.47678043 -8896.20691497   581.38588397]
New Q values:  [   93.52313745 -5399.47678043 -8896.20691497   581.38588397]
Reward: -1  Episode Reward:  23
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   665.92722512 -2383.80019164  -904.68798348]
------
Step:38, Action:South
State  138
Old Q Values:  [  8.43634063 262.24956616  -0.32296531 206.43497658]
New Q values:  [  8.43634063 278.71559165  -0.32296531 206.43497658]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   93.52313745 -5399.47678043 -8896.20691497   581.38588397]
------
Step:39, Action:North
State  216
Old Q Values:  [   93.52313745 -5399.47678043 -8896.20691497   581.38588397]
New Q values:  [  120.42393248 -5399.47678043 -8896.20691497   581.38588397]
Reward: -1  Episode Reward:  21
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 206.43497658]
------
Step:40, Action:West
State  138
Old Q Values:  [  8.43634063 278.71559165  -0.32296531 206.43497658]
New Q values:  [  8.43634063 278.71559165  -0.32296531 158.11735998]
Reward: -1  Episode Reward:  20
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   253.81123116   141.75945388]
------
Step:41, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   728.60299787 -1387.83645976]
New Q values:  [-9594.56523706 -8069.05606225   490.61936669 -1387.83645976]
Reward: -1  Episode Reward:  19
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   665.92722512 -2383.80019164  -904.68798348]
------
Step:42, Action:South
State  136
Old Q Values:  [ -170.77177351   665.92722512 -2383.80019164  -904.68798348]
New Q values:  [ -170.77177351   440.18665524 -2383.80019164  -904.68798348]
Reward: -1  Episode Reward:  18
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  120.42393248 -5399.47678043 -8896.20691497   581.38588397]
------
Step:43, Action:West
State  216
Old Q Values:  [  120.42393248 -5399.47678043 -8896.20691497   581.38588397]
New Q values:  [  120.42393248 -5399.47678043 -8896.20691497  3029.38297382]
Reward: -1  Episode Reward:  17
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  9324.76206745  622.79892038 1141.49622464]
------
Step:44, Action:South
State  200
Old Q Values:  [  62.8218634  9324.76206745  622.79892038 1141.49622464]
New Q values:  [  62.8218634  7727.73605234  622.79892038 1141.49622464]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799 13328.10408455   467.88635351]
------
Step:45, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799 13328.10408455   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799  6111.44410947   467.88635351]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2602.67491883 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:46, Action:North
State  288
Old Q Values:  [ 2602.67491883 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 1949.28485968 -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  14
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  120.42393248 -5399.47678043 -8896.20691497  3029.38297382]
------
Step:47, Action:West
State  216
Old Q Values:  [  120.42393248 -5399.47678043 -8896.20691497  3029.38297382]
New Q values:  [  120.42393248 -5399.47678043 -8896.20691497  3529.47400523]
Reward: -1  Episode Reward:  13
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  7727.73605234  622.79892038 1141.49622464]
------
Step:48, Action:South
State  200
Old Q Values:  [  62.8218634  7727.73605234  622.79892038 1141.49622464]
New Q values:  [  62.8218634  4923.92765378  622.79892038 1141.49622464]
Reward: -1  Episode Reward:  12
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  6111.44410947   467.88635351]
------
Step:49, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  6111.44410947   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799  3028.76310169   467.88635351]
Reward: -1  Episode Reward:  11
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1949.28485968 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:50, Action:North
State  288
Old Q Values:  [ 1949.28485968 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 1837.95614544 -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  120.42393248 -5399.47678043 -8896.20691497  3529.47400523]
------
Step:51, Action:West
State  216
Old Q Values:  [  120.42393248 -5399.47678043 -8896.20691497  3529.47400523]
New Q values:  [  120.42393248 -5399.47678043 -8896.20691497  2888.36789823]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  4923.92765378  622.79892038 1141.49622464]
------
Step:52, Action:South
State  194
Old Q Values:  [-6.00000000e-01  1.32169573e+02  1.71703175e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  9.60896760e+02  1.71703175e+03  1.03161518e+03]
Reward: -1  Episode Reward:  8
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  3028.76310169   467.88635351]
------
Step:53, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  3028.76310169   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799  1762.29208431   467.88635351]
Reward: -1  Episode Reward:  7
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1837.95614544 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:54, Action:North
State  288
Old Q Values:  [ 1837.95614544 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 1601.09282764 -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  6
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  120.42393248 -5399.47678043 -8896.20691497  2888.36789823]
------
Step:55, Action:North
State  216
Old Q Values:  [  120.42393248 -5399.47678043 -8896.20691497  2888.36789823]
New Q values:  [  131.18425049 -5399.47678043 -8896.20691497  2888.36789823]
Reward: -1  Episode Reward:  5
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 158.11735998]
------
Step:56, Action:South
State  130
Old Q Values:  [26290.62961917  2972.59794694  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  2512.72369129  -180.00807518 38692.55360318]
Reward: -1  Episode Reward:  4
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
------
Step:57, Action:North
State  218
Old Q Values:  [ 154.86343222 6862.634248      0.         1863.19740327]
New Q values:  [ 144.96005039 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  3
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 158.11735998]
------
Step:58, Action:South
State  130
Old Q Values:  [26290.62961917  2512.72369129  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  2328.77398904  -180.00807518 38692.55360318]
Reward: -1  Episode Reward:  2
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
------
Step:59, Action:North
State  218
Old Q Values:  [ 144.96005039 6862.634248      0.         1863.19740327]
New Q values:  [ 140.99869765 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  1
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 158.11735998]
------
Step:60, Action:South
State  130
Old Q Values:  [26290.62961917  2328.77398904  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  2255.19410813  -180.00807518 38692.55360318]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
------
Step:61, Action:North
State  218
Old Q Values:  [ 140.99869765 6862.634248      0.         1863.19740327]
New Q values:  [ 139.41415656 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  -1
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 158.11735998]
------
Step:62, Action:South
State  130
Old Q Values:  [26290.62961917  2255.19410813  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  2225.76215577  -180.00807518 38692.55360318]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
------
Step:63, Action:North
State  218
Old Q Values:  [ 139.41415656 6862.634248      0.         1863.19740327]
New Q values:  [ 138.78034012 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  -3
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 158.11735998]
------
Step:64, Action:South
State  130
Old Q Values:  [26290.62961917  2225.76215577  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  2213.98937483  -180.00807518 38692.55360318]
Reward: -1  Episode Reward:  -4
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
------
Step:65, Action:North
State  216
Old Q Values:  [  131.18425049 -5399.47678043 -8896.20691497  2888.36789823]
New Q values:  [  135.48837769 -5399.47678043 -8896.20691497  2888.36789823]
Reward: -1  Episode Reward:  -5
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 158.11735998]
------
Step:66, Action:South
State  136
Old Q Values:  [ -170.77177351   440.18665524 -2383.80019164  -904.68798348]
New Q values:  [ -170.77177351  1041.98503156 -2383.80019164  -904.68798348]
Reward: -1  Episode Reward:  -6
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  135.48837769 -5399.47678043 -8896.20691497  2888.36789823]
------
Step:67, Action:West
State  216
Old Q Values:  [  135.48837769 -5399.47678043 -8896.20691497  2888.36789823]
New Q values:  [  135.48837769 -5399.47678043 -8896.20691497  2631.92545542]
Reward: -1  Episode Reward:  -7
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  4923.92765378  622.79892038 1141.49622464]
------
Step:68, Action:South
State  200
Old Q Values:  [  62.8218634  4923.92765378  622.79892038 1141.49622464]
New Q values:  [  62.8218634  2497.6586868   622.79892038 1141.49622464]
Reward: -1  Episode Reward:  -8
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1762.29208431   467.88635351]
------
Step:69, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1762.29208431   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799  1208.57200668   467.88635351]
Reward: -1  Episode Reward:  -9
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1601.09282764 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:70, Action:North
State  288
Old Q Values:  [ 1601.09282764 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 1429.41476768 -6442.16912869 -8192.20126966  1680.85057651]
Reward: -1  Episode Reward:  -10
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  135.48837769 -5399.47678043 -8896.20691497  2631.92545542]
------
Step:71, Action:North
State  216
Old Q Values:  [  135.48837769 -5399.47678043 -8896.20691497  2631.92545542]
New Q values:  [  366.19086055 -5399.47678043 -8896.20691497  2631.92545542]
Reward: -1  Episode Reward:  -11
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1041.98503156 -2383.80019164  -904.68798348]
------
Step:72, Action:South
State  136
Old Q Values:  [ -170.77177351  1041.98503156 -2383.80019164  -904.68798348]
New Q values:  [ -170.77177351  1205.77164925 -2383.80019164  -904.68798348]
Reward: -1  Episode Reward:  -12
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  366.19086055 -5399.47678043 -8896.20691497  2631.92545542]
------
Step:73, Action:West
State  216
Old Q Values:  [  366.19086055 -5399.47678043 -8896.20691497  2631.92545542]
New Q values:  [  366.19086055 -5399.47678043 -8896.20691497  1801.46778821]
Reward: -1  Episode Reward:  -13
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2497.6586868   622.79892038 1141.49622464]
------
Step:74, Action:South
State  200
Old Q Values:  [  62.8218634  2497.6586868   622.79892038 1141.49622464]
New Q values:  [  62.8218634  1361.03507672  622.79892038 1141.49622464]
Reward: -1  Episode Reward:  -14
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799  1208.57200668   467.88635351]
------
Step:75, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799  1208.57200668   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799   987.08397563   467.88635351]
Reward: -1  Episode Reward:  -15
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1429.41476768 -6442.16912869 -8192.20126966  1680.85057651]
------
Step:76, Action:West
State  288
Old Q Values:  [ 1429.41476768 -6442.16912869 -8192.20126966  1680.85057651]
New Q values:  [ 1429.41476768 -6442.16912869 -8192.20126966   967.86542329]
Reward: -1  Episode Reward:  -16
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   987.08397563   467.88635351]
------
Step:77, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   987.08397563   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799   823.05802056   467.88635351]
Reward: -1  Episode Reward:  -17
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1429.41476768 -6442.16912869 -8192.20126966   967.86542329]
------
Step:78, Action:North
State  288
Old Q Values:  [ 1429.41476768 -6442.16912869 -8192.20126966   967.86542329]
New Q values:  [ 1111.60624354 -6442.16912869 -8192.20126966   967.86542329]
Reward: -1  Episode Reward:  -18
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  366.19086055 -5399.47678043 -8896.20691497  1801.46778821]
------
Step:79, Action:North
State  216
Old Q Values:  [  366.19086055 -5399.47678043 -8896.20691497  1801.46778821]
New Q values:  [  229.49102171 -5399.47678043 -8896.20691497  1801.46778821]
Reward: -1  Episode Reward:  -19
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 158.11735998]
------
Step:80, Action:West
State  138
Old Q Values:  [  8.43634063 278.71559165  -0.32296531 158.11735998]
New Q values:  [  8.43634063 278.71559165  -0.32296531 138.79031334]
Reward: -1  Episode Reward:  -20
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   253.81123116   141.75945388]
------
Step:81, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   380.42019311   408.97057598]
New Q values:  [ -253.44886264 -1902.20915811   235.18275474   408.97057598]
Reward: -1  Episode Reward:  -21
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 138.79031334]
------
Step:82, Action:West
State  138
Old Q Values:  [  8.43634063 278.71559165  -0.32296531 138.79031334]
New Q values:  [  8.43634063 278.71559165  -0.32296531 177.60729813]
Reward: -1  Episode Reward:  -22
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   235.18275474   408.97057598]
------
Step:83, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   235.18275474   408.97057598]
New Q values:  [ -253.44886264 -1902.20915811   235.18275474   258.92020704]
Reward: -1  Episode Reward:  -23
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  319.77325551  174.61129759 -252.78192178]
------
Step:84, Action:South
State  105
Old Q Values:  [-180.6         841.34795246   86.99637671    0.        ]
New Q values:  [-180.6         414.79784307   86.99637671    0.        ]
Reward: -1  Episode Reward:  -24
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 262.86220694    0.           29.66548264 -178.98      ]
------
Step:85, Action:North
State  185
Old Q Values:  [ 262.86220694    0.           29.66548264 -178.98      ]
New Q values:  [ 200.47685943    0.           29.66548264 -178.98      ]
Reward: -1  Episode Reward:  -25
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  319.77325551  174.61129759 -252.78192178]
------
Step:86, Action:South
State  107
Old Q Values:  [-252.35169558  319.77325551  174.61129759 -252.78192178]
New Q values:  [-252.35169558  975.68173106  174.61129759 -252.78192178]
Reward: -1  Episode Reward:  -26
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         2827.90809619    0.        ]
------
Step:87, Action:East
State  185
Old Q Values:  [ 200.47685943    0.           29.66548264 -178.98      ]
New Q values:  [ 200.47685943    0.          374.62683656 -178.98      ]
Reward: -1  Episode Reward:  -27
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.21120214e+03 -5.74006972e+03  2.00341972e+02]
------
Step:88, Action:South
State  203
Old Q Values:  [3.60604218e+00 1.22476467e+04 3.62058893e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 5.44407350e+03 3.62058893e+03 9.06816004e+03]
Reward: -1  Episode Reward:  -28
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 677.52857079 -168.92307549 1818.7161076   677.18474264]
------
Step:89, Action:North
State  273
Old Q Values:  [ 677.52857079 -168.92307549 1818.7161076   677.18474264]
New Q values:  [ 633.77207181 -168.92307549 1818.7161076   677.18474264]
Reward: -1  Episode Reward:  -29
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.21120214e+03 -5.74006972e+03  2.00341972e+02]
------
Step:90, Action:South
State  200
Old Q Values:  [  62.8218634  1361.03507672  622.79892038 1141.49622464]
New Q values:  [  62.8218634   790.73143686  622.79892038 1141.49622464]
Reward: -1  Episode Reward:  -30
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   823.05802056   467.88635351]
------
Step:91, Action:East
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   823.05802056   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799   662.10508128   467.88635351]
Reward: -1  Episode Reward:  -31
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1111.60624354 -6442.16912869 -8192.20126966   967.86542329]
------
Step:92, Action:North
State  288
Old Q Values:  [ 1111.60624354 -6442.16912869 -8192.20126966   967.86542329]
New Q values:  [  984.48283388 -6442.16912869 -8192.20126966   967.86542329]
Reward: -1  Episode Reward:  -32
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  229.49102171 -5399.47678043 -8896.20691497  1801.46778821]
------
Step:93, Action:West
State  216
Old Q Values:  [  229.49102171 -5399.47678043 -8896.20691497  1801.46778821]
New Q values:  [  229.49102171 -5399.47678043 -8896.20691497  1083.34775878]
Reward: -1  Episode Reward:  -33
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.21120214e+03 -5.74006972e+03  2.00341972e+02]
------
Step:94, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.21120214e+03 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.02949569e+03 -5.74006972e+03  2.00341972e+02]
Reward: -1  Episode Reward:  -34
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 1818.7161076   677.18474264]
------
Step:95, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 1818.7161076   677.18474264]
New Q values:  [ 633.77207181 -168.92307549 1022.2312932   677.18474264]
Reward: -1  Episode Reward:  -35
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  984.48283388 -6442.16912869 -8192.20126966   967.86542329]
------
Step:96, Action:West
State  288
Old Q Values:  [  984.48283388 -6442.16912869 -8192.20126966   967.86542329]
New Q values:  [  984.48283388 -6442.16912869 -8192.20126966   693.21555728]
Reward: -1  Episode Reward:  -36
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 1022.2312932   677.18474264]
------
Step:97, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   662.10508128   467.88635351]
New Q values:  [-2527.46239811 -8521.23367799   662.10508128 75947.69225841]
Reward: 100009  Episode Reward:  99973
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6778.32220163  3591.79939078 -4584.50430574  1067.63960005]
------
Step:1, Action:North
State  216
Old Q Values:  [  229.49102171 -5399.47678043 -8896.20691497  1083.34775878]
New Q values:  [  180.81108618 -5399.47678043 -8896.20691497  1083.34775878]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 177.60729813]
------
Step:2, Action:South
State  136
Old Q Values:  [ -170.77177351  1205.77164925 -2383.80019164  -904.68798348]
New Q values:  [ -170.77177351   806.71298734 -2383.80019164  -904.68798348]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  180.81108618 -5399.47678043 -8896.20691497  1083.34775878]
------
Step:3, Action:West
State  216
Old Q Values:  [  180.81108618 -5399.47678043 -8896.20691497  1083.34775878]
New Q values:  [  180.81108618 -5399.47678043 -8896.20691497  4262.94647612]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.45197447e+03 8.30532462e+03 1.27673579e+04]
------
Step:4, Action:West
State  193
Old Q Values:  [-5922.26708831  1037.22774609   635.35015003   534.04109446]
New Q values:  [-5922.26708831  1037.22774609   635.35015003  1488.99185838]
Reward: 9  Episode Reward:  16
xxxxx
x..gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 2886.59923314 4233.251402    154.04646645]
------
Step:5, Action:East
State  189
Old Q Values:  [ 275.08817949 2886.59923314 4233.251402    154.04646645]
New Q values:  [ 275.08817949 2886.59923314 2139.39811831  154.04646645]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1037.22774609   635.35015003  1488.99185838]
------
Step:6, Action:West
State  193
Old Q Values:  [-5922.26708831  1037.22774609   635.35015003  1488.99185838]
New Q values:  [-5922.26708831  1037.22774609   635.35015003  1460.9765133 ]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 2886.59923314 2139.39811831  154.04646645]
------
Step:7, Action:South
State  189
Old Q Values:  [ 275.08817949 2886.59923314 2139.39811831  154.04646645]
New Q values:  [  275.08817949 11056.4778761   2139.39811831   154.04646645]
Reward: 9  Episode Reward:  23
xxxxx
x.. x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3.29881273e+04  2.67354425e+01 -1.42581888e+02  1.23621437e+02]
------
Step:8, Action:North
State  261
Old Q Values:  [ 3.29881273e+04  2.67354425e+01 -1.42581888e+02  1.23621437e+02]
New Q values:  [16511.59427329    26.73544252  -142.5818878    123.6214372 ]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949 11056.4778761   2139.39811831   154.04646645]
------
Step:9, Action:South
State  189
Old Q Values:  [  275.08817949 11056.4778761   2139.39811831   154.04646645]
New Q values:  [ 275.08817949 9375.46943243 2139.39811831  154.04646645]
Reward: -1  Episode Reward:  21
xxxxx
x..gx
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[16511.59427329    26.73544252  -142.5818878    123.6214372 ]
------
Step:10, Action:North
State  261
Old Q Values:  [16511.59427329    26.73544252  -142.5818878    123.6214372 ]
New Q values:  [9416.67853904   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 9375.46943243 2139.39811831  154.04646645]
------
Step:11, Action:South
State  189
Old Q Values:  [ 275.08817949 9375.46943243 2139.39811831  154.04646645]
New Q values:  [ 275.08817949 6574.59133468 2139.39811831  154.04646645]
Reward: -1  Episode Reward:  19
xxxxx
x..gx
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[9416.67853904   26.73544252 -142.5818878   123.6214372 ]
------
Step:12, Action:North
State  261
Old Q Values:  [9416.67853904   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [5738.44881602   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  18
xxxxx
x.g x
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 6574.59133468 2139.39811831  154.04646645]
------
Step:13, Action:South
State  189
Old Q Values:  [ 275.08817949 6574.59133468 2139.39811831  154.04646645]
New Q values:  [ 275.08817949 4350.77117868 2139.39811831  154.04646645]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5738.44881602   26.73544252 -142.5818878   123.6214372 ]
------
Step:14, Action:North
State  261
Old Q Values:  [5738.44881602   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [3600.01088001   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 4350.77117868 2139.39811831  154.04646645]
------
Step:15, Action:South
State  188
Old Q Values:  [-6523.78898263  1776.31889064  1470.30928837     0.        ]
New Q values:  [-6523.78898263   935.74403408  1470.30928837     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
xg. x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  752.72159273 -2735.46306511   652.56850251 -5679.36893145]
------
Step:16, Action:North
State  260
Old Q Values:  [  752.72159273 -2735.46306511   652.56850251 -5679.36893145]
New Q values:  [-5258.4185764  -2735.46306511   652.56850251 -5679.36893145]
Reward: -10001  Episode Reward:  -9986
xxxxx
x.. x
xg  x
x ..x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1050.56650176  982.40572119 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 1001.32756123  238.35800069    0.        ]
New Q values:  [221.30610858 797.26081698 238.35800069   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 666.89760138  556.93113857 1304.43264163  262.76946019]
------
Step:2, Action:East
State  183
Old Q Values:  [ 530.87108315  470.27670432 6660.14329163  358.5166536 ]
New Q values:  [ 530.87108315  470.27670432 3630.6967969   358.5166536 ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194  479.07551978 3204.13160084 1915.70494401]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.60896760e+02  1.71703175e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  9.60896760e+02  2.01649721e+03  1.03161518e+03]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
------
Step:4, Action:North
State  210
Old Q Values:  [ 4414.2817084  -1838.15641278   790.72804752  1673.56197941]
New Q values:  [13378.87876431 -1838.15641278   790.72804752  1673.56197941]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2213.98937483  -180.00807518 38692.55360318]
------
Step:5, Action:West
State  138
Old Q Values:  [  8.43634063 278.71559165  -0.32296531 177.60729813]
New Q values:  [  8.43634063 278.71559165  -0.32296531 152.5862886 ]
Reward: 9  Episode Reward:  45
xxxxx
x a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   253.81123116   141.75945388]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   253.81123116   141.75945388]
New Q values:  [ -281.736      -1150.91067548   184.53916996   141.75945388]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 278.71559165  -0.32296531 152.5862886 ]
------
Step:7, Action:South
State  138
Old Q Values:  [  8.43634063 278.71559165  -0.32296531 152.5862886 ]
New Q values:  [ 8.43634063e+00  1.38977018e+03 -3.22965309e-01  1.52586289e+02]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  180.81108618 -5399.47678043 -8896.20691497  4262.94647612]
------
Step:8, Action:West
State  216
Old Q Values:  [  180.81108618 -5399.47678043 -8896.20691497  4262.94647612]
New Q values:  [  180.81108618 -5399.47678043 -8896.20691497  2047.02745784]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634   790.73143686  622.79892038 1141.49622464]
------
Step:9, Action:West
State  200
Old Q Values:  [  62.8218634   790.73143686  622.79892038 1141.49622464]
New Q values:  [ 62.8218634  790.73143686 622.79892038 568.38654082]
Reward: -1  Episode Reward:  41
xxxxx
x g x
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 200.47685943    0.          374.62683656 -178.98      ]
------
Step:10, Action:East
State  184
Old Q Values:  [ 264.76160535    0.         3036.32086343    0.        ]
New Q values:  [ 264.76160535    0.         1451.14777643    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 62.8218634  790.73143686 622.79892038 568.38654082]
------
Step:11, Action:South
State  200
Old Q Values:  [ 62.8218634  790.73143686 622.79892038 568.38654082]
New Q values:  [   62.8218634  23106.00025226   622.79892038   568.38654082]
Reward: 9  Episode Reward:  49
xxxxx
x   x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[-2527.46239811 -8521.23367799   662.10508128 75947.69225841]
------
Step:12, Action:West
State  272
Old Q Values:  [-2527.46239811 -8521.23367799   662.10508128 75947.69225841]
New Q values:  [ -2527.46239811  -8521.23367799    662.10508128 100538.04874336]
Reward: 100009  Episode Reward:  100058
xxxxx
xg  x
x   x
xa  x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799    662.10508128 100538.04874336]
------
Step:1, Action:West
State  276
Old Q Values:  [   16.82637525 -5807.06396197   855.18004824   707.34502671]
New Q values:  [   16.82637525 -5807.06396197   855.18004824   484.10856144]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5258.4185764  -2735.46306511   652.56850251 -5679.36893145]
------
Step:2, Action:East
State  260
Old Q Values:  [-5258.4185764  -2735.46306511   652.56850251 -5679.36893145]
New Q values:  [-5258.4185764  -2735.46306511 30421.84202401 -5679.36893145]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799    662.10508128 100538.04874336]
------
Step:3, Action:East
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799    662.10508128 100538.04874336]
New Q values:  [ -2527.46239811  -8521.23367799    565.58688268 100538.04874336]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  984.48283388 -6442.16912869 -8192.20126966   693.21555728]
------
Step:4, Action:North
State  288
Old Q Values:  [  984.48283388 -6442.16912869 -8192.20126966   693.21555728]
New Q values:  [ 4412.85676285 -6442.16912869 -8192.20126966   693.21555728]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13378.87876431 -1838.15641278   790.72804752  1673.56197941]
------
Step:5, Action:North
State  210
Old Q Values:  [13378.87876431 -1838.15641278   790.72804752  1673.56197941]
New Q values:  [ 5773.88255957 -1838.15641278   790.72804752  1673.56197941]
Reward: 9  Episode Reward:  35
xxxxx
x .ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.38977018e+03 -3.22965309e-01  1.52586289e+02]
------
Step:6, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.38977018e+03 -3.22965309e-01  1.52586289e+02]
New Q values:  [ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  1.52586289e+02]
Reward: -1  Episode Reward:  34
xxxxx
x . x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5773.88255957 -1838.15641278   790.72804752  1673.56197941]
------
Step:7, Action:North
State  208
Old Q Values:  [ 6778.32220163  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [14318.49496161  3591.79939078 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  33
xxxxx
x .ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2213.98937483  -180.00807518 38692.55360318]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  1.52586289e+02]
New Q values:  [ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  1.21796266e+02]
Reward: 9  Episode Reward:  42
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.53916996   141.75945388]
------
Step:9, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   235.18275474   258.92020704]
New Q values:  [ -253.44886264 -1902.20915811   779.7149538    258.92020704]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  1.21796266e+02]
------
Step:10, Action:West
State  136
Old Q Values:  [ -170.77177351   806.71298734 -2383.80019164  -904.68798348]
New Q values:  [ -170.77177351   806.71298734 -2383.80019164  -262.96211818]
Reward: -1  Episode Reward:  40
xxxxx
x agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   331.71025071]
------
Step:11, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   779.7149538    258.92020704]
New Q values:  [ -253.44886264 -1902.20915811   779.7149538    395.67260214]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  975.68173106  174.61129759 -252.78192178]
------
Step:12, Action:South
State  109
Old Q Values:  [ -241.10880094  2305.7685536  -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1319.03721393 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  48
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 666.89760138  556.93113857 1304.43264163  262.76946019]
------
Step:13, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738  10235.04946499      0.        ]
New Q values:  [109163.23336057  23344.73803738  64537.71273998      0.        ]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ -2527.46239811  -8521.23367799    565.58688268 100538.04874336]
------
Step:1, Action:East
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799    565.58688268 100538.04874336]
New Q values:  [ -2527.46239811  -8521.23367799   1555.49178192 100538.04874336]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4412.85676285 -6442.16912869 -8192.20126966   693.21555728]
------
Step:2, Action:North
State  288
Old Q Values:  [ 4412.85676285 -6442.16912869 -8192.20126966   693.21555728]
New Q values:  [ 3502.70747301 -6442.16912869 -8192.20126966   693.21555728]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5773.88255957 -1838.15641278   790.72804752  1673.56197941]
------
Step:3, Action:North
State  208
Old Q Values:  [14318.49496161  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [17340.5640656   3591.79939078 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  2213.98937483  -180.00807518 38692.55360318]
------
Step:4, Action:West
State  136
Old Q Values:  [ -170.77177351   806.71298734 -2383.80019164  -262.96211818]
New Q values:  [ -170.77177351   806.71298734 -2383.80019164    47.40096273]
Reward: 9  Episode Reward:  36
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   490.61936669 -1387.83645976]
------
Step:5, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   490.61936669 -1387.83645976]
New Q values:  [-9594.56523706 -8069.05606225   437.66164288 -1387.83645976]
Reward: -1  Episode Reward:  35
xxxxx
x.gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   806.71298734 -2383.80019164    47.40096273]
------
Step:6, Action:South
State  130
Old Q Values:  [26290.62961917  2213.98937483  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  6087.16496961  -180.00807518 38692.55360318]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17340.5640656   3591.79939078 -4584.50430574  1067.63960005]
------
Step:7, Action:North
State  208
Old Q Values:  [17340.5640656   3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [ 7621.86747814  3591.79939078 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  33
xxxxx
x. ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  1.21796266e+02]
------
Step:8, Action:West
State  136
Old Q Values:  [ -170.77177351   806.71298734 -2383.80019164    47.40096273]
New Q values:  [ -170.77177351   806.71298734 -2383.80019164   117.87346031]
Reward: -1  Episode Reward:  32
xxxxx
x.agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   331.71025071]
------
Step:9, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   437.66164288 -1387.83645976]
New Q values:  [-9594.56523706 -8069.05606225   437.66164288  -425.29523099]
Reward: 9  Episode Reward:  41
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         414.79784307   86.99637671    0.        ]
------
Step:10, Action:South
State  109
Old Q Values:  [ -241.10880094  1319.03721393 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   924.34467806 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  50
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 666.89760138  556.93113857 1304.43264163  262.76946019]
------
Step:11, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738  64537.71273998      0.        ]
New Q values:  [109163.23336057  23344.73803738  86258.77804998      0.        ]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 530.87108315  470.27670432 3630.6967969   358.5166536 ]
------
Step:1, Action:East
State  182
Old Q Values:  [    0.         -7507.54632711  1681.77874037     0.        ]
New Q values:  [    0.         -7507.54632711  1283.06065933     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.60896760e+02  2.01649721e+03  1.03161518e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.45197447e+03 8.30532462e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 1.45197447e+03 5.61409009e+03 1.27673579e+04]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7621.86747814  3591.79939078 -4584.50430574  1067.63960005]
------
Step:3, Action:North
State  208
Old Q Values:  [ 7621.86747814  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [14661.91307221  3591.79939078 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 38692.55360318]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  1.21796266e+02]
New Q values:  [ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  2.07869228e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18108.15151622 69109.34753192]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.53916996   141.75945388]
New Q values:  [ -281.736      -1150.91067548   184.53916996   354.80830087]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  975.68173106  174.61129759 -252.78192178]
------
Step:6, Action:South
State  106
Old Q Values:  [ -180.6        -5363.03361968   667.58921495  -180.6       ]
New Q values:  [ -180.6        -7710.46911494   667.58921495  -180.6       ]
Reward: -10001  Episode Reward:  -9956
xxxxx
x   x
xg  x
x. .x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3502.70747301 -6442.16912869 -8192.20126966   693.21555728]
------
Step:1, Action:North
State  288
Old Q Values:  [ 3502.70747301 -6442.16912869 -8192.20126966   693.21555728]
New Q values:  [ -194.94308913 -6442.16912869 -8192.20126966   693.21555728]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x..gx
x.. x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5773.88255957 -1838.15641278   790.72804752  1673.56197941]
------
Step:1, Action:North
State  208
Old Q Values:  [14661.91307221  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [12106.24205873  3591.79939078 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  2.07869228e+04]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  2.07869228e+04]
New Q values:  [ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  8.42661160e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.53916996   354.80830087]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.53916996   354.80830087]
New Q values:  [ -281.736      -1150.91067548   184.53916996   440.02783966]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  975.68173106  174.61129759 -252.78192178]
------
Step:4, Action:South
State  110
Old Q Values:  [-239.29051573  823.56832906  673.34667403 -180.6       ]
New Q values:  [-239.29051573  719.74552942  673.34667403 -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xa. x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -7507.54632711  1283.06065933     0.        ]
------
Step:5, Action:East
State  184
Old Q Values:  [ 264.76160535    0.         1451.14777643    0.        ]
New Q values:  [ 264.76160535    0.         7517.65918625    0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  23106.00025226   622.79892038   568.38654082]
------
Step:6, Action:South
State  200
Old Q Values:  [   62.8218634  23106.00025226   622.79892038   568.38654082]
New Q values:  [   62.8218634  39409.21472391   622.79892038   568.38654082]
Reward: 9  Episode Reward:  54
xxxxx
xg  x
x   x
x.a x
xxxxx
Step:7, Action:North
State  272
Old Q Values:  [ -2527.46239811  -8521.23367799   1555.49178192 100538.04874336]
New Q values:  [ 10811.17945793  -8521.23367799   1555.49178192 100538.04874336]
Reward: -1  Episode Reward:  53
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  39409.21472391   622.79892038   568.38654082]
------
Step:8, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.02949569e+03 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  7.17867664e+02 -5.74006972e+03  2.00341972e+02]
Reward: -1  Episode Reward:  52
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 1022.2312932   677.18474264]
------
Step:9, Action:East
State  272
Old Q Values:  [ 10811.17945793  -8521.23367799   1555.49178192 100538.04874336]
New Q values:  [ 10811.17945793  -8521.23367799    829.56137995 100538.04874336]
Reward: -1  Episode Reward:  51
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -194.94308913 -6442.16912869 -8192.20126966   693.21555728]
------
Step:10, Action:West
State  288
Old Q Values:  [ -194.94308913 -6442.16912869 -8192.20126966   693.21555728]
New Q values:  [ -194.94308913 -6442.16912869 -8192.20126966   583.35561087]
Reward: -1  Episode Reward:  50
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 1022.2312932   677.18474264]
------
Step:11, Action:East
State  272
Old Q Values:  [ 10811.17945793  -8521.23367799    829.56137995 100538.04874336]
New Q values:  [ 10811.17945793  -8521.23367799    506.23123524 100538.04874336]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -194.94308913 -6442.16912869 -8192.20126966   583.35561087]
------
Step:12, Action:West
State  288
Old Q Values:  [ -194.94308913 -6442.16912869 -8192.20126966   583.35561087]
New Q values:  [ -194.94308913 -6442.16912869 -8192.20126966 30394.15686736]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 10811.17945793  -8521.23367799    506.23123524 100538.04874336]
------
Step:13, Action:West
State  272
Old Q Values:  [ 10811.17945793  -8521.23367799    506.23123524 100538.04874336]
New Q values:  [ 10811.17945793  -8521.23367799    506.23123524 115975.75721434]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   806.71298734 -2383.80019164   117.87346031]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.28747284e+03 -3.22965309e-01  8.42661160e+03]
New Q values:  [ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  8.42661160e+03]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  180.81108618 -5399.47678043 -8896.20691497  2047.02745784]
------
Step:2, Action:West
State  210
Old Q Values:  [ 5773.88255957 -1838.15641278   790.72804752  1673.56197941]
New Q values:  [ 5773.88255957 -1838.15641278   790.72804752  1279.77395494]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x.a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.60896760e+02  2.01649721e+03  1.03161518e+03]
------
Step:3, Action:East
State  200
Old Q Values:  [   62.8218634  39409.21472391   622.79892038   568.38654082]
New Q values:  [   62.8218634  39409.21472391   862.62780551   568.38654082]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  180.81108618 -5399.47678043 -8896.20691497  2047.02745784]
------
Step:4, Action:West
State  210
Old Q Values:  [ 5773.88255957 -1838.15641278   790.72804752  1279.77395494]
New Q values:  [ 5773.88255957 -1838.15641278   790.72804752  1116.25874516]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x.a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.60896760e+02  2.01649721e+03  1.03161518e+03]
------
Step:5, Action:East
State  200
Old Q Values:  [   62.8218634  39409.21472391   862.62780551   568.38654082]
New Q values:  [   62.8218634  39409.21472391   958.55935955   568.38654082]
Reward: -1  Episode Reward:  15
xxxxx
x . x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  180.81108618 -5399.47678043 -8896.20691497  2047.02745784]
------
Step:6, Action:West
State  210
Old Q Values:  [ 5773.88255957 -1838.15641278   790.72804752  1116.25874516]
New Q values:  [ 5773.88255957 -1838.15641278   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  14
xxxxx
x . x
x.a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.60896760e+02  2.01649721e+03  1.03161518e+03]
------
Step:7, Action:East
State  200
Old Q Values:  [   62.8218634  39409.21472391   958.55935955   568.38654082]
New Q values:  [   62.8218634  39409.21472391   996.93198117   568.38654082]
Reward: -1  Episode Reward:  13
xxxxx
x . x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  180.81108618 -5399.47678043 -8896.20691497  2047.02745784]
------
Step:8, Action:West
State  216
Old Q Values:  [  180.81108618 -5399.47678043 -8896.20691497  2047.02745784]
New Q values:  [  180.81108618 -5399.47678043 -8896.20691497 -1351.58164425]
Reward: -10001  Episode Reward:  -9988
xxxxx
x . x
x.g x
x...x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   331.71025071]
------
Step:1, Action:West
State  121
Old Q Values:  [    0.             0.         -8255.55694753   331.71025071]
New Q values:  [    0.             0.         -8255.55694753   415.3875037 ]
Reward: 9  Episode Reward:  9
xxxxx
xa gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   924.34467806 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1050.56650176  982.40572119 -120.29354603]
New Q values:  [-177.44732869 1730.85795431  982.40572119 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 4350.77117868 2139.39811831  154.04646645]
------
Step:3, Action:South
State  189
Old Q Values:  [ 275.08817949 4350.77117868 2139.39811831  154.04646645]
New Q values:  [ 275.08817949 2825.71173548 2139.39811831  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3600.01088001   26.73544252 -142.5818878   123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [3600.01088001   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [2528.61339108   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 530.87108315  470.27670432 3630.6967969   358.5166536 ]
------
Step:5, Action:East
State  189
Old Q Values:  [ 275.08817949 2825.71173548 2139.39811831  154.04646645]
New Q values:  [ 275.08817949 2825.71173548 1670.6531179   154.04646645]
Reward: -9991  Episode Reward:  -9965
xxxxx
x   x
x g.x
x ..x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:1, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   437.66164288  -425.29523099]
New Q values:  [-9594.56523706 -8069.05606225   437.66164288   112.58531102]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   924.34467806 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1730.85795431  982.40572119 -120.29354603]
New Q values:  [-177.44732869 1545.45670237  982.40572119 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 2825.71173548 1670.6531179   154.04646645]
------
Step:3, Action:South
State  189
Old Q Values:  [ 275.08817949 2825.71173548 1670.6531179   154.04646645]
New Q values:  [ 275.08817949 1894.26871151 1670.6531179   154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2528.61339108   26.73544252 -142.5818878   123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [2528.61339108   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [1579.12596988   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1894.26871151 1670.6531179   154.04646645]
------
Step:5, Action:South
State  183
Old Q Values:  [ 530.87108315  470.27670432 3630.6967969   358.5166536 ]
New Q values:  [ 530.87108315  661.24847269 3630.6967969   358.5166536 ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1579.12596988   26.73544252 -142.5818878   123.6214372 ]
------
Step:6, Action:North
State  261
Old Q Values:  [1579.12596988   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [1199.33100141   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1894.26871151 1670.6531179   154.04646645]
------
Step:7, Action:South
State  180
Old Q Values:  [ -820.10444135  1010.53711994  6743.532972   -4966.32149798]
New Q values:  [ -820.10444135  9530.16745518  6743.532972   -4966.32149798]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5258.4185764  -2735.46306511 30421.84202401 -5679.36893145]
------
Step:8, Action:East
State  260
Old Q Values:  [-5258.4185764  -2735.46306511 30421.84202401 -5679.36893145]
New Q values:  [-5258.4185764  -2735.46306511 12430.69082408 -5679.36893145]
Reward: 9  Episode Reward:  32
xxxxx
xg  x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:9, Action:East
State  272
Old Q Values:  [ 10811.17945793  -8521.23367799    506.23123524 115975.75721434]
New Q values:  [ 10811.17945793  -8521.23367799   9326.1395543  115975.75721434]
Reward: 9  Episode Reward:  41
xxxxx
x g x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -194.94308913 -6442.16912869 -8192.20126966 30394.15686736]
------
Step:10, Action:West
State  288
Old Q Values:  [ -194.94308913 -6442.16912869 -8192.20126966 30394.15686736]
New Q values:  [ -194.94308913 -6442.16912869 -8192.20126966 12413.61676141]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:11, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 1022.2312932   677.18474264]
New Q values:  [ 633.77207181 -168.92307549 4132.37754571  677.18474264]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -194.94308913 -6442.16912869 -8192.20126966 12413.61676141]
------
Step:12, Action:West
State  288
Old Q Values:  [ -194.94308913 -6442.16912869 -8192.20126966 12413.61676141]
New Q values:  [ -194.94308913 -6442.16912869 -8192.20126966  6204.55996828]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x ..x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 4132.37754571  677.18474264]
------
Step:13, Action:West
State  272
Old Q Values:  [ 10811.17945793  -8521.23367799   9326.1395543  115975.75721434]
New Q values:  [10811.17945793 -8521.23367799  9326.1395543  46749.50218616]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x ..x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1199.33100141   26.73544252 -142.5818878   123.6214372 ]
------
Step:14, Action:North
State  261
Old Q Values:  [1199.33100141   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [ 870.46219305   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 666.89760138  556.93113857 1304.43264163  262.76946019]
------
Step:15, Action:North
State  183
Old Q Values:  [ 530.87108315  661.24847269 3630.6967969   358.5166536 ]
New Q values:  [ 675.38544397  661.24847269 3630.6967969   358.5166536 ]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x ..x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1545.45670237  982.40572119 -120.29354603]
------
Step:16, Action:South
State  110
Old Q Values:  [-239.29051573  719.74552942  673.34667403 -180.6       ]
New Q values:  [-239.29051573  672.21640957  673.34667403 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -7507.54632711  1283.06065933     0.        ]
------
Step:17, Action:East
State  180
Old Q Values:  [ -820.10444135  9530.16745518  6743.532972   -4966.32149798]
New Q values:  [ -820.10444135  9530.16745518  9512.30705937 -4966.32149798]
Reward: 9  Episode Reward:  43
xxxxx
x   x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   946.49196853 22698.31290191   492.34934406]
------
Step:18, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.45197447e+03 5.61409009e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 1.45197447e+03 6.58829087e+04 1.27673579e+04]
Reward: 100009  Episode Reward:  100052
xxxxx
xg  x
x  ax
x   x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -194.94308913 -6442.16912869 -8192.20126966  6204.55996828]
------
Step:1, Action:West
State  288
Old Q Values:  [ -194.94308913 -6442.16912869 -8192.20126966  6204.55996828]
New Q values:  [ -194.94308913 -6442.16912869 -8192.20126966 16512.07464316]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10811.17945793 -8521.23367799  9326.1395543  46749.50218616]
------
Step:2, Action:West
State  272
Old Q Values:  [10811.17945793 -8521.23367799  9326.1395543  46749.50218616]
New Q values:  [10811.17945793 -8521.23367799  9326.1395543  18966.33953238]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 870.46219305   26.73544252 -142.5818878   123.6214372 ]
------
Step:3, Action:North
State  261
Old Q Values:  [ 870.46219305   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [ 744.91466971   26.73544252 -142.5818878   123.6214372 ]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 666.89760138  556.93113857 1304.43264163  262.76946019]
------
Step:4, Action:North
State  181
Old Q Values:  [ 666.89760138  556.93113857 1304.43264163  262.76946019]
New Q values:  [ 549.46244397  556.93113857 1304.43264163  262.76946019]
Reward: 9  Episode Reward:  36
xxxxx
xag.x
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   924.34467806 -2165.66138672  -180.6       ]
------
Step:5, Action:South
State  108
Old Q Values:  [-8463.16477134  2259.8349134    845.00690416     0.        ]
New Q values:  [-8463.16477134  3762.38420191   845.00690416     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
xg .x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -820.10444135  9530.16745518  9512.30705937 -4966.32149798]
------
Step:6, Action:South
State  181
Old Q Values:  [ 549.46244397  556.93113857 1304.43264163  262.76946019]
New Q values:  [ 549.46244397  445.64685634 1304.43264163  262.76946019]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 744.91466971   26.73544252 -142.5818878   123.6214372 ]
------
Step:7, Action:North
State  261
Old Q Values:  [ 744.91466971   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [ 688.69566037   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 549.46244397  445.64685634 1304.43264163  262.76946019]
------
Step:8, Action:East
State  181
Old Q Values:  [ 549.46244397  445.64685634 1304.43264163  262.76946019]
New Q values:  [549.46244397 445.64685634 693.93176586 262.76946019]
Reward: 9  Episode Reward:  42
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[ 1.32443385e-01  5.55862364e+02 -4.51080211e+03  4.03062559e+02]
------
Step:9, Action:South
State  196
Old Q Values:  [-2469.90645144   946.49196853 22698.31290191   492.34934406]
New Q values:  [-2469.90645144   634.55080188 22698.31290191   492.34934406]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:10, Action:East
State  277
Old Q Values:  [  1.64433      0.         -29.77444073 451.39143215]
New Q values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 4.51391432e+02]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x  gx
x  ax
xxxxx
Step:11, Action:West
State  288
Old Q Values:  [ -194.94308913 -6442.16912869 -8192.20126966 16512.07464316]
New Q values:  [ -194.94308913 -6442.16912869 -8192.20126966  8086.56364226]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 4.94111262e+03 4.51391432e+02]
------
Step:12, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 4.51391432e+02]
New Q values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 3.86565271e+02]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 688.69566037   26.73544252 -142.5818878   123.6214372 ]
------
Step:13, Action:North
State  261
Old Q Values:  [ 688.69566037   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [ 483.05779391   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
xa .x
x   x
xxxxx
Step:14, Action:East
State  181
Old Q Values:  [549.46244397 445.64685634 693.93176586 262.76946019]
New Q values:  [ 549.46244397  445.64685634 7086.46657692  262.76946019]
Reward: -1  Episode Reward:  36
xxxxx
x g.x
x a.x
x   x
xxxxx
Step:15, Action:North
State  197
Old Q Values:  [ 1.32443385e-01  5.55862364e+02 -4.51080211e+03  4.03062559e+02]
New Q values:  [-5833.78831344   555.86236402 -4510.80210702   403.06255908]
Reward: -10001  Episode Reward:  -9965
xxxxx
x  gx
x a.x
x   x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -820.10444135  9530.16745518  9512.30705937 -4966.32149798]
------
Step:1, Action:South
State  180
Old Q Values:  [ -820.10444135  9530.16745518  9512.30705937 -4966.32149798]
New Q values:  [ -820.10444135  7546.67422929  9512.30705937 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5258.4185764  -2735.46306511 12430.69082408 -5679.36893145]
------
Step:2, Action:East
State  260
Old Q Values:  [-5258.4185764  -2735.46306511 12430.69082408 -5679.36893145]
New Q values:  [-5258.4185764  -2735.46306511  5234.2303441  -5679.36893145]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:3, Action:East
State  272
Old Q Values:  [10811.17945793 -8521.23367799  9326.1395543  18966.33953238]
New Q values:  [10811.17945793 -8521.23367799  6161.8249144  18966.33953238]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -194.94308913 -6442.16912869 -8192.20126966  8086.56364226]
------
Step:4, Action:West
State  288
Old Q Values:  [ -194.94308913 -6442.16912869 -8192.20126966  8086.56364226]
New Q values:  [ -194.94308913 -6442.16912869 -8192.20126966  3490.57947138]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:5, Action:East
State  272
Old Q Values:  [10811.17945793 -8521.23367799  6161.8249144  18966.33953238]
New Q values:  [10811.17945793 -8521.23367799  3511.30380717 18966.33953238]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -194.94308913 -6442.16912869 -8192.20126966  3490.57947138]
------
Step:6, Action:North
State  288
Old Q Values:  [ -194.94308913 -6442.16912869 -8192.20126966  3490.57947138]
New Q values:  [ 1659.58753222 -6442.16912869 -8192.20126966  3490.57947138]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5773.88255957 -1838.15641278   790.72804752  1050.85266124]
------
Step:7, Action:North
State  210
Old Q Values:  [ 5773.88255957 -1838.15641278   790.72804752  1050.85266124]
New Q values:  [13922.71910478 -1838.15641278   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  43
xxxxx
x .ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 38692.55360318]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  8.42661160e+03]
New Q values:  [ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  3.50805299e+03]
Reward: 9  Episode Reward:  52
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.53916996   440.02783966]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.53916996   440.02783966]
New Q values:  [ -281.736      -1150.91067548   184.53916996   377.41513807]
Reward: -1  Episode Reward:  51
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  672.21640957  673.34667403 -180.6       ]
------
Step:10, Action:East
State  110
Old Q Values:  [-239.29051573  672.21640957  673.34667403 -180.6       ]
New Q values:  [-239.29051573  672.21640957  381.96321103 -180.6       ]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.53916996   377.41513807]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.53916996   377.41513807]
New Q values:  [ -281.736      -1150.91067548   184.53916996   352.0309781 ]
Reward: -1  Episode Reward:  49
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  672.21640957  381.96321103 -180.6       ]
------
Step:12, Action:East
State  99
Old Q Values:  [    0.         49742.2646533  61489.82543186     0.        ]
New Q values:  [    0.         49742.2646533  45328.13443232     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18108.15151622 69109.34753192]
------
Step:13, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.53916996   352.0309781 ]
New Q values:  [ -281.736      -1150.91067548   184.53916996   603.84940195]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1545.45670237  982.40572119 -120.29354603]
------
Step:14, Action:South
State  99
Old Q Values:  [    0.         49742.2646533  45328.13443232     0.        ]
New Q values:  [    0.         52645.27586949 45328.13443232     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  86258.77804998      0.        ]
------
Step:15, Action:North
State  181
Old Q Values:  [ 549.46244397  445.64685634 7086.46657692  262.76946019]
New Q values:  [ 496.48838101  445.64685634 7086.46657692  262.76946019]
Reward: -1  Episode Reward:  45
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   924.34467806 -2165.66138672  -180.6       ]
------
Step:16, Action:South
State  99
Old Q Values:  [    0.         52645.27586949 45328.13443232     0.        ]
New Q values:  [    0.         53806.48035597 45328.13443232     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  86258.77804998      0.        ]
------
Step:17, Action:North
State  180
Old Q Values:  [ -820.10444135  7546.67422929  9512.30705937 -4966.32149798]
New Q values:  [ -126.97685367  7546.67422929  9512.30705937 -4966.32149798]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  672.21640957  381.96321103 -180.6       ]
------
Step:18, Action:East
State  108
Old Q Values:  [-8463.16477134  3762.38420191   845.00690416     0.        ]
New Q values:  [-8463.16477134  3762.38420191   468.70125453     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   437.66164288   112.58531102]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.53916996   603.84940195]
New Q values:  [ -281.736      -1150.91067548  1125.63156516   603.84940195]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  3.50805299e+03]
------
Step:20, Action:West
State  136
Old Q Values:  [ -170.77177351   806.71298734 -2383.80019164   117.87346031]
New Q values:  [ -170.77177351   806.71298734 -2383.80019164   177.84787698]
Reward: -1  Episode Reward:  40
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   437.66164288   112.58531102]
------
Step:21, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   437.66164288   112.58531102]
New Q values:  [-9594.56523706 -8069.05606225   416.47855335   112.58531102]
Reward: -1  Episode Reward:  39
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   806.71298734 -2383.80019164   177.84787698]
------
Step:22, Action:South
State  136
Old Q Values:  [ -170.77177351   806.71298734 -2383.80019164   177.84787698]
New Q values:  [ -170.77177351  3953.95781255 -2383.80019164   177.84787698]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12106.24205873  3591.79939078 -4584.50430574  1067.63960005]
------
Step:23, Action:South
State  208
Old Q Values:  [12106.24205873  3591.79939078 -4584.50430574  1067.63960005]
New Q values:  [12106.24205873  2483.29359772 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1659.58753222 -6442.16912869 -8192.20126966  3490.57947138]
------
Step:24, Action:West
State  288
Old Q Values:  [ 1659.58753222 -6442.16912869 -8192.20126966  3490.57947138]
New Q values:  [ 1659.58753222 -6442.16912869 -8192.20126966  7085.53364826]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10811.17945793 -8521.23367799  3511.30380717 18966.33953238]
------
Step:25, Action:West
State  272
Old Q Values:  [10811.17945793 -8521.23367799  3511.30380717 18966.33953238]
New Q values:  [10811.17945793 -8521.23367799  3511.30380717  7730.85315112]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 483.05779391   26.73544252 -142.5818878   123.6214372 ]
------
Step:26, Action:North
State  257
Old Q Values:  [52517.12572334  6367.89256635  5196.05500727  1875.31501677]
New Q values:  [53755.22029751  6367.89256635  5196.05500727  1875.31501677]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  86258.77804998      0.        ]
------
Step:27, Action:North
State  180
Old Q Values:  [ -126.97685367  7546.67422929  9512.30705937 -4966.32149798]
New Q values:  [  150.2741814   7546.67422929  9512.30705937 -4966.32149798]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  672.21640957  381.96321103 -180.6       ]
------
Step:28, Action:East
State  99
Old Q Values:  [    0.         53806.48035597 45328.13443232     0.        ]
New Q values:  [    0.         53806.48035597 38863.4580325      0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18108.15151622 69109.34753192]
------
Step:29, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   779.7149538    395.67260214]
New Q values:  [ -253.44886264 -1902.20915811   779.7149538    621.30605156]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1545.45670237  982.40572119 -120.29354603]
------
Step:30, Action:South
State  109
Old Q Values:  [ -241.10880094   924.34467806 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  2495.0778443  -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 496.48838101  445.64685634 7086.46657692  262.76946019]
------
Step:31, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738  86258.77804998      0.        ]
New Q values:  [109163.23336057  23344.73803738 114273.78381604      0.        ]
Reward: 100009  Episode Reward:  100039
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13922.71910478 -1838.15641278   790.72804752  1050.85266124]
------
Step:1, Action:North
State  208
Old Q Values:  [12106.24205873  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [ 5900.31272067  2483.29359772 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  3.50805299e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  3.50805299e+03]
New Q values:  [ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  1.74631067e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1125.63156516   603.84940195]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1125.63156516   603.84940195]
New Q values:  [ -281.736      -1150.91067548   973.5458258    603.84940195]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  1.74631067e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  1.74631067e+03]
New Q values:  [ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  9.89988014e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   973.5458258    603.84940195]
------
Step:5, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   416.47855335   112.58531102]
New Q values:  [-9594.56523706 -8069.05606225  1352.17876511   112.58531102]
Reward: -1  Episode Reward:  15
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3953.95781255 -2383.80019164   177.84787698]
------
Step:6, Action:South
State  136
Old Q Values:  [ -170.77177351  3953.95781255 -2383.80019164   177.84787698]
New Q values:  [ -170.77177351  3351.07694122 -2383.80019164   177.84787698]
Reward: -1  Episode Reward:  14
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5900.31272067  2483.29359772 -4584.50430574  1067.63960005]
------
Step:7, Action:North
State  208
Old Q Values:  [ 5900.31272067  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [ 2819.87430023  2483.29359772 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  9.89988014e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.53449737e+03 -3.22965309e-01  9.89988014e+02]
New Q values:  [ 8.43634063e+00  1.45916124e+03 -3.22965309e-01  9.89988014e+02]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2819.87430023  2483.29359772 -4584.50430574  1067.63960005]
------
Step:9, Action:North
State  210
Old Q Values:  [13922.71910478 -1838.15641278   790.72804752  1050.85266124]
New Q values:  [ 6006.23601372 -1838.15641278   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.45916124e+03 -3.22965309e-01  9.89988014e+02]
------
Step:10, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.45916124e+03 -3.22965309e-01  9.89988014e+02]
New Q values:  [ 8.43634063e+00  2.38493530e+03 -3.22965309e-01  9.89988014e+02]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6006.23601372 -1838.15641278   790.72804752  1050.85266124]
------
Step:11, Action:North
State  210
Old Q Values:  [ 6006.23601372 -1838.15641278   790.72804752  1050.85266124]
New Q values:  [ 3117.37499545 -1838.15641278   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  9
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.38493530e+03 -3.22965309e-01  9.89988014e+02]
------
Step:12, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.38493530e+03 -3.22965309e-01  9.89988014e+02]
New Q values:  [ 8.43634063e+00  1.88858662e+03 -3.22965309e-01  9.89988014e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3117.37499545 -1838.15641278   790.72804752  1050.85266124]
------
Step:13, Action:North
State  210
Old Q Values:  [ 3117.37499545 -1838.15641278   790.72804752  1050.85266124]
New Q values:  [ 1812.92598375 -1838.15641278   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.88858662e+03 -3.22965309e-01  9.89988014e+02]
------
Step:14, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.88858662e+03 -3.22965309e-01  9.89988014e+02]
New Q values:  [ 8.43634063e+00  1.29871244e+03 -3.22965309e-01  9.89988014e+02]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1812.92598375 -1838.15641278   790.72804752  1050.85266124]
------
Step:15, Action:North
State  210
Old Q Values:  [ 1812.92598375 -1838.15641278   790.72804752  1050.85266124]
New Q values:  [ 1114.18412627 -1838.15641278   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.29871244e+03 -3.22965309e-01  9.89988014e+02]
------
Step:16, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.29871244e+03 -3.22965309e-01  9.89988014e+02]
New Q values:  [ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  9.89988014e+02]
Reward: -1  Episode Reward:  4
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2819.87430023  2483.29359772 -4584.50430574  1067.63960005]
------
Step:17, Action:North
State  208
Old Q Values:  [ 2819.87430023  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [ 1536.80390022  2483.29359772 -4584.50430574  1067.63960005]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  9.89988014e+02]
------
Step:18, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  9.89988014e+02]
New Q values:  [ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  6.87458953e+02]
Reward: -1  Episode Reward:  2
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   973.5458258    603.84940195]
------
Step:19, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   779.7149538    621.30605156]
New Q values:  [ -253.44886264 -1902.20915811   720.74016165   621.30605156]
Reward: -1  Episode Reward:  1
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  6.87458953e+02]
------
Step:20, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  6.87458953e+02]
New Q values:  [ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  5.66447329e+02]
Reward: -1  Episode Reward:  0
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   973.5458258    603.84940195]
------
Step:21, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   720.74016165   621.30605156]
New Q values:  [ -253.44886264 -1902.20915811   697.15024479   621.30605156]
Reward: -1  Episode Reward:  -1
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  5.66447329e+02]
------
Step:22, Action:West
State  136
Old Q Values:  [ -170.77177351  3351.07694122 -2383.80019164   177.84787698]
New Q values:  [ -170.77177351  3351.07694122 -2383.80019164   195.1554019 ]
Reward: -1  Episode Reward:  -2
xxxxx
x.agx
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:23, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   697.15024479   621.30605156]
New Q values:  [ -253.44886264 -1902.20915811   697.15024479   546.62693994]
Reward: 9  Episode Reward:  7
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  975.68173106  174.61129759 -252.78192178]
------
Step:24, Action:South
State  109
Old Q Values:  [ -241.10880094  2495.0778443  -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  3129.37111079 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  16
xxxxx
x  gx
xa. x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 496.48838101  445.64685634 7086.46657692  262.76946019]
------
Step:25, Action:East
State  185
Old Q Values:  [ 200.47685943    0.          374.62683656 -178.98      ]
New Q values:  [ 200.47685943    0.          370.61103384 -178.98      ]
Reward: 9  Episode Reward:  25
xxxxx
x  gx
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  7.17867664e+02 -5.74006972e+03  2.00341972e+02]
------
Step:26, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  7.17867664e+02 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 4132.37754571  677.18474264]
------
Step:27, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 4132.37754571  677.18474264]
New Q values:  [ 633.77207181 -168.92307549 3778.01111276  677.18474264]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1659.58753222 -6442.16912869 -8192.20126966  7085.53364826]
------
Step:28, Action:West
State  288
Old Q Values:  [ 1659.58753222 -6442.16912869 -8192.20126966  7085.53364826]
New Q values:  [ 1659.58753222 -6442.16912869 -8192.20126966  6076.96729669]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10811.17945793 -8521.23367799  3511.30380717  7730.85315112]
------
Step:29, Action:North
State  272
Old Q Values:  [10811.17945793 -8521.23367799  3511.30380717  7730.85315112]
New Q values:  [10146.63620035 -8521.23367799  3511.30380717  7730.85315112]
Reward: -10001  Episode Reward:  -9969
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10146.63620035 -8521.23367799  3511.30380717  7730.85315112]
------
Step:1, Action:West
State  272
Old Q Values:  [10146.63620035 -8521.23367799  3511.30380717  7730.85315112]
New Q values:  [10146.63620035 -8521.23367799  3511.30380717  3242.65859862]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 483.05779391   26.73544252 -142.5818878   123.6214372 ]
------
Step:2, Action:North
State  261
Old Q Values:  [ 483.05779391   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [1287.83215663   26.73544252 -142.5818878   123.6214372 ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 675.38544397  661.24847269 3630.6967969   358.5166536 ]
------
Step:3, Action:East
State  183
Old Q Values:  [ 675.38544397  661.24847269 3630.6967969   358.5166536 ]
New Q values:  [ 675.38544397  661.24847269 2056.62788194  358.5166536 ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.60896760e+02  2.01649721e+03  1.03161518e+03]
------
Step:4, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.60896760e+02  2.01649721e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  9.60896760e+02  1.14625412e+03  1.03161518e+03]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1114.18412627 -1838.15641278   790.72804752  1050.85266124]
------
Step:5, Action:North
State  208
Old Q Values:  [ 1536.80390022  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [12227.88764104  2483.29359772 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  35
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 38692.55360318]
------
Step:6, Action:West
State  130
Old Q Values:  [26290.62961917  6087.16496961  -180.00807518 38692.55360318]
New Q values:  [26290.62961917  6087.16496961  -180.00807518 36215.22570085]
Reward: 9  Episode Reward:  44
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18108.15151622 69109.34753192]
------
Step:7, Action:West
State  126
Old Q Values:  [  0.         331.64678262 182.86408515 894.49091028]
New Q values:  [  0.         331.64678262 182.86408515 564.86128698]
Reward: 9  Episode Reward:  53
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  672.21640957  381.96321103 -180.6       ]
------
Step:8, Action:East
State  110
Old Q Values:  [-239.29051573  672.21640957  381.96321103 -180.6       ]
New Q values:  [-239.29051573  672.21640957  321.64367051 -180.6       ]
Reward: -1  Episode Reward:  52
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 182.86408515 564.86128698]
------
Step:9, Action:West
State  126
Old Q Values:  [  0.         331.64678262 182.86408515 564.86128698]
New Q values:  [  0.         331.64678262 182.86408515 427.00943766]
Reward: -1  Episode Reward:  51
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  672.21640957  321.64367051 -180.6       ]
------
Step:10, Action:East
State  111
Old Q Values:  [-177.44732869 1545.45670237  982.40572119 -120.29354603]
New Q values:  [-177.44732869 1545.45670237  520.46511977 -120.29354603]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 182.86408515 427.00943766]
------
Step:11, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 4292.78893337  858.10195218]
New Q values:  [   0.         1166.51141701 4292.78893337 1281.45211411]
Reward: -1  Episode Reward:  49
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  3129.37111079 -2165.66138672  -180.6       ]
------
Step:12, Action:South
State  111
Old Q Values:  [-177.44732869 1545.45670237  520.46511977 -120.29354603]
New Q values:  [-177.44732869 1185.8632944   520.46511977 -120.29354603]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1894.26871151 1670.6531179   154.04646645]
------
Step:13, Action:South
State  191
Old Q Values:  [  3.06655861 970.40507756 513.29369084   0.        ]
New Q values:  [  3.06655861 773.91167801 513.29369084   0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x   x
xag.x
xxxxx
Step:14, Action:South
State  260
Old Q Values:  [-5258.4185764  -2735.46306511  5234.2303441  -5679.36893145]
New Q values:  [-5258.4185764  -5704.51612281  5234.2303441  -5679.36893145]
Reward: -10301  Episode Reward:  -10254
xxxxx
x   x
x   x
xg .x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  672.21640957  321.64367051 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1185.8632944   520.46511977 -120.29354603]
New Q values:  [-177.44732869 1096.73368234  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 675.38544397  661.24847269 2056.62788194  358.5166536 ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 675.38544397  661.24847269 2056.62788194  358.5166536 ]
New Q values:  [ 675.38544397  661.24847269 1789.29063303  358.5166536 ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194  479.07551978 3204.13160084 1915.70494401]
------
Step:3, Action:East
State  193
Old Q Values:  [-5922.26708831  1037.22774609   635.35015003  1460.9765133 ]
New Q values:  [-5922.26708831  1037.22774609 -2072.09364767  1460.9765133 ]
Reward: -9991  Episode Reward:  -9973
xxxxx
x ..x
x  gx
x ..x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  672.21640957  321.64367051 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1096.73368234  520.46511977 -120.29354603]
New Q values:  [-177.44732869  980.88066285  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 675.38544397  661.24847269 1789.29063303  358.5166536 ]
------
Step:2, Action:East
State  183
Old Q Values:  [ 675.38544397  661.24847269 1789.29063303  358.5166536 ]
New Q values:  [ 675.38544397  661.24847269 1682.35573346  358.5166536 ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194  479.07551978 3204.13160084 1915.70494401]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.60896760e+02  1.14625412e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  9.60896760e+02  7.98156887e+02  1.03161518e+03]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1114.18412627 -1838.15641278   790.72804752  1050.85266124]
------
Step:4, Action:North
State  216
Old Q Values:  [  180.81108618 -5399.47678043 -8896.20691497 -1351.58164425]
New Q values:  [  487.1786146  -5399.47678043 -8896.20691497 -1351.58164425]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  5.66447329e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.36484727e+03 -3.22965309e-01  5.66447329e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  5.66447329e+02]
Reward: -10001  Episode Reward:  -9965
xxxxx
x . x
x  gx
x ..x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 1114.18412627 -1838.15641278   790.72804752  1050.85266124]
------
Step:1, Action:North
State  210
Old Q Values:  [ 1114.18412627 -1838.15641278   790.72804752  1050.85266124]
New Q values:  [  621.00784923 -1838.15641278   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  5.66447329e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  5.66447329e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  5.24042679e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   973.5458258    603.84940195]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   973.5458258    603.84940195]
New Q values:  [ -281.736      -1150.91067548   546.03113413   603.84940195]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  5.24042679e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  5.24042679e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.90171892e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   546.03113413   603.84940195]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   546.03113413   603.84940195]
New Q values:  [ -281.736      -1150.91067548   546.03113413   539.6442801 ]
Reward: 9  Episode Reward:  25
xxxxx
xa  x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  975.68173106  174.61129759 -252.78192178]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869  980.88066285  520.46511977 -120.29354603]
New Q values:  [-177.44732869  902.45898518  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 675.38544397  661.24847269 1682.35573346  358.5166536 ]
------
Step:7, Action:East
State  185
Old Q Values:  [ 200.47685943    0.          370.61103384 -178.98      ]
New Q values:  [ 200.47685943    0.         5976.40883071 -178.98      ]
Reward: -9991  Episode Reward:  -9957
xxxxx
x   x
x g x
x. .x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -7507.54632711  1283.06065933     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [  150.2741814   7546.67422929  9512.30705937 -4966.32149798]
New Q values:  [  150.2741814   7546.67422929 23575.19541979 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.45197447e+03 6.58829087e+04 1.27673579e+04]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.45197447e+03 6.58829087e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.45197447e+03 3.00269298e+04 1.27673579e+04]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12227.88764104  2483.29359772 -4584.50430574  1067.63960005]
------
Step:3, Action:North
State  208
Old Q Values:  [12227.88764104  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [15761.12276667  2483.29359772 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 36215.22570085]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.90171892e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.08942730e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18108.15151622 69109.34753192]
------
Step:5, Action:West
State  126
Old Q Values:  [  0.         331.64678262 182.86408515 427.00943766]
New Q values:  [  0.         331.64678262 182.86408515 446.94147062]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  902.45898518  520.46511977 -120.29354603]
------
Step:6, Action:South
State  110
Old Q Values:  [-239.29051573  672.21640957  321.64367051 -180.6       ]
New Q values:  [-239.29051573  806.74767153  321.64367051 -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:7, Action:East
State  191
Old Q Values:  [  3.06655861 773.91167801 513.29369084   0.        ]
New Q values:  [  3.06655861 773.91167801 597.60025341   0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[    0.         -1406.21014518  1309.6092569      0.        ]
------
Step:8, Action:East
State  202
Old Q Values:  [    0.         -8753.98842238  4042.28441766     0.        ]
New Q values:  [    0.         -8753.98842238  3675.10404147     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 138.78034012 6862.634248      0.         1863.19740327]
------
Step:9, Action:South
State  210
Old Q Values:  [  621.00784923 -1838.15641278   790.72804752  1050.85266124]
New Q values:  [ 621.00784923 1093.22762389  790.72804752 1050.85266124]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1659.58753222 -6442.16912869 -8192.20126966  6076.96729669]
------
Step:10, Action:North
State  288
Old Q Values:  [ 1659.58753222 -6442.16912869 -8192.20126966  6076.96729669]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  6076.96729669]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  487.1786146  -5399.47678043 -8896.20691497 -1351.58164425]
------
Step:11, Action:North
State  210
Old Q Values:  [ 621.00784923 1093.22762389  790.72804752 1050.85266124]
New Q values:  [11112.37084995  1093.22762389   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  49
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 36215.22570085]
------
Step:12, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.08942730e+04]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  8.52091855e+03]
Reward: -1  Episode Reward:  48
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   546.03113413   539.6442801 ]
------
Step:13, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  18108.15151622 69109.34753192]
New Q values:  [ -180.6         3557.6642036  18107.22831674 69109.34753192]
Reward: -1  Episode Reward:  47
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 36215.22570085]
------
Step:14, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  8.52091855e+03]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.57157676e+03]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   546.03113413   539.6442801 ]
------
Step:15, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  18107.22831674 69109.34753192]
New Q values:  [ -180.6         3557.6642036  18106.85903695 69109.34753192]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 36215.22570085]
------
Step:16, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.57157676e+03]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.63717578e+03]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   697.15024479   546.62693994]
------
Step:17, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  18106.85903695 69109.34753192]
New Q values:  [ -180.6         3557.6642036  18106.71132503 69109.34753192]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 36215.22570085]
------
Step:18, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.63717578e+03]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  8.18079651e+02]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   546.03113413   539.6442801 ]
------
Step:19, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   697.15024479   546.62693994]
New Q values:  [ -253.44886264 -1902.20915811   523.68399323   546.62693994]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  8.18079651e+02]
------
Step:20, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  8.18079651e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  4.90441201e+02]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   546.03113413   539.6442801 ]
------
Step:21, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1352.17876511   112.58531102]
New Q values:  [-9594.56523706 -8069.05606225  1545.59458841   112.58531102]
Reward: -1  Episode Reward:  39
xxxxx
x gax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3351.07694122 -2383.80019164   195.1554019 ]
------
Step:22, Action:South
State  136
Old Q Values:  [ -170.77177351  3351.07694122 -2383.80019164   195.1554019 ]
New Q values:  [ -170.77177351  1485.98436087 -2383.80019164   195.1554019 ]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  487.1786146  -5399.47678043 -8896.20691497 -1351.58164425]
------
Step:23, Action:West
State  216
Old Q Values:  [  487.1786146  -5399.47678043 -8896.20691497 -1351.58164425]
New Q values:  [  487.1786146  -5399.47678043 -8896.20691497   -81.5545589 ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x agx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
------
Step:24, Action:South
State  193
Old Q Values:  [-5922.26708831  1037.22774609 -2072.09364767  1460.9765133 ]
New Q values:  [-5922.26708831 61553.69443226 -2072.09364767  1460.9765133 ]
Reward: 100009  Episode Reward:  100046
xxxxx
x  gx
x   x
x a x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 675.38544397  661.24847269 1682.35573346  358.5166536 ]
------
Step:1, Action:East
State  181
Old Q Values:  [ 496.48838101  445.64685634 7086.46657692  262.76946019]
New Q values:  [  496.48838101   445.64685634 21306.09496045   262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 61553.69443226 -2072.09364767  1460.9765133 ]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 61553.69443226 -2072.09364767  1460.9765133 ]
New Q values:  [-5922.26708831 25760.28110673 -2072.09364767  1460.9765133 ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 3778.01111276  677.18474264]
------
Step:3, Action:East
State  272
Old Q Values:  [10146.63620035 -8521.23367799  3511.30380717  3242.65859862]
New Q values:  [10146.63620035 -8521.23367799  3227.01171187  3242.65859862]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  6076.96729669]
------
Step:4, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  6076.96729669]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  5474.17777878]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10146.63620035 -8521.23367799  3227.01171187  3242.65859862]
------
Step:5, Action:West
State  272
Old Q Values:  [10146.63620035 -8521.23367799  3227.01171187  3242.65859862]
New Q values:  [10146.63620035 -8521.23367799  3227.01171187  1688.81308644]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1287.83215663   26.73544252 -142.5818878   123.6214372 ]
------
Step:6, Action:North
State  261
Old Q Values:  [1287.83215663   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [6906.36135079   26.73544252 -142.5818878   123.6214372 ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  496.48838101   445.64685634 21306.09496045   262.76946019]
------
Step:7, Action:North
State  181
Old Q Values:  [  496.48838101   445.64685634 21306.09496045   262.76946019]
New Q values:  [ 1323.39458386   445.64685634 21306.09496045   262.76946019]
Reward: 9  Episode Reward:  33
xxxxx
xag.x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         3731.33077154    0.            0.        ]
------
Step:8, Action:South
State  103
Old Q Values:  [221.30610858 797.26081698 238.35800069   0.        ]
New Q values:  [ 221.30610858 6710.13281493  238.35800069    0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1323.39458386   445.64685634 21306.09496045   262.76946019]
------
Step:9, Action:North
State  181
Old Q Values:  [ 1323.39458386   445.64685634 21306.09496045   262.76946019]
New Q values:  [ 2541.79767802   445.64685634 21306.09496045   262.76946019]
Reward: -1  Episode Reward:  31
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 6710.13281493  238.35800069    0.        ]
------
Step:10, Action:South
State  103
Old Q Values:  [ 221.30610858 6710.13281493  238.35800069    0.        ]
New Q values:  [ 221.30610858 9075.2816141   238.35800069    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2541.79767802   445.64685634 21306.09496045   262.76946019]
------
Step:11, Action:North
State  183
Old Q Values:  [ 675.38544397  661.24847269 1682.35573346  358.5166536 ]
New Q values:  [2992.13866182  661.24847269 1682.35573346  358.5166536 ]
Reward: -1  Episode Reward:  29
xxxxx
xa..x
x  .x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 9075.2816141   238.35800069    0.        ]
------
Step:12, Action:South
State  103
Old Q Values:  [ 221.30610858 9075.2816141   238.35800069    0.        ]
New Q values:  [ 221.30610858 4527.15424419  238.35800069    0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2992.13866182  661.24847269 1682.35573346  358.5166536 ]
------
Step:13, Action:North
State  183
Old Q Values:  [2992.13866182  661.24847269 1682.35573346  358.5166536 ]
New Q values:  [2554.40173798  661.24847269 1682.35573346  358.5166536 ]
Reward: -1  Episode Reward:  27
xxxxx
xa..x
x  .x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 4527.15424419  238.35800069    0.        ]
------
Step:14, Action:South
State  103
Old Q Values:  [ 221.30610858 4527.15424419  238.35800069    0.        ]
New Q values:  [ 221.30610858 8202.09018581  238.35800069    0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2541.79767802   445.64685634 21306.09496045   262.76946019]
------
Step:15, Action:North
State  181
Old Q Values:  [ 2541.79767802   445.64685634 21306.09496045   262.76946019]
New Q values:  [ 2135.51830267   445.64685634 21306.09496045   262.76946019]
Reward: -1  Episode Reward:  25
xxxxx
xag.x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         3731.33077154    0.            0.        ]
------
Step:16, Action:South
State  100
Old Q Values:  [   0.         4011.09684676  318.34177431    0.        ]
New Q values:  [   0.         8676.39736464  318.34177431    0.        ]
Reward: -1  Episode Reward:  24
xxxxx
xg..x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  150.2741814   7546.67422929 23575.19541979 -4966.32149798]
------
Step:17, Action:East
State  180
Old Q Values:  [  150.2741814   7546.67422929 23575.19541979 -4966.32149798]
New Q values:  [  150.2741814   7546.67422929 16238.97203849 -4966.32149798]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   634.55080188 22698.31290191   492.34934406]
------
Step:18, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.60896760e+02  7.98156887e+02  1.03161518e+03]
New Q values:  [-6.00000000e-01  9.60896760e+02  3.65837401e+03  1.03161518e+03]
Reward: 9  Episode Reward:  32
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11112.37084995  1093.22762389   790.72804752  1050.85266124]
------
Step:19, Action:North
State  208
Old Q Values:  [15761.12276667  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [17174.41681692  2483.29359772 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  41
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 36215.22570085]
------
Step:20, Action:West
State  128
Old Q Values:  [11374.93691792 10380.76024543 -8652.84       68375.61054215]
New Q values:  [ 11374.93691792  10380.76024543  -8652.84       120640.57313574]
Reward: 100009  Episode Reward:  100050
xxxxx
xga x
x   x
x   x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  4.90441201e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  4.90441201e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.65564562e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   523.68399323   546.62693994]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   523.68399323   546.62693994]
New Q values:  [ -253.44886264 -1902.20915811   523.68399323   494.78847153]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  902.45898518  520.46511977 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869  902.45898518  520.46511977 -120.29354603]
New Q values:  [-177.44732869 1132.70411547  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2554.40173798  661.24847269 1682.35573346  358.5166536 ]
------
Step:4, Action:North
State  181
Old Q Values:  [ 2135.51830267   445.64685634 21306.09496045   262.76946019]
New Q values:  [ 1193.41855571   445.64685634 21306.09496045   262.76946019]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
x g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1132.70411547  520.46511977 -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 1132.70411547  520.46511977 -120.29354603]
New Q values:  [-177.44732869 1218.80216758  520.46511977 -120.29354603]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2554.40173798  661.24847269 1682.35573346  358.5166536 ]
------
Step:6, Action:North
State  183
Old Q Values:  [2554.40173798  661.24847269 1682.35573346  358.5166536 ]
New Q values:  [1386.80134547  661.24847269 1682.35573346  358.5166536 ]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1218.80216758  520.46511977 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 1218.80216758  520.46511977 -120.29354603]
New Q values:  [-177.44732869  991.62758707  520.46511977 -120.29354603]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1386.80134547  661.24847269 1682.35573346  358.5166536 ]
------
Step:8, Action:East
State  182
Old Q Values:  [    0.         -7507.54632711  1283.06065933     0.        ]
New Q values:  [    0.         -7507.54632711  5160.03171396     0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:9, Action:East
State  200
Old Q Values:  [   62.8218634  39409.21472391   996.93198117   568.38654082]
New Q values:  [   62.8218634  39409.21472391   550.32637685   568.38654082]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  487.1786146  -5399.47678043 -8896.20691497   -81.5545589 ]
------
Step:10, Action:North
State  216
Old Q Values:  [  487.1786146  -5399.47678043 -8896.20691497   -81.5545589 ]
New Q values:  [  640.0667541  -5399.47678043 -8896.20691497   -81.5545589 ]
Reward: -1  Episode Reward:  40
xxxxx
xg ax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1485.98436087 -2383.80019164   195.1554019 ]
------
Step:11, Action:South
State  136
Old Q Values:  [ -170.77177351  1485.98436087 -2383.80019164   195.1554019 ]
New Q values:  [ -170.77177351   785.81377058 -2383.80019164   195.1554019 ]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  640.0667541  -5399.47678043 -8896.20691497   -81.5545589 ]
------
Step:12, Action:North
State  216
Old Q Values:  [  640.0667541  -5399.47678043 -8896.20691497   -81.5545589 ]
New Q values:  [-5508.82916719 -5399.47678043 -8896.20691497   -81.5545589 ]
Reward: -10001  Episode Reward:  -9962
xxxxx
x  gx
x   x
x.. x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17174.41681692  2483.29359772 -4584.50430574  1067.63960005]
------
Step:1, Action:North
State  216
Old Q Values:  [-5508.82916719 -5399.47678043 -8896.20691497   -81.5545589 ]
New Q values:  [-2088.4622982  -5399.47678043 -8896.20691497   -81.5545589 ]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.65564562e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.65564562e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.15435165e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   546.03113413   539.6442801 ]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   546.03113413   539.6442801 ]
New Q values:  [ -281.736      -1150.91067548   312.44300319   539.6442801 ]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.15435165e+02]
------
Step:4, Action:West
State  136
Old Q Values:  [ -170.77177351   785.81377058 -2383.80019164   195.1554019 ]
New Q values:  [ -170.77177351   785.81377058 -2383.80019164   541.14053728]
Reward: -1  Episode Reward:  16
xxxxx
xga x
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1545.59458841   112.58531102]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   312.44300319   539.6442801 ]
New Q values:  [ -281.736      -1150.91067548   219.00775082   539.6442801 ]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.15435165e+02]
------
Step:6, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  3.15435165e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.87467350e+02]
Reward: -1  Episode Reward:  14
xxxxx
x.a x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   539.6442801 ]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   539.6442801 ]
New Q values:  [ -281.736      -1150.91067548   219.00775082   513.96223136]
Reward: 9  Episode Reward:  23
xxxxx
xa  x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  975.68173106  174.61129759 -252.78192178]
------
Step:8, Action:South
State  111
Old Q Values:  [-177.44732869  991.62758707  520.46511977 -120.29354603]
New Q values:  [-177.44732869  970.33164828  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  32
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1894.26871151 1670.6531179   154.04646645]
------
Step:9, Action:South
State  189
Old Q Values:  [ 275.08817949 1894.26871151 1670.6531179   154.04646645]
New Q values:  [ 275.08817949 2835.01588984 1670.6531179   154.04646645]
Reward: 9  Episode Reward:  41
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6906.36135079   26.73544252 -142.5818878   123.6214372 ]
------
Step:10, Action:North
State  260
Old Q Values:  [-5258.4185764  -5704.51612281  5234.2303441  -5679.36893145]
New Q values:  [-1662.87464405 -5704.51612281  5234.2303441  -5679.36893145]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263   935.74403408  1470.30928837     0.        ]
------
Step:11, Action:East
State  189
Old Q Values:  [ 275.08817949 2835.01588984 1670.6531179   154.04646645]
New Q values:  [  275.08817949  2835.01588984 12490.42566433   154.04646645]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  39409.21472391   550.32637685   568.38654082]
------
Step:12, Action:South
State  205
Old Q Values:  [   0.         1104.47596846    0.          198.38683706]
New Q values:  [   0.         1929.52417238    0.          198.38683706]
Reward: 9  Episode Reward:  48
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 4.94111262e+03 3.86565271e+02]
------
Step:13, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 3778.01111276  677.18474264]
New Q values:  [  633.77207181  -168.92307549 63158.85777874   677.18474264]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  150.2741814   7546.67422929 16238.97203849 -4966.32149798]
------
Step:1, Action:East
State  180
Old Q Values:  [  150.2741814   7546.67422929 16238.97203849 -4966.32149798]
New Q values:  [  150.2741814   7546.67422929 13310.48268597 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144   634.55080188 22698.31290191   492.34934406]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.60896760e+02  3.65837401e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  9.60896760e+02  4.80246086e+03  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11112.37084995  1093.22762389   790.72804752  1050.85266124]
------
Step:3, Action:North
State  216
Old Q Values:  [-2088.4622982  -5399.47678043 -8896.20691497   -81.5545589 ]
New Q values:  [ -743.74471425 -5399.47678043 -8896.20691497   -81.5545589 ]
Reward: 9  Episode Reward:  27
xxxxx
x .ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.87467350e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.87467350e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.74575609e+02]
Reward: 9  Episode Reward:  36
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   513.96223136]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   513.96223136]
New Q values:  [ -281.736      -1150.91067548   219.00775082   447.009194  ]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  806.74767153  321.64367051 -180.6       ]
------
Step:6, Action:East
State  110
Old Q Values:  [-239.29051573  806.74767153  321.64367051 -180.6       ]
New Q values:  [-239.29051573  806.74767153  262.1602264  -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   447.009194  ]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   447.009194  ]
New Q values:  [ -281.736      -1150.91067548   219.00775082   420.22797906]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  806.74767153  262.1602264  -180.6       ]
------
Step:8, Action:East
State  108
Old Q Values:  [-8463.16477134  3762.38420191   468.70125453     0.        ]
New Q values:  [-8463.16477134  3762.38420191   650.55887833     0.        ]
Reward: -1  Episode Reward:  32
xxxxx
xga x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1545.59458841   112.58531102]
------
Step:9, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1545.59458841   112.58531102]
New Q values:  [-9594.56523706 -8069.05606225   853.38196654   112.58531102]
Reward: -1  Episode Reward:  31
xxxxx
x gax
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   785.81377058 -2383.80019164   541.14053728]
------
Step:10, Action:South
State  136
Old Q Values:  [ -170.77177351   785.81377058 -2383.80019164   541.14053728]
New Q values:  [ -170.77177351   289.25914056 -2383.80019164   541.14053728]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -743.74471425 -5399.47678043 -8896.20691497   -81.5545589 ]
------
Step:11, Action:West
State  216
Old Q Values:  [ -743.74471425 -5399.47678043 -8896.20691497   -81.5545589 ]
New Q values:  [ -743.74471425 -5399.47678043 -8896.20691497 11789.54259361]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  39409.21472391   550.32637685   568.38654082]
------
Step:12, Action:South
State  200
Old Q Values:  [   62.8218634  39409.21472391   550.32637685   568.38654082]
New Q values:  [   62.8218634  18813.07674967   550.32637685   568.38654082]
Reward: 9  Episode Reward:  38
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10146.63620035 -8521.23367799  3227.01171187  1688.81308644]
------
Step:13, Action:North
State  272
Old Q Values:  [10146.63620035 -8521.23367799  3227.01171187  1688.81308644]
New Q values:  [ 9701.97750504 -8521.23367799  3227.01171187  1688.81308644]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  18813.07674967   550.32637685   568.38654082]
------
Step:14, Action:South
State  200
Old Q Values:  [   62.8218634  18813.07674967   550.32637685   568.38654082]
New Q values:  [   62.8218634  10435.22395138   550.32637685   568.38654082]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9701.97750504 -8521.23367799  3227.01171187  1688.81308644]
------
Step:15, Action:North
State  272
Old Q Values:  [ 9701.97750504 -8521.23367799  3227.01171187  1688.81308644]
New Q values:  [ 7010.75818743 -8521.23367799  3227.01171187  1688.81308644]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xga x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  10435.22395138   550.32637685   568.38654082]
------
Step:16, Action:South
State  200
Old Q Values:  [   62.8218634  10435.22395138   550.32637685   568.38654082]
New Q values:  [  62.8218634  6276.71703678  550.32637685  568.38654082]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7010.75818743 -8521.23367799  3227.01171187  1688.81308644]
------
Step:17, Action:East
State  272
Old Q Values:  [ 7010.75818743 -8521.23367799  3227.01171187  1688.81308644]
New Q values:  [ 7010.75818743 -8521.23367799  2938.45801838  1688.81308644]
Reward: 9  Episode Reward:  43
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  5474.17777878]
------
Step:18, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  5474.17777878]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  4292.29856774]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7010.75818743 -8521.23367799  2938.45801838  1688.81308644]
------
Step:19, Action:East
State  272
Old Q Values:  [ 7010.75818743 -8521.23367799  2938.45801838  1688.81308644]
New Q values:  [ 7010.75818743 -8521.23367799  2462.47277768  1688.81308644]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  4292.29856774]
------
Step:20, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  4292.29856774]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  3819.54688332]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7010.75818743 -8521.23367799  2462.47277768  1688.81308644]
------
Step:21, Action:North
State  272
Old Q Values:  [ 7010.75818743 -8521.23367799  2462.47277768  1688.81308644]
New Q values:  [ 4686.71838601 -8521.23367799  2462.47277768  1688.81308644]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  6276.71703678  550.32637685  568.38654082]
------
Step:22, Action:South
State  200
Old Q Values:  [  62.8218634  6276.71703678  550.32637685  568.38654082]
New Q values:  [  62.8218634  3916.10233051  550.32637685  568.38654082]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4686.71838601 -8521.23367799  2462.47277768  1688.81308644]
------
Step:23, Action:North
State  272
Old Q Values:  [ 4686.71838601 -8521.23367799  2462.47277768  1688.81308644]
New Q values:  [ 3048.91805356 -8521.23367799  2462.47277768  1688.81308644]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3916.10233051  550.32637685  568.38654082]
------
Step:24, Action:South
State  194
Old Q Values:  [-6.00000000e-01  9.60896760e+02  4.80246086e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.29843412e+03  4.80246086e+03  1.03161518e+03]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3048.91805356 -8521.23367799  2462.47277768  1688.81308644]
------
Step:25, Action:North
State  272
Old Q Values:  [ 3048.91805356 -8521.23367799  2462.47277768  1688.81308644]
New Q values:  [ 2321.49843386 -8521.23367799  2462.47277768  1688.81308644]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  3675.10404147     0.        ]
------
Step:26, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.29843412e+03  4.80246086e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.29843412e+03  5.25409560e+03  1.03161518e+03]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11112.37084995  1093.22762389   790.72804752  1050.85266124]
------
Step:27, Action:North
State  218
Old Q Values:  [ 138.78034012 6862.634248      0.         1863.19740327]
New Q values:  [ 137.28481888 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.74575609e+02]
------
Step:28, Action:West
State  130
Old Q Values:  [26290.62961917  6087.16496961  -180.00807518 36215.22570085]
New Q values:  [26290.62961917  6087.16496961  -180.00807518 35218.29453991]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 69109.34753192]
------
Step:29, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   420.22797906]
New Q values:  [ -281.736      -1150.91067548   219.00775082   367.76795611]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
xg  x
x.  x
xxxxx
Step:30, Action:South
State  104
Old Q Values:  [-8652.84        7009.88182296   821.87865408 -8652.84      ]
New Q values:  [-8652.84       -1093.68272393   821.87865408 -8652.84      ]
Reward: -10001  Episode Reward:  -9970
xxxxx
xg  x
x   x
x.  x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281  5234.2303441  -5679.36893145]
------
Step:1, Action:East
State  261
Old Q Values:  [6906.36135079   26.73544252 -142.5818878   123.6214372 ]
New Q values:  [6906.36135079   26.73544252  204.92125935  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:2, Action:East
State  272
Old Q Values:  [ 2321.49843386 -8521.23367799  2462.47277768  1688.81308644]
New Q values:  [ 2321.49843386 -8521.23367799  2136.25317607  1688.81308644]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  3819.54688332]
------
Step:3, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  3819.54688332]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  2223.66828349]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2321.49843386 -8521.23367799  2136.25317607  1688.81308644]
------
Step:4, Action:North
State  272
Old Q Values:  [ 2321.49843386 -8521.23367799  2136.25317607  1688.81308644]
New Q values:  [ 3942.07829966 -8521.23367799  2136.25317607  1688.81308644]
Reward: -9991  Episode Reward:  -9974
xxxxx
x ..x
x.g.x
x   x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  970.33164828  520.46511977 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 8202.09018581  238.35800069    0.        ]
New Q values:  [ 221.30610858 3790.94279436  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1386.80134547  661.24847269 1682.35573346  358.5166536 ]
------
Step:2, Action:East
State  183
Old Q Values:  [1386.80134547  661.24847269 1682.35573346  358.5166536 ]
New Q values:  [1386.80134547  661.24847269 2254.57097294  358.5166536 ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.29843412e+03  5.25409560e+03  1.03161518e+03]
------
Step:3, Action:East
State  200
Old Q Values:  [  62.8218634  3916.10233051  550.32637685  568.38654082]
New Q values:  [  62.8218634  3916.10233051 3756.39332882  568.38654082]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -743.74471425 -5399.47678043 -8896.20691497 11789.54259361]
------
Step:4, Action:North
State  218
Old Q Values:  [ 137.28481888 6862.634248      0.         1863.19740327]
New Q values:  [ 142.68661038 6862.634248      0.         1863.19740327]
Reward: 9  Episode Reward:  26
xxxxx
x .ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.74575609e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.74575609e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.25560631e+02]
Reward: 9  Episode Reward:  35
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   367.76795611]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   367.76795611]
New Q values:  [ -281.736      -1150.91067548   219.00775082   439.21170176]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  975.68173106  174.61129759 -252.78192178]
------
Step:7, Action:South
State  110
Old Q Values:  [-239.29051573  806.74767153  262.1602264  -180.6       ]
New Q values:  [-239.29051573  860.56017632  262.1602264  -180.6       ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:8, Action:East
State  188
Old Q Values:  [-6523.78898263   935.74403408  1470.30928837     0.        ]
New Q values:  [-6523.78898263   935.74403408  1762.3544145      0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3916.10233051 3756.39332882  568.38654082]
------
Step:9, Action:South
State  200
Old Q Values:  [  62.8218634  3916.10233051 3756.39332882  568.38654082]
New Q values:  [  62.8218634  2754.4644221  3756.39332882  568.38654082]
Reward: 9  Episode Reward:  41
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3942.07829966 -8521.23367799  2136.25317607  1688.81308644]
------
Step:10, Action:North
State  272
Old Q Values:  [ 3942.07829966 -8521.23367799  2136.25317607  1688.81308644]
New Q values:  [ 2703.14931851 -8521.23367799  2136.25317607  1688.81308644]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2754.4644221  3756.39332882  568.38654082]
------
Step:11, Action:East
State  200
Old Q Values:  [  62.8218634  2754.4644221  3756.39332882  568.38654082]
New Q values:  [  62.8218634  2754.4644221  5038.82010961  568.38654082]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ -743.74471425 -5399.47678043 -8896.20691497 11789.54259361]
------
Step:12, Action:North
State  216
Old Q Values:  [ -743.74471425 -5399.47678043 -8896.20691497 11789.54259361]
New Q values:  [ -230.42969652 -5399.47678043 -8896.20691497 11789.54259361]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.25560631e+02]
------
Step:13, Action:West
State  136
Old Q Values:  [ -170.77177351   289.25914056 -2383.80019164   541.14053728]
New Q values:  [ -170.77177351   289.25914056 -2383.80019164   340.47246602]
Reward: -1  Episode Reward:  37
xxxxx
x agx
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   523.68399323   494.78847153]
New Q values:  [ -253.44886264 -1902.20915811   523.68399323   490.01990793]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  975.68173106  174.61129759 -252.78192178]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558  975.68173106  174.61129759 -252.78192178]
New Q values:  [-252.35169558 2182.59534164  174.61129759 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 200.47685943    0.         5976.40883071 -178.98      ]
------
Step:16, Action:North
State  185
Old Q Values:  [ 200.47685943    0.         5976.40883071 -178.98      ]
New Q values:  [ 734.36934626    0.         5976.40883071 -178.98      ]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2182.59534164  174.61129759 -252.78192178]
------
Step:17, Action:South
State  107
Old Q Values:  [-252.35169558 2182.59534164  174.61129759 -252.78192178]
New Q values:  [-252.35169558 1720.81056551  174.61129759 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         2827.90809619    0.        ]
------
Step:18, Action:East
State  185
Old Q Values:  [ 734.36934626    0.         5976.40883071 -178.98      ]
New Q values:  [ 734.36934626    0.         2849.64163109 -178.98      ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
------
Step:19, Action:South
State  200
Old Q Values:  [  62.8218634  2754.4644221  5038.82010961  568.38654082]
New Q values:  [  62.8218634  1912.13056439 5038.82010961  568.38654082]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2703.14931851 -8521.23367799  2136.25317607  1688.81308644]
------
Step:20, Action:East
State  272
Old Q Values:  [ 2703.14931851 -8521.23367799  2136.25317607  1688.81308644]
New Q values:  [ 2703.14931851 -8521.23367799  1527.00175547  1688.81308644]
Reward: 9  Episode Reward:  40
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  2223.66828349]
------
Step:21, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  2223.66828349]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  1699.81210895]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2703.14931851 -8521.23367799  1527.00175547  1688.81308644]
------
Step:22, Action:North
State  272
Old Q Values:  [ 2703.14931851 -8521.23367799  1527.00175547  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  1527.00175547  1688.81308644]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1912.13056439 5038.82010961  568.38654082]
------
Step:23, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.29843412e+03  5.25409560e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.29843412e+03  5.43474949e+03  1.03161518e+03]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11112.37084995  1093.22762389   790.72804752  1050.85266124]
------
Step:24, Action:North
State  216
Old Q Values:  [ -230.42969652 -5399.47678043 -8896.20691497 11789.54259361]
New Q values:  [  -25.10368942 -5399.47678043 -8896.20691497 11789.54259361]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.25560631e+02]
------
Step:25, Action:West
State  130
Old Q Values:  [26290.62961917  6087.16496961  -180.00807518 35218.29453991]
New Q values:  [26290.62961917  6087.16496961  -180.00807518 34819.52207554]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 69109.34753192]
------
Step:26, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   439.21170176]
New Q values:  [ -281.736      -1150.91067548   219.00775082   375.36144519]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   667.58921495  -180.6       ]
------
Step:27, Action:East
State  98
Old Q Values:  [    0.         43751.82428687 48660.81170179     0.        ]
New Q values:  [    0.         43751.82428687 40196.52894029     0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 69109.34753192]
------
Step:28, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   375.36144519]
New Q values:  [ -281.736      -1150.91067548   219.00775082   349.82134256]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   667.58921495  -180.6       ]
------
Step:29, Action:East
State  107
Old Q Values:  [-252.35169558 1720.81056551  174.61129759 -252.78192178]
New Q values:  [-252.35169558 1720.81056551  174.1909218  -252.78192178]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   349.82134256]
------
Step:30, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   349.82134256]
New Q values:  [ -281.736      -1150.91067548   219.00775082   339.60530151]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   667.58921495  -180.6       ]
------
Step:31, Action:East
State  104
Old Q Values:  [-8652.84       -1093.68272393   821.87865408 -8652.84      ]
New Q values:  [-8652.84       -1093.68272393   584.16605159 -8652.84      ]
Reward: -1  Episode Reward:  29
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   853.38196654   112.58531102]
------
Step:32, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   853.38196654   112.58531102]
New Q values:  [-9594.56523706 -8069.05606225   442.89452642   112.58531102]
Reward: -1  Episode Reward:  28
xxxxx
xg ax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   289.25914056 -2383.80019164   340.47246602]
------
Step:33, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  2.25560631e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.91505843e+02]
Reward: -1  Episode Reward:  27
xxxxx
x a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   339.60530151]
------
Step:34, Action:West
State  114
Old Q Values:  [ -180.6         3557.6642036  18106.71132503 69109.34753192]
New Q values:  [ -180.6         3557.6642036  18106.71132503 40768.68629883]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SW
[    0.         43751.82428687 40196.52894029     0.        ]
------
Step:35, Action:South
State  107
Old Q Values:  [-252.35169558 1720.81056551  174.1909218  -252.78192178]
New Q values:  [-252.35169558 1536.09665506  174.1909218  -252.78192178]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xa  x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         2827.90809619    0.        ]
------
Step:36, Action:East
State  185
Old Q Values:  [ 734.36934626    0.         2849.64163109 -178.98      ]
New Q values:  [  734.36934626     0.         -3349.09731468  -178.98      ]
Reward: -10001  Episode Reward:  -9976
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 4182.6894674  2069.36982143 1169.39963074]
------
Step:1, Action:South
State  194
Old Q Values:  [-6.00000000e-01  1.29843412e+03  5.43474949e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -4.69753462e+03  5.43474949e+03  1.03161518e+03]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x.g x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1193.41855571   445.64685634 21306.09496045   262.76946019]
------
Step:1, Action:East
State  189
Old Q Values:  [  275.08817949  2835.01588984 12490.42566433   154.04646645]
New Q values:  [  275.08817949  2835.01588984 12729.65459775   154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 25760.28110673 -2072.09364767  1460.9765133 ]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 25760.28110673 -2072.09364767  1460.9765133 ]
New Q values:  [-5922.26708831 29257.16977631 -2072.09364767  1460.9765133 ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  633.77207181  -168.92307549 63158.85777874   677.18474264]
------
Step:3, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  1527.00175547  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  1126.14433487  1688.81308644]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  1699.81210895]
------
Step:4, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  1699.81210895]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966 19626.9821772 ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  633.77207181  -168.92307549 63158.85777874   677.18474264]
------
Step:5, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  1126.14433487  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  6337.95238711  1688.81308644]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966 19626.9821772 ]
------
Step:6, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966 19626.9821772 ]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  3751.57858701]
Reward: -10001  Episode Reward:  -9976
xxxxx
x...x
x   x
x.g x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799  6337.95238711  1688.81308644]
------
Step:1, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  6337.95238711  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  3666.05453095  1688.81308644]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  3751.57858701]
------
Step:2, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  3751.57858701]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  2599.84779409]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799  3666.05453095  1688.81308644]
------
Step:3, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  3666.05453095  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  2245.77615061  1688.81308644]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  2599.84779409]
------
Step:4, Action:West
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  2599.84779409]
New Q values:  [  809.38859727 -6442.16912869 -8192.20126966  1817.03084572]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799  2245.77615061  1688.81308644]
------
Step:5, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  2245.77615061  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  1442.81971396  1688.81308644]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  809.38859727 -6442.16912869 -8192.20126966  1817.03084572]
------
Step:6, Action:North
State  288
Old Q Values:  [  809.38859727 -6442.16912869 -8192.20126966  1817.03084572]
New Q values:  [ 3662.86669389 -6442.16912869 -8192.20126966  1817.03084572]
Reward: 9  Episode Reward:  14
xxxxx
x ..x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11112.37084995  1093.22762389   790.72804752  1050.85266124]
------
Step:7, Action:North
State  208
Old Q Values:  [17174.41681692  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [ 6932.61847958  2483.29359772 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  23
xxxxx
x .ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.91505843e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.91505843e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.83883928e+02]
Reward: 9  Episode Reward:  32
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   339.60530151]
------
Step:9, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   442.89452642   112.58531102]
New Q values:  [-9594.56523706 -8069.05606225   442.89452642   168.87347733]
Reward: -1  Episode Reward:  31
xxxxx
xag x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         414.79784307   86.99637671    0.        ]
------
Step:10, Action:South
State  104
Old Q Values:  [-8652.84       -1093.68272393   584.16605159 -8652.84      ]
New Q values:  [-8652.84        3561.07171622   584.16605159 -8652.84      ]
Reward: 9  Episode Reward:  40
xxxxx
xg  x
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  150.2741814   7546.67422929 13310.48268597 -4966.32149798]
------
Step:11, Action:East
State  184
Old Q Values:  [ 264.76160535    0.         7517.65918625    0.        ]
New Q values:  [ 264.76160535    0.         4524.10970738    0.        ]
Reward: 9  Episode Reward:  49
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1912.13056439 5038.82010961  568.38654082]
------
Step:12, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -4.69753462e+03  5.43474949e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -4.69753462e+03  5.50701105e+03  1.03161518e+03]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11112.37084995  1093.22762389   790.72804752  1050.85266124]
------
Step:13, Action:North
State  218
Old Q Values:  [ 142.68661038 6862.634248      0.         1863.19740327]
New Q values:  [ 111.63982241 6862.634248      0.         1863.19740327]
Reward: -1  Episode Reward:  47
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.83883928e+02]
------
Step:14, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.83883928e+02]
New Q values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.74835161e+02]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   339.60530151]
------
Step:15, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   339.60530151]
New Q values:  [ -281.736      -1150.91067548   219.00775082   596.07111712]
Reward: -1  Episode Reward:  45
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1536.09665506  174.1909218  -252.78192178]
------
Step:16, Action:South
State  98
Old Q Values:  [    0.         43751.82428687 40196.52894029     0.        ]
New Q values:  [    0.         43859.10131705 40196.52894029     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:17, Action:East
State  184
Old Q Values:  [ 264.76160535    0.         4524.10970738    0.        ]
New Q values:  [ 264.76160535    0.         3320.68991584    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1912.13056439 5038.82010961  568.38654082]
------
Step:18, Action:East
State  200
Old Q Values:  [  62.8218634  1912.13056439 5038.82010961  568.38654082]
New Q values:  [  62.8218634  1912.13056439 5551.79082193  568.38654082]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  -25.10368942 -5399.47678043 -8896.20691497 11789.54259361]
------
Step:19, Action:West
State  216
Old Q Values:  [  -25.10368942 -5399.47678043 -8896.20691497 11789.54259361]
New Q values:  [  -25.10368942 -5399.47678043 -8896.20691497  6380.75428402]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1912.13056439 5551.79082193  568.38654082]
------
Step:20, Action:East
State  200
Old Q Values:  [  62.8218634  1912.13056439 5551.79082193  568.38654082]
New Q values:  [  62.8218634  1912.13056439 4134.34261398  568.38654082]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  -25.10368942 -5399.47678043 -8896.20691497  6380.75428402]
------
Step:21, Action:North
State  216
Old Q Values:  [  -25.10368942 -5399.47678043 -8896.20691497  6380.75428402]
New Q values:  [   41.80907267 -5399.47678043 -8896.20691497  6380.75428402]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.74835161e+02]
------
Step:22, Action:West
State  136
Old Q Values:  [ -170.77177351   289.25914056 -2383.80019164   340.47246602]
New Q values:  [ -170.77177351   289.25914056 -2383.80019164   268.45734434]
Reward: -1  Episode Reward:  38
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   442.89452642   168.87347733]
------
Step:23, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   442.89452642   168.87347733]
New Q values:  [-9594.56523706 -8069.05606225   263.33555274   168.87347733]
Reward: -1  Episode Reward:  37
xxxxx
xg ax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   289.25914056 -2383.80019164   268.45734434]
------
Step:24, Action:South
State  138
Old Q Values:  [ 8.43634063e+00 -5.30850751e+03 -3.22965309e-01  1.74835161e+02]
New Q values:  [   8.43634063 -209.77671831   -0.32296531  174.83516147]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   41.80907267 -5399.47678043 -8896.20691497  6380.75428402]
------
Step:25, Action:West
State  216
Old Q Values:  [   41.80907267 -5399.47678043 -8896.20691497  6380.75428402]
New Q values:  [   41.80907267 -5399.47678043 -8896.20691497 -2207.9955022 ]
Reward: -10001  Episode Reward:  -9965
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281  5234.2303441  -5679.36893145]
------
Step:1, Action:East
State  261
Old Q Values:  [6906.36135079   26.73544252  204.92125935  123.6214372 ]
New Q values:  [6906.36135079   26.73544252  343.92251821  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:2, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  1442.81971396  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  1681.38789375  1688.81308644]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3662.86669389 -6442.16912869 -8192.20126966  1817.03084572]
------
Step:3, Action:North
State  288
Old Q Values:  [ 3662.86669389 -6442.16912869 -8192.20126966  1817.03084572]
New Q values:  [ 4804.25793254 -6442.16912869 -8192.20126966  1817.03084572]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11112.37084995  1093.22762389   790.72804752  1050.85266124]
------
Step:4, Action:North
State  210
Old Q Values:  [11112.37084995  1093.22762389   790.72804752  1050.85266124]
New Q values:  [14896.20496264  1093.22762389   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 34819.52207554]
------
Step:5, Action:West
State  130
Old Q Values:  [26290.62961917  6087.16496961  -180.00807518 34819.52207554]
New Q values:  [26290.62961917  6087.16496961  -180.00807518 26163.81471987]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 40768.68629883]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   596.07111712]
New Q values:  [ -281.736      -1150.91067548   219.00775082   534.92794133]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  970.33164828  520.46511977 -120.29354603]
------
Step:7, Action:South
State  110
Old Q Values:  [-239.29051573  860.56017632  262.1602264  -180.6       ]
New Q values:  [-239.29051573 1891.63358472  262.1602264  -180.6       ]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -7507.54632711  5160.03171396     0.        ]
------
Step:8, Action:East
State  176
Old Q Values:  [ 76485.61294353   1621.55095326 103366.80613407      0.        ]
New Q values:  [ 76485.61294353   1621.55095326 110360.20137974      0.        ]
Reward: 100009  Episode Reward:  100062
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14896.20496264  1093.22762389   790.72804752  1050.85266124]
------
Step:1, Action:North
State  208
Old Q Values:  [ 6932.61847958  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [ 2830.89794027  2483.29359772 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[   8.43634063 -209.77671831   -0.32296531  174.83516147]
------
Step:2, Action:West
State  138
Old Q Values:  [   8.43634063 -209.77671831   -0.32296531  174.83516147]
New Q values:  [   8.43634063 -209.77671831   -0.32296531  235.81244699]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   534.92794133]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   534.92794133]
New Q values:  [ -281.736      -1150.91067548   219.00775082   786.86125195]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1891.63358472  262.1602264  -180.6       ]
------
Step:4, Action:East
State  110
Old Q Values:  [-239.29051573 1891.63358472  262.1602264  -180.6       ]
New Q values:  [-239.29051573 1891.63358472  340.32246615 -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   786.86125195]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   786.86125195]
New Q values:  [ -281.736      -1150.91067548   219.00775082   605.24399526]
Reward: -1  Episode Reward:  25
xxxxx
xa  x
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  970.33164828  520.46511977 -120.29354603]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869  970.33164828  520.46511977 -120.29354603]
New Q values:  [-177.44732869 4212.42903864  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  2835.01588984 12729.65459775   154.04646645]
------
Step:7, Action:South
State  183
Old Q Values:  [1386.80134547  661.24847269 2254.57097294  358.5166536 ]
New Q values:  [1386.80134547 2335.80779431 2254.57097294  358.5166536 ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6906.36135079   26.73544252  343.92251821  123.6214372 ]
------
Step:8, Action:North
State  261
Old Q Values:  [6906.36135079   26.73544252  343.92251821  123.6214372 ]
New Q values:  [6580.84091964   26.73544252  343.92251821  123.6214372 ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  2835.01588984 12729.65459775   154.04646645]
------
Step:9, Action:South
State  189
Old Q Values:  [  275.08817949  2835.01588984 12729.65459775   154.04646645]
New Q values:  [  275.08817949  3107.65863183 12729.65459775   154.04646645]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6580.84091964   26.73544252  343.92251821  123.6214372 ]
------
Step:10, Action:North
State  261
Old Q Values:  [6580.84091964   26.73544252  343.92251821  123.6214372 ]
New Q values:  [6450.63274718   26.73544252  343.92251821  123.6214372 ]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  3107.65863183 12729.65459775   154.04646645]
------
Step:11, Action:East
State  189
Old Q Values:  [  275.08817949  3107.65863183 12729.65459775   154.04646645]
New Q values:  [ 275.08817949 3107.65863183 5556.9399379   154.04646645]
Reward: 9  Episode Reward:  39
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
------
Step:12, Action:South
State  204
Old Q Values:  [   0.         1387.82535955 1076.13926719  441.58769553]
New Q values:  [   0.          817.08415829 1076.13926719  441.58769553]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:13, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  1681.38789375  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799 62119.23253726  1688.81308644]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.45197447e+03 3.00269298e+04 1.27673579e+04]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  1912.13056439 4134.34261398  568.38654082]
New Q values:  [  62.8218634  1912.13056439 1671.67976739  568.38654082]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   41.80907267 -5399.47678043 -8896.20691497 -2207.9955022 ]
------
Step:2, Action:North
State  216
Old Q Values:  [   41.80907267 -5399.47678043 -8896.20691497 -2207.9955022 ]
New Q values:  [   92.86736316 -5399.47678043 -8896.20691497 -2207.9955022 ]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[   8.43634063 -209.77671831   -0.32296531  235.81244699]
------
Step:3, Action:West
State  138
Old Q Values:  [   8.43634063 -209.77671831   -0.32296531  235.81244699]
New Q values:  [   8.43634063 -209.77671831   -0.32296531  275.29817737]
Reward: -1  Episode Reward:  17
xxxxx
x.a x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   605.24399526]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   605.24399526]
New Q values:  [ -281.736      -1150.91067548   219.00775082   814.98767352]
Reward: 9  Episode Reward:  26
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1891.63358472  340.32246615 -180.6       ]
------
Step:5, Action:East
State  107
Old Q Values:  [-252.35169558 1536.09665506  174.1909218  -252.78192178]
New Q values:  [-252.35169558 1536.09665506  313.57267078 -252.78192178]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   814.98767352]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   814.98767352]
New Q values:  [ -281.736      -1150.91067548   219.00775082   786.22406593]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1536.09665506  313.57267078 -252.78192178]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 4212.42903864  520.46511977 -120.29354603]
New Q values:  [-177.44732869 3357.45359683  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  33
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 3107.65863183 5556.9399379   154.04646645]
------
Step:8, Action:South
State  189
Old Q Values:  [ 275.08817949 3107.65863183 5556.9399379   154.04646645]
New Q values:  [ 275.08817949 3183.65327689 5556.9399379   154.04646645]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6450.63274718   26.73544252  343.92251821  123.6214372 ]
------
Step:9, Action:North
State  261
Old Q Values:  [6450.63274718   26.73544252  343.92251821  123.6214372 ]
New Q values:  [4246.73508024   26.73544252  343.92251821  123.6214372 ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 3183.65327689 5556.9399379   154.04646645]
------
Step:10, Action:South
State  188
Old Q Values:  [-6523.78898263   935.74403408  1762.3544145      0.        ]
New Q values:  [-6523.78898263  1943.96671686  1762.3544145      0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281  5234.2303441  -5679.36893145]
------
Step:11, Action:East
State  261
Old Q Values:  [4246.73508024   26.73544252  343.92251821  123.6214372 ]
New Q values:  [4246.73508024   26.73544252  399.52302176  123.6214372 ]
Reward: 9  Episode Reward:  49
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:12, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799 62119.23253726  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799 86294.37039467  1688.81308644]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1891.63358472  340.32246615 -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134  3762.38420191   650.55887833     0.        ]
New Q values:  [-8463.16477134  3762.38420191   344.62421715     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   263.33555274   168.87347733]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   263.33555274   168.87347733]
New Q values:  [-9594.56523706 -8069.05606225   197.51196326   168.87347733]
Reward: 9  Episode Reward:  18
xxxxx
x gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   289.25914056 -2383.80019164   268.45734434]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351   289.25914056 -2383.80019164   268.45734434]
New Q values:  [ -170.77177351   148.96386517 -2383.80019164   268.45734434]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[   92.86736316 -5399.47678043 -8896.20691497 -2207.9955022 ]
------
Step:4, Action:North
State  216
Old Q Values:  [   92.86736316 -5399.47678043 -8896.20691497 -2207.9955022 ]
New Q values:  [  117.08414857 -5399.47678043 -8896.20691497 -2207.9955022 ]
Reward: -1  Episode Reward:  26
xxxxx
x gax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   148.96386517 -2383.80019164   268.45734434]
------
Step:5, Action:South
State  138
Old Q Values:  [   8.43634063 -209.77671831   -0.32296531  275.29817737]
New Q values:  [  8.43634063 -49.38544275  -0.32296531 275.29817737]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  117.08414857 -5399.47678043 -8896.20691497 -2207.9955022 ]
------
Step:6, Action:North
State  216
Old Q Values:  [  117.08414857 -5399.47678043 -8896.20691497 -2207.9955022 ]
New Q values:  [  128.82311264 -5399.47678043 -8896.20691497 -2207.9955022 ]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 -49.38544275  -0.32296531 275.29817737]
------
Step:7, Action:West
State  138
Old Q Values:  [  8.43634063 -49.38544275  -0.32296531 275.29817737]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.45386491e+02]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   219.00775082   786.22406593]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   786.22406593]
New Q values:  [ -281.736      -1150.91067548   219.00775082   881.37970179]
Reward: -1  Episode Reward:  22
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1891.63358472  340.32246615 -180.6       ]
------
Step:9, Action:East
State  108
Old Q Values:  [-8463.16477134  3762.38420191   344.62421715     0.        ]
New Q values:  [-8463.16477134  3762.38420191   196.50327584     0.        ]
Reward: -1  Episode Reward:  21
xxxxx
xga x
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   197.51196326   168.87347733]
------
Step:10, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   197.51196326   168.87347733]
New Q values:  [-9594.56523706 -8069.05606225   158.94198861   168.87347733]
Reward: -1  Episode Reward:  20
xxxxx
x gax
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   148.96386517 -2383.80019164   268.45734434]
------
Step:11, Action:South
State  136
Old Q Values:  [ -170.77177351   148.96386517 -2383.80019164   268.45734434]
New Q values:  [ -170.77177351    97.63247986 -2383.80019164   268.45734434]
Reward: -1  Episode Reward:  19
xxxxx
x  gx
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  128.82311264 -5399.47678043 -8896.20691497 -2207.9955022 ]
------
Step:12, Action:West
State  216
Old Q Values:  [  128.82311264 -5399.47678043 -8896.20691497 -2207.9955022 ]
New Q values:  [  128.82311264 -5399.47678043 -8896.20691497  -304.15903156]
Reward: 9  Episode Reward:  28
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1912.13056439 1671.67976739  568.38654082]
------
Step:13, Action:South
State  200
Old Q Values:  [  62.8218634  1912.13056439 1671.67976739  568.38654082]
New Q values:  [   62.8218634  26658.56334416  1671.67976739   568.38654082]
Reward: 9  Episode Reward:  37
xxxxx
xg  x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799 86294.37039467  1688.81308644]
------
Step:14, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799 86294.37039467  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799 35964.42553763  1688.81308644]
Reward: 9  Episode Reward:  46
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4804.25793254 -6442.16912869 -8192.20126966  1817.03084572]
------
Step:15, Action:North
State  288
Old Q Values:  [ 4804.25793254 -6442.16912869 -8192.20126966  1817.03084572]
New Q values:  [ 1959.75010681 -6442.16912869 -8192.20126966  1817.03084572]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  128.82311264 -5399.47678043 -8896.20691497  -304.15903156]
------
Step:16, Action:North
State  216
Old Q Values:  [  128.82311264 -5399.47678043 -8896.20691497  -304.15903156]
New Q values:  [  154.54519227 -5399.47678043 -8896.20691497  -304.15903156]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.45386491e+02]
------
Step:17, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.45386491e+02]
New Q values:  [  8.43634063 -49.38544275  -0.32296531 294.65979426]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   523.68399323   490.01990793]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   219.00775082   881.37970179]
New Q values:  [ -281.736      -1150.91067548   175.40103861   881.37970179]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[  8.43634063 -49.38544275  -0.32296531 294.65979426]
------
Step:19, Action:West
State  138
Old Q Values:  [  8.43634063 -49.38544275  -0.32296531 294.65979426]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.81677828e+02]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   175.40103861   881.37970179]
------
Step:20, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   175.40103861   881.37970179]
New Q values:  [ -281.736      -1150.91067548   175.40103861   552.2286452 ]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   667.58921495  -180.6       ]
------
Step:21, Action:East
State  98
Old Q Values:  [    0.         43859.10131705 40196.52894029     0.        ]
New Q values:  [    0.         43859.10131705 28308.61746576     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 40768.68629883]
------
Step:22, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   175.40103861   552.2286452 ]
New Q values:  [ -281.736      -1150.91067548   175.40103861   681.1204546 ]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1536.09665506  313.57267078 -252.78192178]
------
Step:23, Action:South
State  98
Old Q Values:  [    0.         43859.10131705 28308.61746576     0.        ]
New Q values:  [    0.         43902.01212913 28308.61746576     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:24, Action:East
State  184
Old Q Values:  [ 264.76160535    0.         3320.68991584    0.        ]
New Q values:  [ 264.76160535    0.         9325.24496958    0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  26658.56334416  1671.67976739   568.38654082]
------
Step:25, Action:South
State  200
Old Q Values:  [   62.8218634  26658.56334416  1671.67976739   568.38654082]
New Q values:  [   62.8218634  21452.15299895  1671.67976739   568.38654082]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799 35964.42553763  1688.81308644]
------
Step:26, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799 35964.42553763  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799 14973.09524709  1688.81308644]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1959.75010681 -6442.16912869 -8192.20126966  1817.03084572]
------
Step:27, Action:North
State  288
Old Q Values:  [ 1959.75010681 -6442.16912869 -8192.20126966  1817.03084572]
New Q values:  [  829.66360041 -6442.16912869 -8192.20126966  1817.03084572]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  154.54519227 -5399.47678043 -8896.20691497  -304.15903156]
------
Step:28, Action:North
State  216
Old Q Values:  [  154.54519227 -5399.47678043 -8896.20691497  -304.15903156]
New Q values:  [  141.75528021 -5399.47678043 -8896.20691497  -304.15903156]
Reward: -1  Episode Reward:  32
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351    97.63247986 -2383.80019164   268.45734434]
------
Step:29, Action:South
State  136
Old Q Values:  [ -170.77177351    97.63247986 -2383.80019164   268.45734434]
New Q values:  [ -170.77177351    80.97957601 -2383.80019164   268.45734434]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  141.75528021 -5399.47678043 -8896.20691497  -304.15903156]
------
Step:30, Action:West
State  216
Old Q Values:  [  141.75528021 -5399.47678043 -8896.20691497  -304.15903156]
New Q values:  [  141.75528021 -5399.47678043 -8896.20691497   337.41448618]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
------
Step:31, Action:South
State  200
Old Q Values:  [   62.8218634  21452.15299895  1671.67976739   568.38654082]
New Q values:  [   62.8218634  13072.18977371  1671.67976739   568.38654082]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799 14973.09524709  1688.81308644]
------
Step:32, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799 14973.09524709  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  6533.74735255  1688.81308644]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  829.66360041 -6442.16912869 -8192.20126966  1817.03084572]
------
Step:33, Action:West
State  288
Old Q Values:  [  829.66360041 -6442.16912869 -8192.20126966  1817.03084572]
New Q values:  [  829.66360041 -6442.16912869 -8192.20126966  2686.33654406]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799  6533.74735255  1688.81308644]
------
Step:34, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  6533.74735255  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  3418.79990424  1688.81308644]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  829.66360041 -6442.16912869 -8192.20126966  2686.33654406]
------
Step:35, Action:North
State  288
Old Q Values:  [  829.66360041 -6442.16912869 -8192.20126966  2686.33654406]
New Q values:  [ 2390.05571456 -6442.16912869 -8192.20126966  2686.33654406]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 111.63982241 6862.634248      0.         1863.19740327]
------
Step:36, Action:West
State  218
Old Q Values:  [ 111.63982241 6862.634248      0.         1863.19740327]
New Q values:  [ 111.63982241 6862.634248      0.         1847.21017375]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  3675.10404147     0.        ]
------
Step:37, Action:East
State  200
Old Q Values:  [   62.8218634  13072.18977371  1671.67976739   568.38654082]
New Q values:  [   62.8218634  13072.18977371   769.29625281   568.38654082]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  141.75528021 -5399.47678043 -8896.20691497   337.41448618]
------
Step:38, Action:North
State  216
Old Q Values:  [  141.75528021 -5399.47678043 -8896.20691497   337.41448618]
New Q values:  [  136.63931538 -5399.47678043 -8896.20691497   337.41448618]
Reward: -1  Episode Reward:  22
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351    80.97957601 -2383.80019164   268.45734434]
------
Step:39, Action:South
State  136
Old Q Values:  [ -170.77177351    80.97957601 -2383.80019164   268.45734434]
New Q values:  [ -170.77177351   133.01617626 -2383.80019164   268.45734434]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  136.63931538 -5399.47678043 -8896.20691497   337.41448618]
------
Step:40, Action:West
State  216
Old Q Values:  [  136.63931538 -5399.47678043 -8896.20691497   337.41448618]
New Q values:  [  136.63931538 -5399.47678043 -8896.20691497  4056.02272658]
Reward: -1  Episode Reward:  20
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  13072.18977371   769.29625281   568.38654082]
------
Step:41, Action:South
State  200
Old Q Values:  [   62.8218634  13072.18977371   769.29625281   568.38654082]
New Q values:  [  62.8218634  6253.91588076  769.29625281  568.38654082]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799  3418.79990424  1688.81308644]
------
Step:42, Action:East
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  3418.79990424  1688.81308644]
New Q values:  [ 2592.30576029 -8521.23367799  2172.82092491  1688.81308644]
Reward: -1  Episode Reward:  18
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2390.05571456 -6442.16912869 -8192.20126966  2686.33654406]
------
Step:43, Action:West
State  288
Old Q Values:  [ 2390.05571456 -6442.16912869 -8192.20126966  2686.33654406]
New Q values:  [ 2390.05571456 -6442.16912869 -8192.20126966  1851.62634571]
Reward: -1  Episode Reward:  17
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2592.30576029 -8521.23367799  2172.82092491  1688.81308644]
------
Step:44, Action:North
State  272
Old Q Values:  [ 2592.30576029 -8521.23367799  2172.82092491  1688.81308644]
New Q values:  [ 2138.85351655 -8521.23367799  2172.82092491  1688.81308644]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  3675.10404147     0.        ]
------
Step:45, Action:East
State  200
Old Q Values:  [  62.8218634  6253.91588076  769.29625281  568.38654082]
New Q values:  [  62.8218634  6253.91588076 1523.9253191   568.38654082]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  136.63931538 -5399.47678043 -8896.20691497  4056.02272658]
------
Step:46, Action:North
State  218
Old Q Values:  [ 111.63982241 6862.634248      0.         1847.21017375]
New Q values:  [ 158.55927744 6862.634248      0.         1847.21017375]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.81677828e+02]
------
Step:47, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.81677828e+02]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.56407268e+02]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   175.40103861   681.1204546 ]
------
Step:48, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   158.94198861   168.87347733]
New Q values:  [-9594.56523706 -8069.05606225   158.94198861   191.38874385]
Reward: -1  Episode Reward:  12
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         414.79784307   86.99637671    0.        ]
------
Step:49, Action:South
State  104
Old Q Values:  [-8652.84        3561.07171622   584.16605159 -8652.84      ]
New Q values:  [-8652.84        4221.40217736   584.16605159 -8652.84      ]
Reward: -1  Episode Reward:  11
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[ 264.76160535    0.         9325.24496958    0.        ]
------
Step:50, Action:East
State  184
Old Q Values:  [ 264.76160535    0.         9325.24496958    0.        ]
New Q values:  [ 264.76160535    0.         5605.67275206    0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  6253.91588076 1523.9253191   568.38654082]
------
Step:51, Action:South
State  200
Old Q Values:  [  62.8218634  6253.91588076 1523.9253191   568.38654082]
New Q values:  [  62.8218634  3152.81262978 1523.9253191   568.38654082]
Reward: -1  Episode Reward:  9
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2138.85351655 -8521.23367799  2172.82092491  1688.81308644]
------
Step:52, Action:East
State  272
Old Q Values:  [ 2138.85351655 -8521.23367799  2172.82092491  1688.81308644]
New Q values:  [ 2138.85351655 -8521.23367799  1585.54508433  1688.81308644]
Reward: -1  Episode Reward:  8
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2390.05571456 -6442.16912869 -8192.20126966  1851.62634571]
------
Step:53, Action:North
State  288
Old Q Values:  [ 2390.05571456 -6442.16912869 -8192.20126966  1851.62634571]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  1851.62634571]
Reward: -1  Episode Reward:  7
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  136.63931538 -5399.47678043 -8896.20691497  4056.02272658]
------
Step:54, Action:West
State  216
Old Q Values:  [  136.63931538 -5399.47678043 -8896.20691497  4056.02272658]
New Q values:  [  136.63931538 -5399.47678043 -8896.20691497  2567.65287957]
Reward: -1  Episode Reward:  6
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  3152.81262978 1523.9253191   568.38654082]
------
Step:55, Action:South
State  200
Old Q Values:  [  62.8218634  3152.81262978 1523.9253191   568.38654082]
New Q values:  [  62.8218634  1902.18110688 1523.9253191   568.38654082]
Reward: -1  Episode Reward:  5
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2138.85351655 -8521.23367799  1585.54508433  1688.81308644]
------
Step:56, Action:West
State  272
Old Q Values:  [ 2138.85351655 -8521.23367799  1585.54508433  1688.81308644]
New Q values:  [ 2138.85351655 -8521.23367799  1585.54508433 76807.49132383]
Reward: 100009  Episode Reward:  100014
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 4182.6894674  2069.36982143 1169.39963074]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831 29257.16977631 -2072.09364767  1460.9765133 ]
New Q values:  [-5922.26708831 30655.92524415 -2072.09364767  1460.9765133 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  633.77207181  -168.92307549 63158.85777874   677.18474264]
------
Step:2, Action:East
State  273
Old Q Values:  [  633.77207181  -168.92307549 63158.85777874   677.18474264]
New Q values:  [  633.77207181  -168.92307549 19914.61184264   677.18474264]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x. .x
x. gx
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1386.80134547 2335.80779431 2254.57097294  358.5166536 ]
------
Step:1, Action:South
State  182
Old Q Values:  [    0.         -7507.54632711  5160.03171396     0.        ]
New Q values:  [    0.         -7427.34942761  5160.03171396     0.        ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x ..x
xg .x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1193.41855571   445.64685634 21306.09496045   262.76946019]
------
Step:1, Action:East
State  189
Old Q Values:  [ 275.08817949 3183.65327689 5556.9399379   154.04646645]
New Q values:  [  275.08817949  3183.65327689 11424.95354841   154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 30655.92524415 -2072.09364767  1460.9765133 ]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 30655.92524415 -2072.09364767  1460.9765133 ]
New Q values:  [-5922.26708831 18242.15365045 -2072.09364767  1460.9765133 ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  633.77207181  -168.92307549 19914.61184264   677.18474264]
------
Step:3, Action:East
State  273
Old Q Values:  [  633.77207181  -168.92307549 19914.61184264   677.18474264]
New Q values:  [ 633.77207181 -168.92307549 8622.91346819  677.18474264]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  1851.62634571]
------
Step:4, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  1851.62634571]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  3326.92457874]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 8622.91346819  677.18474264]
------
Step:5, Action:East
State  272
Old Q Values:  [ 2138.85351655 -8521.23367799  1585.54508433 76807.49132383]
New Q values:  [ 2138.85351655 -8521.23367799  1631.69540736 76807.49132383]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  3326.92457874]
------
Step:6, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  3326.92457874]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  3917.04387195]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 8622.91346819  677.18474264]
------
Step:7, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 8622.91346819  677.18474264]
New Q values:  [ 633.77207181 -168.92307549 4623.67854886  677.18474264]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  3917.04387195]
------
Step:8, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  3917.04387195]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  2953.32111344]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 4623.67854886  677.18474264]
------
Step:9, Action:East
State  272
Old Q Values:  [ 2138.85351655 -8521.23367799  1631.69540736 76807.49132383]
New Q values:  [ 2138.85351655 -8521.23367799  1538.07449697 76807.49132383]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  2953.32111344]
------
Step:10, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  2953.32111344]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966 24222.97584252]
Reward: -1  Episode Reward:  20
xxxxx
xg. x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2138.85351655 -8521.23367799  1538.07449697 76807.49132383]
------
Step:11, Action:West
State  272
Old Q Values:  [ 2138.85351655 -8521.23367799  1538.07449697 76807.49132383]
New Q values:  [ 2138.85351655 -8521.23367799  1538.07449697 32298.66563276]
Reward: 9  Episode Reward:  29
xxxxx
x.. x
xg .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281  5234.2303441  -5679.36893145]
------
Step:12, Action:East
State  260
Old Q Values:  [-1662.87464405 -5704.51612281  5234.2303441  -5679.36893145]
New Q values:  [-1662.87464405 -5704.51612281 11782.69182747 -5679.36893145]
Reward: -1  Episode Reward:  28
xxxxx
x.. x
x  .x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2138.85351655 -8521.23367799  1538.07449697 32298.66563276]
------
Step:13, Action:North
State  272
Old Q Values:  [ 2138.85351655 -8521.23367799  1538.07449697 32298.66563276]
New Q values:  [ 2507.04472244 -8521.23367799  1538.07449697 32298.66563276]
Reward: -1  Episode Reward:  27
xxxxx
x.. x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -4.69753462e+03  5.50701105e+03  1.03161518e+03]
------
Step:14, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -4.69753462e+03  5.50701105e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -4.69753462e+03  6.67706591e+03  1.03161518e+03]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14896.20496264  1093.22762389   790.72804752  1050.85266124]
------
Step:15, Action:North
State  210
Old Q Values:  [14896.20496264  1093.22762389   790.72804752  1050.85266124]
New Q values:  [13845.07087081  1093.22762389   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  35
xxxxx
x..ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26290.62961917  6087.16496961  -180.00807518 26163.81471987]
------
Step:16, Action:North
State  130
Old Q Values:  [26290.62961917  6087.16496961  -180.00807518 26163.81471987]
New Q values:  [18222.84073342  6087.16496961  -180.00807518 26163.81471987]
Reward: -301  Episode Reward:  -266
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  6087.16496961  -180.00807518 26163.81471987]
------
Step:17, Action:West
State  130
Old Q Values:  [18222.84073342  6087.16496961  -180.00807518 26163.81471987]
New Q values:  [18222.84073342  6087.16496961  -180.00807518 22701.53177759]
Reward: 9  Episode Reward:  -257
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 40768.68629883]
------
Step:18, Action:West
State  114
Old Q Values:  [ -180.6         3557.6642036  18106.71132503 40768.68629883]
New Q values:  [ -180.6         3557.6642036  18106.71132503 89483.47815827]
Reward: 100009  Episode Reward:  99752
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3357.45359683  520.46511977 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3790.94279436  238.35800069    0.        ]
New Q values:  [ 221.30610858 7913.60560588  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1193.41855571   445.64685634 21306.09496045   262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [ 1193.41855571   445.64685634 21306.09496045   262.76946019]
New Q values:  [1193.41855571  445.64685634 8987.51608298  262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x a.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144   634.55080188 22698.31290191   492.34934406]
New Q values:  [-2469.90645144   515.77433523 22698.31290191   492.34934406]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[   16.82637525 -5807.06396197   855.18004824   484.10856144]
------
Step:4, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 4623.67854886  677.18474264]
New Q values:  [ 633.77207181 -168.92307549 9115.7641723   677.18474264]
Reward: -1  Episode Reward:  26
xxxxx
x .gx
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966 24222.97584252]
------
Step:5, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966 24222.97584252]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966 12423.3195887 ]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 9115.7641723   677.18474264]
------
Step:6, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 9115.7641723   677.18474264]
New Q values:  [ 633.77207181 -168.92307549 7372.70154553  677.18474264]
Reward: -1  Episode Reward:  24
xxxxx
x .gx
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966 12423.3195887 ]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966 12423.3195887 ]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  7180.53829914]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 7372.70154553  677.18474264]
------
Step:8, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 7372.70154553  677.18474264]
New Q values:  [ 633.77207181 -168.92307549 5102.64210795  677.18474264]
Reward: -1  Episode Reward:  22
xxxxx
x .gx
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  7180.53829914]
------
Step:9, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  7180.53829914]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  4402.40795204]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 633.77207181 -168.92307549 5102.64210795  677.18474264]
------
Step:10, Action:East
State  273
Old Q Values:  [ 633.77207181 -168.92307549 5102.64210795  677.18474264]
New Q values:  [  633.77207181  -168.92307549 -2638.82077121   677.18474264]
Reward: -10001  Episode Reward:  -9980
xxxxx
x ..x
x  .x
x. gx
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  4402.40795204]
------
Step:1, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  4402.40795204]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966 11455.96287065]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799  1538.07449697 32298.66563276]
------
Step:2, Action:West
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799  1538.07449697 32298.66563276]
New Q values:  [ 2507.04472244 -8521.23367799  1538.07449697 16459.67380135]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281 11782.69182747 -5679.36893145]
------
Step:3, Action:East
State  260
Old Q Values:  [-1662.87464405 -5704.51612281 11782.69182747 -5679.36893145]
New Q values:  [-1662.87464405 -5704.51612281  9650.37887139 -5679.36893145]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799  1538.07449697 16459.67380135]
------
Step:4, Action:West
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799  1538.07449697 16459.67380135]
New Q values:  [ 2507.04472244 -8521.23367799  1538.07449697  7857.29004461]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4246.73508024   26.73544252  399.52302176  123.6214372 ]
------
Step:5, Action:North
State  261
Old Q Values:  [4246.73508024   26.73544252  399.52302176  123.6214372 ]
New Q values:  [2404.83637039   26.73544252  399.52302176  123.6214372 ]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1386.80134547 2335.80779431 2254.57097294  358.5166536 ]
------
Step:6, Action:South
State  182
Old Q Values:  [    0.         -7427.34942761  5160.03171396     0.        ]
New Q values:  [    0.         -6076.42610963  5160.03171396     0.        ]
Reward: -10001  Episode Reward:  -9976
xxxxx
x ..x
x ..x
xg  x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966 11455.96287065]
------
Step:1, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966 11455.96287065]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  6944.97216164]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799  1538.07449697  7857.29004461]
------
Step:2, Action:West
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799  1538.07449697  7857.29004461]
New Q values:  [ 2507.04472244 -8521.23367799  1538.07449697  6043.42967926]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281  9650.37887139 -5679.36893145]
------
Step:3, Action:East
State  261
Old Q Values:  [2404.83637039   26.73544252  399.52302176  123.6214372 ]
New Q values:  [2404.83637039   26.73544252 1972.23811248  123.6214372 ]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799  1538.07449697  6043.42967926]
------
Step:4, Action:West
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799  1538.07449697  6043.42967926]
New Q values:  [ 2507.04472244 -8521.23367799  1538.07449697  5311.88553312]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281  9650.37887139 -5679.36893145]
------
Step:5, Action:East
State  261
Old Q Values:  [2404.83637039   26.73544252 1972.23811248  123.6214372 ]
New Q values:  [2404.83637039   26.73544252 2381.86090493  123.6214372 ]
Reward: -1  Episode Reward:  15
xxxxx
x g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799  1538.07449697  5311.88553312]
------
Step:6, Action:West
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799  1538.07449697  5311.88553312]
New Q values:  [ 2507.04472244 -8521.23367799  1538.07449697  2845.60512437]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2404.83637039   26.73544252 2381.86090493  123.6214372 ]
------
Step:7, Action:North
State  261
Old Q Values:  [2404.83637039   26.73544252 2381.86090493  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 2381.86090493  123.6214372 ]
Reward: 9  Episode Reward:  23
xxxxx
x ..x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1386.80134547 2335.80779431 2254.57097294  358.5166536 ]
------
Step:8, Action:South
State  182
Old Q Values:  [    0.         -6076.42610963  5160.03171396     0.        ]
New Q values:  [    0.         -5536.05678243  5160.03171396     0.        ]
Reward: -10001  Episode Reward:  -9978
xxxxx
x ..x
x ..x
xg  x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799  1538.07449697  2845.60512437]
------
Step:1, Action:North
State  276
Old Q Values:  [   16.82637525 -5807.06396197   855.18004824   484.10856144]
New Q values:  [ 9020.20947621 -5807.06396197   855.18004824   484.10856144]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.45197447e+03 3.00269298e+04 1.27673579e+04]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.45197447e+03 3.00269298e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.45197447e+03 1.28654413e+04 1.27673579e+04]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2830.89794027  2483.29359772 -4584.50430574  1067.63960005]
------
Step:3, Action:North
State  208
Old Q Values:  [ 2830.89794027  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [ 7948.21870939  2483.29359772 -4584.50430574  1067.63960005]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  6087.16496961  -180.00807518 22701.53177759]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.56407268e+02]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  2.69930064e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 89483.47815827]
------
Step:5, Action:West
State  126
Old Q Values:  [  0.         331.64678262 182.86408515 446.94147062]
New Q values:  [  0.         331.64678262 182.86408515 751.66666366]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1891.63358472  340.32246615 -180.6       ]
------
Step:6, Action:East
State  106
Old Q Values:  [ -180.6        -7710.46911494   667.58921495  -180.6       ]
New Q values:  [ -180.6        -7710.46911494   470.77182236  -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   175.40103861   681.1204546 ]
------
Step:7, Action:West
State  126
Old Q Values:  [  0.         331.64678262 182.86408515 751.66666366]
New Q values:  [  0.         331.64678262 182.86408515 867.55674088]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1891.63358472  340.32246615 -180.6       ]
------
Step:8, Action:East
State  106
Old Q Values:  [ -180.6        -7710.46911494   470.77182236  -180.6       ]
New Q values:  [ -180.6        -7710.46911494   392.04486532  -180.6       ]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   175.40103861   681.1204546 ]
------
Step:9, Action:West
State  126
Old Q Values:  [  0.         331.64678262 182.86408515 867.55674088]
New Q values:  [  0.         331.64678262 182.86408515 913.91277177]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1891.63358472  340.32246615 -180.6       ]
------
Step:10, Action:East
State  107
Old Q Values:  [-252.35169558 1536.09665506  313.57267078 -252.78192178]
New Q values:  [-252.35169558 1536.09665506  329.16520469 -252.78192178]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x.g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   175.40103861   681.1204546 ]
------
Step:11, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   158.94198861   191.38874385]
New Q values:  [-9594.56523706 -8069.05606225   158.94198861   200.39485046]
Reward: -1  Episode Reward:  39
xxxxx
xag x
x.  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         414.79784307   86.99637671    0.        ]
------
Step:12, Action:South
State  111
Old Q Values:  [-177.44732869 3357.45359683  520.46511977 -120.29354603]
New Q values:  [-177.44732869 4775.86750325  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  48
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  3183.65327689 11424.95354841   154.04646645]
------
Step:13, Action:South
State  188
Old Q Values:  [-6523.78898263  1943.96671686  1762.3544145      0.        ]
New Q values:  [-6523.78898263  3672.10034816  1762.3544145      0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281  9650.37887139 -5679.36893145]
------
Step:14, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 2381.86090493  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 3658.20720483  123.6214372 ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 9020.20947621 -5807.06396197   855.18004824   484.10856144]
------
Step:15, Action:East
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799  1538.07449697  2845.60512437]
New Q values:  [ 2507.04472244 -8521.23367799 62704.12144728  2845.60512437]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1193.41855571  445.64685634 8987.51608298  262.76946019]
------
Step:1, Action:East
State  181
Old Q Values:  [1193.41855571  445.64685634 8987.51608298  262.76946019]
New Q values:  [1193.41855571  445.64685634 1460.03881826  262.76946019]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g x
x...x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  2.69930064e+04]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  2.69930064e+04]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.09597077e+04]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   523.68399323   490.01990793]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   175.40103861   681.1204546 ]
New Q values:  [ -281.736      -1150.91067548  3357.47273738   681.1204546 ]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.09597077e+04]
------
Step:3, Action:West
State  136
Old Q Values:  [ -170.77177351   133.01617626 -2383.80019164   268.45734434]
New Q values:  [ -170.77177351   133.01617626 -2383.80019164 -5833.09860713]
Reward: -10001  Episode Reward:  -9993
xxxxx
x.g x
x. .x
x...x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  6944.97216164]
------
Step:1, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  6944.97216164]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966 21594.62529884]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799 62704.12144728  2845.60512437]
------
Step:2, Action:East
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799 62704.12144728  2845.60512437]
New Q values:  [ 2507.04472244 -8521.23367799 31559.43616857  2845.60512437]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966 21594.62529884]
------
Step:3, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966 21594.62529884]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966 18105.08097011]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799 31559.43616857  2845.60512437]
------
Step:4, Action:East
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799 31559.43616857  2845.60512437]
New Q values:  [ 2507.04472244 -8521.23367799 18054.69875846  2845.60512437]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x...x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966 18105.08097011]
------
Step:5, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966 18105.08097011]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966 12657.84201558]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799 18054.69875846  2845.60512437]
------
Step:6, Action:East
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799 18054.69875846  2845.60512437]
New Q values:  [ 2507.04472244 -8521.23367799 11018.63210806  2845.60512437]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x...x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966 12657.84201558]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966 12657.84201558]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  2368.12643865]
Reward: -10001  Episode Reward:  -9997
xxxxx
x ..x
x...x
x.g x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-1662.87464405 -5704.51612281  9650.37887139 -5679.36893145]
------
Step:1, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 3658.20720483  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 4174.7457248   123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 9020.20947621 -5807.06396197   855.18004824   484.10856144]
------
Step:2, Action:North
State  273
Old Q Values:  [  633.77207181  -168.92307549 -2638.82077121   677.18474264]
New Q values:  [ 5731.55492386  -168.92307549 -2638.82077121   677.18474264]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x.a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 18242.15365045 -2072.09364767  1460.9765133 ]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144   515.77433523 22698.31290191   492.34934406]
New Q values:  [-2469.90645144  2911.77257695 22698.31290191   492.34934406]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 9020.20947621 -5807.06396197   855.18004824   484.10856144]
------
Step:4, Action:North
State  276
Old Q Values:  [ 9020.20947621 -5807.06396197   855.18004824   484.10856144]
New Q values:  [ 1467.11617555 -5807.06396197   855.18004824   484.10856144]
Reward: -10001  Episode Reward:  -9984
xxxxx
x ..x
x.g.x
x  .x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  3129.37111079 -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094  3129.37111079 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1695.1600898  -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1193.41855571  445.64685634 1460.03881826  262.76946019]
------
Step:2, Action:East
State  180
Old Q Values:  [  150.2741814   7546.67422929 13310.48268597 -4966.32149798]
New Q values:  [  150.2741814   7546.67422929  5900.24740645 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1902.18110688 1523.9253191   568.38654082]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.45197447e+03 1.28654413e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 3.89177942e+03 1.28654413e+04 1.27673579e+04]
Reward: 9  Episode Reward:  27
xxxxx
x . x
xg .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799 11018.63210806  2845.60512437]
------
Step:4, Action:East
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799 11018.63210806  2845.60512437]
New Q values:  [ 2507.04472244 -8521.23367799  5123.29077482  2845.60512437]
Reward: 9  Episode Reward:  36
xxxxx
x . x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  2368.12643865]
------
Step:5, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  2368.12643865]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  2483.6378079 ]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799  5123.29077482  2845.60512437]
------
Step:6, Action:East
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799  5123.29077482  2845.60512437]
New Q values:  [ 2507.04472244 -8521.23367799  2793.8076523   2845.60512437]
Reward: -1  Episode Reward:  34
xxxxx
xg. x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  2483.6378079 ]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  2483.6378079 ]
New Q values:  [ 2172.2291038  -6442.16912869 -8192.20126966  1846.53666047]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799  2793.8076523   2845.60512437]
------
Step:8, Action:West
State  273
Old Q Values:  [ 5731.55492386  -168.92307549 -2638.82077121   677.18474264]
New Q values:  [ 5731.55492386  -168.92307549 -2638.82077121  1528.69761449]
Reward: 9  Episode Reward:  42
xxxxx
x .gx
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 4174.7457248   123.6214372 ]
------
Step:9, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 4174.7457248   123.6214372 ]
New Q values:  [1668.07688645   26.73544252 2109.43314258  123.6214372 ]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1467.11617555 -5807.06396197   855.18004824   484.10856144]
------
Step:10, Action:North
State  273
Old Q Values:  [ 5731.55492386  -168.92307549 -2638.82077121  1528.69761449]
New Q values:  [ 7764.66806468  -168.92307549 -2638.82077121  1528.69761449]
Reward: -1  Episode Reward:  40
xxxxx
x .gx
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 18242.15365045 -2072.09364767  1460.9765133 ]
------
Step:11, Action:South
State  193
Old Q Values:  [-5922.26708831 18242.15365045 -2072.09364767  1460.9765133 ]
New Q values:  [-5922.26708831  9625.66187958 -2072.09364767  1460.9765133 ]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7764.66806468  -168.92307549 -2638.82077121  1528.69761449]
------
Step:12, Action:North
State  273
Old Q Values:  [ 7764.66806468  -168.92307549 -2638.82077121  1528.69761449]
New Q values:  [ 4360.07406609  -168.92307549 -2638.82077121  1528.69761449]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 4182.6894674  2069.36982143 1169.39963074]
------
Step:13, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -4.69753462e+03  6.67706591e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  6.67706591e+03  1.03161518e+03]
Reward: -10001  Episode Reward:  -9963
xxxxx
x . x
x  .x
x g x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   523.68399323   490.01990793]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   523.68399323   490.01990793]
New Q values:  [ -253.44886264 -1902.20915811  3502.78591923   490.01990793]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.09597077e+04]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.09597077e+04]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  5.43411887e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3502.78591923   490.01990793]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3357.47273738   681.1204546 ]
New Q values:  [ -281.736      -1150.91067548  2972.62475646   681.1204546 ]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  5.43411887e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  5.43411887e+03]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.06483498e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2972.62475646   681.1204546 ]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2972.62475646   681.1204546 ]
New Q values:  [ -281.736      -1150.91067548  2107.90039527   681.1204546 ]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.06483498e+03]
------
Step:6, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  3.06483498e+03]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.85770411e+03]
Reward: -1  Episode Reward:  4
xxxxx
x.a x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2107.90039527   681.1204546 ]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2107.90039527   681.1204546 ]
New Q values:  [ -281.736      -1150.91067548  1399.87139075   681.1204546 ]
Reward: -1  Episode Reward:  3
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.85770411e+03]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.85770411e+03]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.79331742e+03]
Reward: -1  Episode Reward:  2
xxxxx
x.a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3502.78591923   490.01990793]
------
Step:9, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3502.78591923   490.01990793]
New Q values:  [ -253.44886264 -1902.20915811  1938.50959348   490.01990793]
Reward: -1  Episode Reward:  1
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.79331742e+03]
------
Step:10, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.79331742e+03]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.29827985e+03]
Reward: -1  Episode Reward:  0
xxxxx
x.a x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1938.50959348   490.01990793]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1399.87139075   681.1204546 ]
New Q values:  [ -281.736      -1150.91067548   948.83251003   681.1204546 ]
Reward: -1  Episode Reward:  -1
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.29827985e+03]
------
Step:12, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.29827985e+03]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  8.03361691e+02]
Reward: -1  Episode Reward:  -2
xxxxx
x.a x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   948.83251003   681.1204546 ]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   948.83251003   681.1204546 ]
New Q values:  [ -281.736      -1150.91067548   619.94151141   681.1204546 ]
Reward: -1  Episode Reward:  -3
xxxxx
x. ax
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  8.03361691e+02]
------
Step:14, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  8.03361691e+02]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  5.25080813e+02]
Reward: -1  Episode Reward:  -4
xxxxx
x.a x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   619.94151141   681.1204546 ]
------
Step:15, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   619.94151141   681.1204546 ]
New Q values:  [ -281.736      -1150.91067548   619.94151141   845.33825725]
Reward: 9  Episode Reward:  5
xxxxx
xa  x
x...x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1891.63358472  340.32246615 -180.6       ]
------
Step:16, Action:South
State  110
Old Q Values:  [-239.29051573 1891.63358472  340.32246615 -180.6       ]
New Q values:  [ -239.29051573 -2973.94429733   340.32246615  -180.6       ]
Reward: -9991  Episode Reward:  -9986
xxxxx
x   x
xg..x
x.. x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [1386.80134547 2335.80779431 2254.57097294  358.5166536 ]
New Q values:  [1386.80134547 2335.80779431 2910.34816214  358.5166536 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  6.67706591e+03  1.03161518e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  6.67706591e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  6.82974763e+03  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13845.07087081  1093.22762389   790.72804752  1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [13845.07087081  1093.22762389   790.72804752  1050.85266124]
New Q values:  [12353.8878816   1093.22762389   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  6087.16496961  -180.00807518 22701.53177759]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  5.25080813e+02]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  2.70604758e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 89483.47815827]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1938.50959348   490.01990793]
New Q values:  [ -253.44886264 -1902.20915811  1938.50959348  1634.16821415]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4775.86750325  520.46511977 -120.29354603]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 4775.86750325  520.46511977 -120.29354603]
New Q values:  [-177.44732869 2141.9205047   520.46511977 -120.29354603]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[  3.06655861 773.91167801 597.60025341   0.        ]
------
Step:7, Action:South
State  191
Old Q Values:  [  3.06655861 773.91167801 597.60025341   0.        ]
New Q values:  [  3.06655861 941.79461398 597.60025341   0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 2109.43314258  123.6214372 ]
------
Step:8, Action:North
State  260
Old Q Values:  [-1662.87464405 -5704.51612281  9650.37887139 -5679.36893145]
New Q values:  [ -127.28874991 -5704.51612281  9650.37887139 -5679.36893145]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:9, Action:East
State  191
Old Q Values:  [  3.06655861 941.79461398 597.60025341   0.        ]
New Q values:  [  3.06655861 941.79461398 631.32287843   0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[    0.         -1406.21014518  1309.6092569      0.        ]
------
Step:10, Action:East
State  203
Old Q Values:  [3.60604218e+00 5.44407350e+03 3.62058893e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 5.44407350e+03 3.50642584e+03 9.06816004e+03]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 158.55927744 6862.634248      0.         1847.21017375]
------
Step:11, Action:West
State  216
Old Q Values:  [  136.63931538 -5399.47678043 -8896.20691497  2567.65287957]
New Q values:  [  136.63931538 -5399.47678043 -8896.20691497  1486.13925063]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
------
Step:12, Action:South
State  204
Old Q Values:  [   0.          817.08415829 1076.13926719  441.58769553]
New Q values:  [   0.          772.36851598 1076.13926719  441.58769553]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1467.11617555 -5807.06396197   855.18004824   484.10856144]
------
Step:13, Action:East
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799  2793.8076523   2845.60512437]
New Q values:  [ 2507.04472244 -8521.23367799 61774.59179206  2845.60512437]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.89177942e+03 1.28654413e+04 1.27673579e+04]
------
Step:1, Action:East
State  200
Old Q Values:  [  62.8218634  1902.18110688 1523.9253191   568.38654082]
New Q values:  [  62.8218634  1902.18110688 1060.81190283  568.38654082]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  136.63931538 -5399.47678043 -8896.20691497  1486.13925063]
------
Step:2, Action:West
State  208
Old Q Values:  [ 7948.21870939  2483.29359772 -4584.50430574  1067.63960005]
New Q values:  [ 7948.21870939  2483.29359772 -4584.50430574 -1713.91177491]
Reward: -10001  Episode Reward:  -9992
xxxxx
x ..x
x.g x
x...x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2172.2291038  -6442.16912869 -8192.20126966  1846.53666047]
------
Step:1, Action:North
State  288
Old Q Values:  [ 2172.2291038  -6442.16912869 -8192.20126966  1846.53666047]
New Q values:  [ 3258.75725434 -6442.16912869 -8192.20126966  1846.53666047]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7948.21870939  2483.29359772 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [ 7948.21870939  2483.29359772 -4584.50430574 -1713.91177491]
New Q values:  [11296.83021555  2483.29359772 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  2.70604758e+04]
------
Step:3, Action:West
State  136
Old Q Values:  [ -170.77177351   133.01617626 -2383.80019164 -5833.09860713]
New Q values:  [ -170.77177351   133.01617626 -2383.80019164 -8267.72098771]
Reward: -9991  Episode Reward:  -9983
xxxxx
x.g x
x.. x
x.. x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3258.75725434 -6442.16912869 -8192.20126966  1846.53666047]
------
Step:1, Action:North
State  288
Old Q Values:  [ 3258.75725434 -6442.16912869 -8192.20126966  1846.53666047]
New Q values:  [ 4697.9519664  -6442.16912869 -8192.20126966  1846.53666047]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11296.83021555  2483.29359772 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [11296.83021555  2483.29359772 -4584.50430574 -1713.91177491]
New Q values:  [ 4564.0369391   2483.29359772 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   133.01617626 -2383.80019164 -8267.72098771]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351   133.01617626 -2383.80019164 -8267.72098771]
New Q values:  [ -170.77177351  1421.81755223 -2383.80019164 -8267.72098771]
Reward: -1  Episode Reward:  17
xxxxx
x g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4564.0369391   2483.29359772 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  208
Old Q Values:  [ 4564.0369391   2483.29359772 -4584.50430574 -1713.91177491]
New Q values:  [ 2251.56004131  2483.29359772 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  16
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1421.81755223 -2383.80019164 -8267.72098771]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351  1421.81755223 -2383.80019164 -8267.72098771]
New Q values:  [ -170.77177351  1313.11510021 -2383.80019164 -8267.72098771]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2251.56004131  2483.29359772 -4584.50430574 -1713.91177491]
------
Step:6, Action:South
State  208
Old Q Values:  [ 2251.56004131  2483.29359772 -4584.50430574 -1713.91177491]
New Q values:  [ 2251.56004131  2402.10302901 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4697.9519664  -6442.16912869 -8192.20126966  1846.53666047]
------
Step:7, Action:North
State  288
Old Q Values:  [ 4697.9519664  -6442.16912869 -8192.20126966  1846.53666047]
New Q values:  [ 2599.21169526 -6442.16912869 -8192.20126966  1846.53666047]
Reward: -1  Episode Reward:  13
xxxxx
x g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2251.56004131  2402.10302901 -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  208
Old Q Values:  [ 2251.56004131  2402.10302901 -4584.50430574 -1713.91177491]
New Q values:  [ 2251.56004131  1740.00472018 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  12
xxxxx
x . x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2599.21169526 -6442.16912869 -8192.20126966  1846.53666047]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2599.21169526 -6442.16912869 -8192.20126966  1846.53666047]
New Q values:  [ 1714.5526905  -6442.16912869 -8192.20126966  1846.53666047]
Reward: -1  Episode Reward:  11
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2251.56004131  1740.00472018 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  210
Old Q Values:  [12353.8878816   1093.22762389   790.72804752  1050.85266124]
New Q values:  [13059.09788443  1093.22762389   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  10
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  2.70604758e+04]
------
Step:11, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  2.70604758e+04]
New Q values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.10831918e+04]
Reward: 9  Episode Reward:  19
xxxxx
x a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   619.94151141   845.33825725]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   619.94151141   845.33825725]
New Q values:  [ -281.736      -1150.91067548   619.94151141   439.63204275]
Reward: -1  Episode Reward:  18
xxxxx
xa  x
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -2973.94429733   340.32246615  -180.6       ]
------
Step:13, Action:East
State  104
Old Q Values:  [-8652.84        4221.40217736   584.16605159 -8652.84      ]
New Q values:  [-8652.84        4221.40217736   293.18487578 -8652.84      ]
Reward: -1  Episode Reward:  17
xxxxx
xga x
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   158.94198861   200.39485046]
------
Step:14, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   158.94198861   200.39485046]
New Q values:  [-9594.56523706 -8069.05606225   456.91132551   200.39485046]
Reward: -1  Episode Reward:  16
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1313.11510021 -2383.80019164 -8267.72098771]
------
Step:15, Action:South
State  138
Old Q Values:  [ 8.43634063e+00 -4.93854428e+01 -3.22965309e-01  1.10831918e+04]
New Q values:  [ 8.43634063e+00  4.25487598e+02 -3.22965309e-01  1.10831918e+04]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  136.63931538 -5399.47678043 -8896.20691497  1486.13925063]
------
Step:16, Action:North
State  208
Old Q Values:  [ 2251.56004131  1740.00472018 -4584.50430574 -1713.91177491]
New Q values:  [ 1293.95854659  1740.00472018 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1313.11510021 -2383.80019164 -8267.72098771]
------
Step:17, Action:South
State  136
Old Q Values:  [ -170.77177351  1313.11510021 -2383.80019164 -8267.72098771]
New Q values:  [ -170.77177351  1046.64745614 -2383.80019164 -8267.72098771]
Reward: -1  Episode Reward:  13
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1293.95854659  1740.00472018 -4584.50430574 -1713.91177491]
------
Step:18, Action:South
State  216
Old Q Values:  [  136.63931538 -5399.47678043 -8896.20691497  1486.13925063]
New Q values:  [  136.63931538 -1606.42971403 -8896.20691497  1486.13925063]
Reward: -1  Episode Reward:  12
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1714.5526905  -6442.16912869 -8192.20126966  1846.53666047]
------
Step:19, Action:West
State  288
Old Q Values:  [ 1714.5526905  -6442.16912869 -8192.20126966  1846.53666047]
New Q values:  [ 1714.5526905  -6442.16912869 -8192.20126966 19276.39220181]
Reward: 9  Episode Reward:  21
xxxxx
x   x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2507.04472244 -8521.23367799 61774.59179206  2845.60512437]
------
Step:20, Action:East
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799 61774.59179206  2845.60512437]
New Q values:  [ 2507.04472244 -8521.23367799 30492.15437737  2845.60512437]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1714.5526905  -6442.16912869 -8192.20126966 19276.39220181]
------
Step:21, Action:North
State  288
Old Q Values:  [ 1714.5526905  -6442.16912869 -8192.20126966 19276.39220181]
New Q values:  [ 4602.95044153 -6442.16912869 -8192.20126966 19276.39220181]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13059.09788443  1093.22762389   790.72804752  1050.85266124]
------
Step:22, Action:North
State  208
Old Q Values:  [ 1293.95854659  1740.00472018 -4584.50430574 -1713.91177491]
New Q values:  [ 3841.9409545   1740.00472018 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  18
xxxxx
x  ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.25487598e+02 -3.22965309e-01  1.10831918e+04]
------
Step:23, Action:West
State  136
Old Q Values:  [ -170.77177351  1046.64745614 -2383.80019164 -8267.72098771]
New Q values:  [ -170.77177351  1046.64745614 -2383.80019164 -3170.61499743]
Reward: -1  Episode Reward:  17
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   456.91132551   200.39485046]
------
Step:24, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   619.94151141   439.63204275]
New Q values:  [ -281.736      -1150.91067548  3572.33414043   439.63204275]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.25487598e+02 -3.22965309e-01  1.10831918e+04]
------
Step:25, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  4.25487598e+02 -3.22965309e-01  1.10831918e+04]
New Q values:  [ 8.43634063e+00  4.25487598e+02 -3.22965309e-01  5.50437696e+03]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3572.33414043   439.63204275]
------
Step:26, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1938.50959348  1634.16821415]
New Q values:  [ -253.44886264 -1902.20915811  2426.11692438  1634.16821415]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.25487598e+02 -3.22965309e-01  5.50437696e+03]
------
Step:27, Action:West
State  136
Old Q Values:  [ -170.77177351  1046.64745614 -2383.80019164 -3170.61499743]
New Q values:  [ -170.77177351  1046.64745614 -2383.80019164 -1144.22974786]
Reward: -1  Episode Reward:  13
xxxxx
x agx
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:28, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   456.91132551   200.39485046]
New Q values:  [-9594.56523706 -8069.05606225   456.91132551   203.9972931 ]
Reward: -1  Episode Reward:  12
xxxxx
xag x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         414.79784307   86.99637671    0.        ]
------
Step:29, Action:South
State  105
Old Q Values:  [-180.6         414.79784307   86.99637671    0.        ]
New Q values:  [-180.6         609.33078271   86.99637671    0.        ]
Reward: 9  Episode Reward:  21
xxxxx
x  gx
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1193.41855571  445.64685634 1460.03881826  262.76946019]
------
Step:30, Action:East
State  185
Old Q Values:  [  734.36934626     0.         -3349.09731468  -178.98      ]
New Q values:  [ 734.36934626    0.         -874.56082707 -178.98      ]
Reward: 9  Episode Reward:  30
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
------
Step:31, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.53226033e+03 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  1.92032635e+03 -5.74006972e+03  2.00341972e+02]
Reward: -1  Episode Reward:  29
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4360.07406609  -168.92307549 -2638.82077121  1528.69761449]
------
Step:32, Action:North
State  272
Old Q Values:  [ 2507.04472244 -8521.23367799 30492.15437737  2845.60512437]
New Q values:  [ 1572.87222104 -8521.23367799 30492.15437737  2845.60512437]
Reward: -1  Episode Reward:  28
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  1902.18110688 1060.81190283  568.38654082]
------
Step:33, Action:South
State  200
Old Q Values:  [  62.8218634  1902.18110688 1060.81190283  568.38654082]
New Q values:  [  62.8218634  9907.91875596 1060.81190283  568.38654082]
Reward: -1  Episode Reward:  27
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1572.87222104 -8521.23367799 30492.15437737  2845.60512437]
------
Step:34, Action:East
State  272
Old Q Values:  [ 1572.87222104 -8521.23367799 30492.15437737  2845.60512437]
New Q values:  [ 1572.87222104 -8521.23367799 17979.17941149  2845.60512437]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4602.95044153 -6442.16912869 -8192.20126966 19276.39220181]
------
Step:35, Action:West
State  288
Old Q Values:  [ 4602.95044153 -6442.16912869 -8192.20126966 19276.39220181]
New Q values:  [ 4602.95044153 -6442.16912869 -8192.20126966 13103.71070417]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1572.87222104 -8521.23367799 17979.17941149  2845.60512437]
------
Step:36, Action:East
State  272
Old Q Values:  [ 1572.87222104 -8521.23367799 17979.17941149  2845.60512437]
New Q values:  [ 1572.87222104 -8521.23367799 11122.18497585  2845.60512437]
Reward: -1  Episode Reward:  24
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4602.95044153 -6442.16912869 -8192.20126966 13103.71070417]
------
Step:37, Action:West
State  288
Old Q Values:  [ 4602.95044153 -6442.16912869 -8192.20126966 13103.71070417]
New Q values:  [ 4602.95044153 -6442.16912869 -8192.20126966  6548.9065015 ]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4360.07406609  -168.92307549 -2638.82077121  1528.69761449]
------
Step:38, Action:North
State  272
Old Q Values:  [ 1572.87222104 -8521.23367799 11122.18497585  2845.60512437]
New Q values:  [ 3600.9245152  -8521.23367799 11122.18497585  2845.60512437]
Reward: -1  Episode Reward:  22
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  9907.91875596 1060.81190283  568.38654082]
------
Step:39, Action:South
State  200
Old Q Values:  [  62.8218634  9907.91875596 1060.81190283  568.38654082]
New Q values:  [  62.8218634  7299.22299514 1060.81190283  568.38654082]
Reward: -1  Episode Reward:  21
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3600.9245152  -8521.23367799 11122.18497585  2845.60512437]
------
Step:40, Action:East
State  272
Old Q Values:  [ 3600.9245152  -8521.23367799 11122.18497585  2845.60512437]
New Q values:  [ 3600.9245152  -8521.23367799  6412.94594079  2845.60512437]
Reward: -1  Episode Reward:  20
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4602.95044153 -6442.16912869 -8192.20126966  6548.9065015 ]
------
Step:41, Action:West
State  288
Old Q Values:  [ 4602.95044153 -6442.16912869 -8192.20126966  6548.9065015 ]
New Q values:  [ 4602.95044153 -6442.16912869 -8192.20126966  4542.84638283]
Reward: -1  Episode Reward:  19
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3600.9245152  -8521.23367799  6412.94594079  2845.60512437]
------
Step:42, Action:East
State  272
Old Q Values:  [ 3600.9245152  -8521.23367799  6412.94594079  2845.60512437]
New Q values:  [ 3600.9245152  -8521.23367799  3945.46350877  2845.60512437]
Reward: -1  Episode Reward:  18
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4602.95044153 -6442.16912869 -8192.20126966  4542.84638283]
------
Step:43, Action:North
State  288
Old Q Values:  [ 4602.95044153 -6442.16912869 -8192.20126966  4542.84638283]
New Q values:  [ 2286.4219518  -6442.16912869 -8192.20126966  4542.84638283]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  136.63931538 -1606.42971403 -8896.20691497  1486.13925063]
------
Step:44, Action:West
State  216
Old Q Values:  [  136.63931538 -1606.42971403 -8896.20691497  1486.13925063]
New Q values:  [  136.63931538 -1606.42971403 -8896.20691497  2783.62259879]
Reward: -1  Episode Reward:  16
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  7299.22299514 1060.81190283  568.38654082]
------
Step:45, Action:South
State  200
Old Q Values:  [  62.8218634  7299.22299514 1060.81190283  568.38654082]
New Q values:  [  62.8218634  4102.72825069 1060.81190283  568.38654082]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3600.9245152  -8521.23367799  3945.46350877  2845.60512437]
------
Step:46, Action:East
State  272
Old Q Values:  [ 3600.9245152  -8521.23367799  3945.46350877  2845.60512437]
New Q values:  [ 3600.9245152  -8521.23367799  2940.43931836  2845.60512437]
Reward: -1  Episode Reward:  14
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2286.4219518  -6442.16912869 -8192.20126966  4542.84638283]
------
Step:47, Action:West
State  288
Old Q Values:  [ 2286.4219518  -6442.16912869 -8192.20126966  4542.84638283]
New Q values:  [ 2286.4219518  -6442.16912869 -8192.20126966  2896.8159077 ]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3600.9245152  -8521.23367799  2940.43931836  2845.60512437]
------
Step:48, Action:North
State  272
Old Q Values:  [ 3600.9245152  -8521.23367799  2940.43931836  2845.60512437]
New Q values:  [ 2670.58828129 -8521.23367799  2940.43931836  2845.60512437]
Reward: -1  Episode Reward:  12
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  4102.72825069 1060.81190283  568.38654082]
------
Step:49, Action:South
State  200
Old Q Values:  [  62.8218634  4102.72825069 1060.81190283  568.38654082]
New Q values:  [  62.8218634  2522.62309578 1060.81190283  568.38654082]
Reward: -1  Episode Reward:  11
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2670.58828129 -8521.23367799  2940.43931836  2845.60512437]
------
Step:50, Action:East
State  272
Old Q Values:  [ 2670.58828129 -8521.23367799  2940.43931836  2845.60512437]
New Q values:  [ 2670.58828129 -8521.23367799  2044.62049965  2845.60512437]
Reward: -1  Episode Reward:  10
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2286.4219518  -6442.16912869 -8192.20126966  2896.8159077 ]
------
Step:51, Action:West
State  288
Old Q Values:  [ 2286.4219518  -6442.16912869 -8192.20126966  2896.8159077 ]
New Q values:  [ 2286.4219518  -6442.16912869 -8192.20126966  2011.80790039]
Reward: -1  Episode Reward:  9
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2670.58828129 -8521.23367799  2044.62049965  2845.60512437]
------
Step:52, Action:West
State  272
Old Q Values:  [ 2670.58828129 -8521.23367799  2044.62049965  2845.60512437]
New Q values:  [ 2670.58828129 -8521.23367799  2044.62049965 77270.208139  ]
Reward: 100009  Episode Reward:  100018
xxxxx
x g x
x   x
xa  x
xxxxx
Episode # 1000
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2141.9205047   520.46511977 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 7913.60560588  238.35800069    0.        ]
New Q values:  [ 221.30610858 3608.85388783  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1193.41855571  445.64685634 1460.03881826  262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [1193.41855571  445.64685634 1460.03881826  262.76946019]
New Q values:  [1193.41855571  445.64685634 1392.90939788  262.76946019]
Reward: -10001  Episode Reward:  -9992
xxxxx
x ..x
x g.x
x...x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   456.91132551   203.9972931 ]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3572.33414043   439.63204275]
New Q values:  [ -281.736      -1150.91067548  3085.64674316   439.63204275]
Reward: 9  Episode Reward:  9
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.25487598e+02 -3.22965309e-01  5.50437696e+03]
------
Step:2, Action:West
State  136
Old Q Values:  [ -170.77177351  1046.64745614 -2383.80019164 -1144.22974786]
New Q values:  [ -170.77177351  1046.64745614 -2383.80019164  -321.21850149]
Reward: -1  Episode Reward:  8
xxxxx
xga x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   456.91132551   203.9972931 ]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   456.91132551   203.9972931 ]
New Q values:  [-9594.56523706 -8069.05606225   496.15876704   203.9972931 ]
Reward: -1  Episode Reward:  7
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1046.64745614 -2383.80019164  -321.21850149]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.25487598e+02 -3.22965309e-01  5.50437696e+03]
New Q values:  [ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  5.50437696e+03]
Reward: 9  Episode Reward:  16
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  136.63931538 -1606.42971403 -8896.20691497  2783.62259879]
------
Step:5, Action:North
State  216
Old Q Values:  [  136.63931538 -1606.42971403 -8896.20691497  2783.62259879]
New Q values:  [ 1705.36881314 -1606.42971403 -8896.20691497  2783.62259879]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  5.50437696e+03]
------
Step:6, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  5.50437696e+03]
New Q values:  [ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  3.12684481e+03]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3085.64674316   439.63204275]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3085.64674316   439.63204275]
New Q values:  [ -281.736      -1150.91067548  2171.71213894   439.63204275]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  3.12684481e+03]
------
Step:8, Action:West
State  136
Old Q Values:  [ -170.77177351  1046.64745614 -2383.80019164  -321.21850149]
New Q values:  [ -170.77177351  1046.64745614 -2383.80019164    19.76022952]
Reward: -1  Episode Reward:  12
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   496.15876704   203.9972931 ]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2171.71213894   439.63204275]
New Q values:  [ -281.736      -1150.91067548  1806.13829726   439.63204275]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  3.12684481e+03]
------
Step:10, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  3.12684481e+03]
New Q values:  [ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  1.79197941e+03]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1806.13829726   439.63204275]
------
Step:11, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   496.15876704   203.9972931 ]
New Q values:  [-9594.56523706 -8069.05606225   511.85774366   203.9972931 ]
Reward: -1  Episode Reward:  9
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1046.64745614 -2383.80019164    19.76022952]
------
Step:12, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.01068182e+03 -3.22965309e-01  1.79197941e+03]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.79197941e+03]
Reward: -1  Episode Reward:  8
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1705.36881314 -1606.42971403 -8896.20691497  2783.62259879]
------
Step:13, Action:North
State  216
Old Q Values:  [ 1705.36881314 -1606.42971403 -8896.20691497  2783.62259879]
New Q values:  [ 1219.14134868 -1606.42971403 -8896.20691497  2783.62259879]
Reward: -1  Episode Reward:  7
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.79197941e+03]
------
Step:14, Action:West
State  136
Old Q Values:  [ -170.77177351  1046.64745614 -2383.80019164    19.76022952]
New Q values:  [ -170.77177351  1046.64745614 -2383.80019164   160.8614149 ]
Reward: -1  Episode Reward:  6
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   511.85774366   203.9972931 ]
------
Step:15, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   511.85774366   203.9972931 ]
New Q values:  [-9594.56523706 -8069.05606225   518.13733431   203.9972931 ]
Reward: -1  Episode Reward:  5
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1046.64745614 -2383.80019164   160.8614149 ]
------
Step:16, Action:South
State  136
Old Q Values:  [ -170.77177351  1046.64745614 -2383.80019164   160.8614149 ]
New Q values:  [ -170.77177351  1253.14576209 -2383.80019164   160.8614149 ]
Reward: -1  Episode Reward:  4
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1219.14134868 -1606.42971403 -8896.20691497  2783.62259879]
------
Step:17, Action:West
State  216
Old Q Values:  [ 1219.14134868 -1606.42971403 -8896.20691497  2783.62259879]
New Q values:  [ 1219.14134868 -1606.42971403 -8896.20691497 -4124.36403175]
Reward: -9991  Episode Reward:  -9987
xxxxx
x   x
x.g x
x...x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2670.58828129 -8521.23367799  2044.62049965 77270.208139  ]
------
Step:1, Action:West
State  276
Old Q Values:  [ 1467.11617555 -5807.06396197   855.18004824   484.10856144]
New Q values:  [ 1467.11617555 -5807.06396197   855.18004824  3094.15708599]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -127.28874991 -5704.51612281  9650.37887139 -5679.36893145]
------
Step:2, Action:East
State  260
Old Q Values:  [ -127.28874991 -5704.51612281  9650.37887139 -5679.36893145]
New Q values:  [ -127.28874991 -5704.51612281 27040.61399026 -5679.36893145]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2670.58828129 -8521.23367799  2044.62049965 77270.208139  ]
------
Step:3, Action:North
State  276
Old Q Values:  [ 1467.11617555 -5807.06396197   855.18004824  3094.15708599]
New Q values:  [ 4445.87885529 -5807.06396197   855.18004824  3094.15708599]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.89177942e+03 1.28654413e+04 1.27673579e+04]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.89177942e+03 1.28654413e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 3.89177942e+03 6.30415880e+03 1.27673579e+04]
Reward: 9  Episode Reward:  16
xxxxx
xg..x
x. ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3841.9409545   1740.00472018 -4584.50430574 -1713.91177491]
------
Step:5, Action:North
State  208
Old Q Values:  [ 3841.9409545   1740.00472018 -4584.50430574 -1713.91177491]
New Q values:  [ 1918.12011043  1740.00472018 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  25
xxxxx
x.gax
x.  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1253.14576209 -2383.80019164   160.8614149 ]
------
Step:6, Action:South
State  136
Old Q Values:  [ -170.77177351  1253.14576209 -2383.80019164   160.8614149 ]
New Q values:  [ -170.77177351  1076.09433797 -2383.80019164   160.8614149 ]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x. ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1918.12011043  1740.00472018 -4584.50430574 -1713.91177491]
------
Step:7, Action:North
State  208
Old Q Values:  [ 1918.12011043  1740.00472018 -4584.50430574 -1713.91177491]
New Q values:  [ 7577.10757745  1740.00472018 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  23
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  6087.16496961  -180.00807518 22701.53177759]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.79197941e+03]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  2.75672352e+04]
Reward: 9  Episode Reward:  32
xxxxx
x.a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 89483.47815827]
------
Step:9, Action:West
State  126
Old Q Values:  [  0.         331.64678262 182.86408515 913.91277177]
New Q values:  [  0.         331.64678262 182.86408515 473.06184855]
Reward: 9  Episode Reward:  41
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -2973.94429733   340.32246615  -180.6       ]
------
Step:10, Action:East
State  104
Old Q Values:  [-8652.84        4221.40217736   293.18487578 -8652.84      ]
New Q values:  [-8652.84        4221.40217736   272.1151506  -8652.84      ]
Reward: -1  Episode Reward:  40
xxxxx
xga x
x.  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   518.13733431   203.9972931 ]
------
Step:11, Action:East
State  126
Old Q Values:  [  0.         331.64678262 182.86408515 473.06184855]
New Q values:  [   0.          331.64678262 8342.71619767  473.06184855]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  2.75672352e+04]
------
Step:12, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  2.75672352e+04]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.15681356e+04]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1806.13829726   439.63204275]
------
Step:13, Action:East
State  126
Old Q Values:  [   0.          331.64678262 8342.71619767  473.06184855]
New Q values:  [   0.          331.64678262 6806.92715127  473.06184855]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.15681356e+04]
------
Step:14, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.15681356e+04]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  5.16849572e+03]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x.g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1806.13829726   439.63204275]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1806.13829726   439.63204275]
New Q values:  [ -281.736      -1150.91067548  2272.40403454   439.63204275]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  5.16849572e+03]
------
Step:16, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  5.16849572e+03]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  2.79463336e+03]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2426.11692438  1634.16821415]
------
Step:17, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2272.40403454   439.63204275]
New Q values:  [ -281.736      -1150.91067548  1746.75162326   439.63204275]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  2.79463336e+03]
------
Step:18, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  2.79463336e+03]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.64127883e+03]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1746.75162326   439.63204275]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1746.75162326   439.63204275]
New Q values:  [ -281.736      -1150.91067548  1190.48429918   439.63204275]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.64127883e+03]
------
Step:20, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.64127883e+03]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.38374661e+03]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2426.11692438  1634.16821415]
------
Step:21, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1190.48429918   439.63204275]
New Q values:  [ -281.736      -1150.91067548   890.71770281   439.63204275]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.38374661e+03]
------
Step:22, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.38374661e+03]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.28073372e+03]
Reward: -1  Episode Reward:  28
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2426.11692438  1634.16821415]
------
Step:23, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2426.11692438  1634.16821415]
New Q values:  [ -253.44886264 -1902.20915811  1354.0668862   1634.16821415]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.28073372e+03]
------
Step:24, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  1.28073372e+03]
New Q values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  7.78908799e+02]
Reward: -1  Episode Reward:  26
xxxxx
x a x
x.g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   890.71770281   439.63204275]
------
Step:25, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   890.71770281   439.63204275]
New Q values:  [ -281.736      -1150.91067548   727.31493328   439.63204275]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  7.78908799e+02]
------
Step:26, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.23875951e+03 -3.22965309e-01  7.78908799e+02]
New Q values:  [ 8.43634063e+00  2.76803608e+03 -3.22965309e-01  7.78908799e+02]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x.gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7577.10757745  1740.00472018 -4584.50430574 -1713.91177491]
------
Step:27, Action:North
State  208
Old Q Values:  [ 7577.10757745  1740.00472018 -4584.50430574 -1713.91177491]
New Q values:  [ 3860.65385381  1740.00472018 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  23
xxxxx
x  ax
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.76803608e+03 -3.22965309e-01  7.78908799e+02]
------
Step:28, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  2.76803608e+03 -3.22965309e-01  7.78908799e+02]
New Q values:  [ 8.43634063e+00  2.76803608e+03 -3.22965309e-01  8.01213984e+02]
Reward: -1  Episode Reward:  22
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1354.0668862   1634.16821415]
------
Step:29, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1354.0668862   1634.16821415]
New Q values:  [ -253.44886264 -1902.20915811  1354.0668862   1113.89628218]
Reward: -1  Episode Reward:  21
xxxxx
xa  x
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1536.09665506  329.16520469 -252.78192178]
------
Step:30, Action:South
State  111
Old Q Values:  [-177.44732869 2141.9205047   520.46511977 -120.29354603]
New Q values:  [-177.44732869 4289.6542664   520.46511977 -120.29354603]
Reward: 9  Episode Reward:  30
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  3183.65327689 11424.95354841   154.04646645]
------
Step:31, Action:South
State  189
Old Q Values:  [  275.08817949  3183.65327689 11424.95354841   154.04646645]
New Q values:  [  275.08817949  1905.69125353 11424.95354841   154.04646645]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 2109.43314258  123.6214372 ]
------
Step:32, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 2109.43314258  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 2325.50704203  123.6214372 ]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 4.94111262e+03 3.86565271e+02]
------
Step:33, Action:East
State  272
Old Q Values:  [ 2670.58828129 -8521.23367799  2044.62049965 77270.208139  ]
New Q values:  [ 2670.58828129 -8521.23367799 61509.1747854  77270.208139  ]
Reward: 100009  Episode Reward:  100037
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 2325.50704203  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 2325.50704203  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 2243.62503664  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4360.07406609  -168.92307549 -2638.82077121  1528.69761449]
------
Step:2, Action:North
State  273
Old Q Values:  [ 4360.07406609  -168.92307549 -2638.82077121  1528.69761449]
New Q values:  [ 4637.12819031  -168.92307549 -2638.82077121  1528.69761449]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9625.66187958 -2072.09364767  1460.9765133 ]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.89177942e+03 6.30415880e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 2.47371742e+04 6.30415880e+03 1.27673579e+04]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2670.58828129 -8521.23367799 61509.1747854  77270.208139  ]
------
Step:4, Action:West
State  273
Old Q Values:  [ 4637.12819031  -168.92307549 -2638.82077121  1528.69761449]
New Q values:  [ 4637.12819031  -168.92307549 -2638.82077121  1283.96655679]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 2243.62503664  123.6214372 ]
------
Step:5, Action:East
State  257
Old Q Values:  [53755.22029751  6367.89256635  5196.05500727  1875.31501677]
New Q values:  [53755.22029751  6367.89256635 25258.88444461  1875.31501677]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2670.58828129 -8521.23367799 61509.1747854  77270.208139  ]
------
Step:6, Action:West
State  273
Old Q Values:  [ 4637.12819031  -168.92307549 -2638.82077121  1283.96655679]
New Q values:  [ 4637.12819031  -168.92307549 -2638.82077121  1186.07413371]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 2243.62503664  123.6214372 ]
------
Step:7, Action:East
State  257
Old Q Values:  [53755.22029751  6367.89256635 25258.88444461  1875.31501677]
New Q values:  [53755.22029751  6367.89256635 33284.01621954  1875.31501677]
Reward: -1  Episode Reward:  13
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2670.58828129 -8521.23367799 61509.1747854  77270.208139  ]
------
Step:8, Action:West
State  272
Old Q Values:  [ 2670.58828129 -8521.23367799 61509.1747854  77270.208139  ]
New Q values:  [ 2670.58828129 -8521.23367799 61509.1747854  39019.66745268]
Reward: -1  Episode Reward:  12
xxxxx
xg. x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -127.28874991 -5704.51612281 27040.61399026 -5679.36893145]
------
Step:9, Action:East
State  260
Old Q Values:  [ -127.28874991 -5704.51612281 27040.61399026 -5679.36893145]
New Q values:  [ -127.28874991 -5704.51612281 29268.39803172 -5679.36893145]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2670.58828129 -8521.23367799 61509.1747854  39019.66745268]
------
Step:10, Action:East
State  272
Old Q Values:  [ 2670.58828129 -8521.23367799 61509.1747854  39019.66745268]
New Q values:  [ 2670.58828129 -8521.23367799 25294.9964997  39019.66745268]
Reward: 9  Episode Reward:  20
xxxxx
x.. x
x. .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2286.4219518  -6442.16912869 -8192.20126966  2011.80790039]
------
Step:11, Action:North
State  288
Old Q Values:  [ 2286.4219518  -6442.16912869 -8192.20126966  2011.80790039]
New Q values:  [ 4837.69814605 -6442.16912869 -8192.20126966  2011.80790039]
Reward: 9  Episode Reward:  29
xxxxx
x.. x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13059.09788443  1093.22762389   790.72804752  1050.85266124]
------
Step:12, Action:North
State  210
Old Q Values:  [13059.09788443  1093.22762389   790.72804752  1050.85266124]
New Q values:  [12033.49868705  1093.22762389   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  28
xxxxx
x..ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  6087.16496961  -180.00807518 22701.53177759]
------
Step:13, Action:West
State  130
Old Q Values:  [18222.84073342  6087.16496961  -180.00807518 22701.53177759]
New Q values:  [18222.84073342  6087.16496961  -180.00807518 35931.05615852]
Reward: 9  Episode Reward:  37
xxxxx
x.a x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 89483.47815827]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1354.0668862   1113.89628218]
New Q values:  [ -253.44886264 -1902.20915811  1354.0668862    911.78750939]
Reward: 9  Episode Reward:  46
xxxxx
xa  x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1536.09665506  329.16520469 -252.78192178]
------
Step:15, Action:South
State  99
Old Q Values:  [    0.         53806.48035597 38863.4580325      0.        ]
New Q values:  [     0.        115810.1272872  38863.4580325      0.       ]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
xa gx
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1354.0668862    911.78750939]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1354.0668862    911.78750939]
New Q values:  [ -253.44886264 -1902.20915811  1377.43757731   911.78750939]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.76803608e+03 -3.22965309e-01  8.01213984e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.76803608e+03 -3.22965309e-01  8.01213984e+02]
New Q values:  [ 8.43634063e+00  4.71666404e+03 -3.22965309e-01  8.01213984e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12033.49868705  1093.22762389   790.72804752  1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [12033.49868705  1093.22762389   790.72804752  1050.85266124]
New Q values:  [6227.79868579 1093.22762389  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.71666404e+03 -3.22965309e-01  8.01213984e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.71666404e+03 -3.22965309e-01  8.01213984e+02]
New Q values:  [ 8.43634063e+00 -2.95573823e+03 -3.22965309e-01  8.01213984e+02]
Reward: -10001  Episode Reward:  -9994
xxxxx
x.  x
x..gx
x...x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4637.12819031  -168.92307549 -2638.82077121  1186.07413371]
------
Step:1, Action:North
State  272
Old Q Values:  [ 2670.58828129 -8521.23367799 25294.9964997  39019.66745268]
New Q values:  [ 3122.55960007 -8521.23367799 25294.9964997  39019.66745268]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  6.82974763e+03  1.03161518e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.47371742e+04 6.30415880e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 2.47371742e+04 3.68525968e+03 1.27673579e+04]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3860.65385381  1740.00472018 -4584.50430574 -1713.91177491]
------
Step:3, Action:North
State  208
Old Q Values:  [ 3860.65385381  1740.00472018 -4584.50430574 -1713.91177491]
New Q values:  [ 1872.48984292  1740.00472018 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  27
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1076.09433797 -2383.80019164   160.8614149 ]
------
Step:4, Action:South
State  130
Old Q Values:  [18222.84073342  6087.16496961  -180.00807518 35931.05615852]
New Q values:  [18222.84073342  2996.01294072  -180.00807518 35931.05615852]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1872.48984292  1740.00472018 -4584.50430574 -1713.91177491]
------
Step:5, Action:North
State  208
Old Q Values:  [ 1872.48984292  1740.00472018 -4584.50430574 -1713.91177491]
New Q values:  [ 1071.22423856  1740.00472018 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  25
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1076.09433797 -2383.80019164   160.8614149 ]
------
Step:6, Action:South
State  136
Old Q Values:  [ -170.77177351  1076.09433797 -2383.80019164   160.8614149 ]
New Q values:  [ -170.77177351   951.83915124 -2383.80019164   160.8614149 ]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1071.22423856  1740.00472018 -4584.50430574 -1713.91177491]
------
Step:7, Action:South
State  208
Old Q Values:  [ 1071.22423856  1740.00472018 -4584.50430574 -1713.91177491]
New Q values:  [ 1071.22423856  2146.71133189 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4837.69814605 -6442.16912869 -8192.20126966  2011.80790039]
------
Step:8, Action:North
State  288
Old Q Values:  [ 4837.69814605 -6442.16912869 -8192.20126966  2011.80790039]
New Q values:  [ 2578.49265799 -6442.16912869 -8192.20126966  2011.80790039]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1071.22423856  2146.71133189 -4584.50430574 -1713.91177491]
------
Step:9, Action:South
State  208
Old Q Values:  [ 1071.22423856  2146.71133189 -4584.50430574 -1713.91177491]
New Q values:  [ 1071.22423856  1631.63233015 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2578.49265799 -6442.16912869 -8192.20126966  2011.80790039]
------
Step:10, Action:North
State  288
Old Q Values:  [ 2578.49265799 -6442.16912869 -8192.20126966  2011.80790039]
New Q values:  [ 1520.28676224 -6442.16912869 -8192.20126966  2011.80790039]
Reward: -1  Episode Reward:  20
xxxxx
xg. x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1071.22423856  1631.63233015 -4584.50430574 -1713.91177491]
------
Step:11, Action:South
State  208
Old Q Values:  [ 1071.22423856  1631.63233015 -4584.50430574 -1713.91177491]
New Q values:  [ 1071.22423856  1255.59530218 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1520.28676224 -6442.16912869 -8192.20126966  2011.80790039]
------
Step:12, Action:West
State  288
Old Q Values:  [ 1520.28676224 -6442.16912869 -8192.20126966  2011.80790039]
New Q values:  [ 1520.28676224 -6442.16912869 -8192.20126966 12510.02339596]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3122.55960007 -8521.23367799 25294.9964997  39019.66745268]
------
Step:13, Action:East
State  272
Old Q Values:  [ 3122.55960007 -8521.23367799 25294.9964997  39019.66745268]
New Q values:  [ 3122.55960007 -8521.23367799 13870.40561867 39019.66745268]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1520.28676224 -6442.16912869 -8192.20126966 12510.02339596]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1520.28676224 -6442.16912869 -8192.20126966 12510.02339596]
New Q values:  [ 1520.28676224 -6442.16912869 -8192.20126966 16709.30959419]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3122.55960007 -8521.23367799 13870.40561867 39019.66745268]
------
Step:15, Action:West
State  272
Old Q Values:  [ 3122.55960007 -8521.23367799 13870.40561867 39019.66745268]
New Q values:  [ 3122.55960007 -8521.23367799 13870.40561867 25766.83882106]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 10195.91087381 10063.72056248   644.94785455]
------
Step:16, Action:South
State  257
Old Q Values:  [53755.22029751  6367.89256635 33284.01621954  1875.31501677]
New Q values:  [53755.22029751 18493.12311579 33284.01621954  1875.31501677]
Reward: -301  Episode Reward:  -276
xxxxx
x.. x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[53755.22029751 18493.12311579 33284.01621954  1875.31501677]
------
Step:17, Action:North
State  257
Old Q Values:  [53755.22029751 18493.12311579 33284.01621954  1875.31501677]
New Q values:  [26569.27978506 18493.12311579 33284.01621954  1875.31501677]
Reward: 9  Episode Reward:  -267
xxxxx
x.. x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562  6535.7653539      0.        ]
------
Step:18, Action:North
State  181
Old Q Values:  [1193.41855571  445.64685634 1392.90939788  262.76946019]
New Q values:  [1565.42358863  445.64685634 1392.90939788  262.76946019]
Reward: 9  Episode Reward:  -258
xxxxx
xa. x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 3608.85388783  238.35800069    0.        ]
------
Step:19, Action:South
State  103
Old Q Values:  [ 221.30610858 3608.85388783  238.35800069    0.        ]
New Q values:  [ 221.30610858 1912.56863172  238.35800069    0.        ]
Reward: -1  Episode Reward:  -259
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1565.42358863  445.64685634 1392.90939788  262.76946019]
------
Step:20, Action:North
State  181
Old Q Values:  [1565.42358863  445.64685634 1392.90939788  262.76946019]
New Q values:  [1744.96866691  445.64685634 1392.90939788  262.76946019]
Reward: -1  Episode Reward:  -260
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         3731.33077154    0.            0.        ]
------
Step:21, Action:South
State  103
Old Q Values:  [ 221.30610858 1912.56863172  238.35800069    0.        ]
New Q values:  [ 221.30610858 1287.91805276  238.35800069    0.        ]
Reward: -1  Episode Reward:  -261
xxxxx
x . x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1744.96866691  445.64685634 1392.90939788  262.76946019]
------
Step:22, Action:North
State  181
Old Q Values:  [1744.96866691  445.64685634 1392.90939788  262.76946019]
New Q values:  [1083.76288259  445.64685634 1392.90939788  262.76946019]
Reward: -1  Episode Reward:  -262
xxxxx
xa. x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1287.91805276  238.35800069    0.        ]
------
Step:23, Action:South
State  103
Old Q Values:  [ 221.30610858 1287.91805276  238.35800069    0.        ]
New Q values:  [ 221.30610858 1387.67166975  238.35800069    0.        ]
Reward: -1  Episode Reward:  -263
xxxxx
x . x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1386.80134547 2335.80779431 2910.34816214  358.5166536 ]
------
Step:24, Action:East
State  181
Old Q Values:  [1083.76288259  445.64685634 1392.90939788  262.76946019]
New Q values:  [1083.76288259  445.64685634 1977.71602231  262.76946019]
Reward: -10001  Episode Reward:  -10264
xxxxx
x . x
x g x
x   x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 2243.62503664  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 2243.62503664  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 2632.90166098  123.6214372 ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x...x
x g x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -2973.94429733   340.32246615  -180.6       ]
------
Step:1, Action:East
State  110
Old Q Values:  [ -239.29051573 -2973.94429733   340.32246615  -180.6       ]
New Q values:  [ -239.29051573 -2973.94429733   359.72346644  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   727.31493328   439.63204275]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   518.13733431   203.9972931 ]
New Q values:  [-9594.56523706 -8069.05606225   498.20667909   203.9972931 ]
Reward: 9  Episode Reward:  18
xxxxx
xg ax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   951.83915124 -2383.80019164   160.8614149 ]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00 -2.95573823e+03 -3.22965309e-01  8.01213984e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  8.01213984e+02]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1219.14134868 -1606.42971403 -8896.20691497 -4124.36403175]
------
Step:4, Action:North
State  210
Old Q Values:  [6227.79868579 1093.22762389  790.72804752 1050.85266124]
New Q values:  [2730.88366952 1093.22762389  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  8.01213984e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  8.01213984e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  5.38080074e+02]
Reward: -1  Episode Reward:  25
xxxxx
x a x
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   727.31493328   439.63204275]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   727.31493328   439.63204275]
New Q values:  [ -281.736      -1150.91067548   451.74999539   439.63204275]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  5.38080074e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  5.38080074e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  6.27863303e+02]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1377.43757731   911.78750939]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   451.74999539   439.63204275]
New Q values:  [ -281.736      -1150.91067548   368.45898895   439.63204275]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  6.27863303e+02]
------
Step:9, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  6.27863303e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  3.82434934e+02]
Reward: -1  Episode Reward:  21
xxxxx
x a x
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   368.45898895   439.63204275]
------
Step:10, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   498.20667909   203.9972931 ]
New Q values:  [-9594.56523706 -8069.05606225   498.20667909 -4790.28582218]
Reward: -10001  Episode Reward:  -9980
xxxxx
xg  x
x.. x
x ..x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4637.12819031  -168.92307549 -2638.82077121  1186.07413371]
------
Step:1, Action:North
State  272
Old Q Values:  [ 3122.55960007 -8521.23367799 13870.40561867 25766.83882106]
New Q values:  [ 3303.34812759 -8521.23367799 13870.40561867 25766.83882106]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  6.82974763e+03  1.03161518e+03]
------
Step:2, Action:East
State  195
Old Q Values:  [  38.85388605 4182.6894674  2069.36982143 1169.39963074]
New Q values:  [  38.85388605 4182.6894674  1652.41302943 1169.39963074]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2730.88366952 1093.22762389  790.72804752 1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [2730.88366952 1093.22762389  790.72804752 1050.85266124]
New Q values:  [11877.07031536  1093.22762389   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  2996.01294072  -180.00807518 35931.05615852]
------
Step:4, Action:West
State  130
Old Q Values:  [18222.84073342  2996.01294072  -180.00807518 35931.05615852]
New Q values:  [18222.84073342  2996.01294072  -180.00807518 41222.86591089]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 89483.47815827]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   368.45898895   439.63204275]
New Q values:  [ -281.736      -1150.91067548   368.45898895   298.8662767 ]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   392.04486532  -180.6       ]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558 1536.09665506  329.16520469 -252.78192178]
New Q values:  [-252.35169558 1536.09665506  241.60377856 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   368.45898895   298.8662767 ]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   368.45898895   298.8662767 ]
New Q values:  [ -281.736      -1150.91067548   261.51407574   298.8662767 ]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  3.82434934e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  3.82434934e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  5.65605247e+02]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1377.43757731   911.78750939]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   261.51407574   298.8662767 ]
New Q values:  [ -281.736      -1150.91067548   273.68720432   298.8662767 ]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  5.65605247e+02]
------
Step:10, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  5.65605247e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  3.15301982e+02]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   273.68720432   298.8662767 ]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   273.68720432   298.8662767 ]
New Q values:  [ -281.736      -1150.91067548   273.68720432   236.55997028]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   392.04486532  -180.6       ]
------
Step:12, Action:East
State  106
Old Q Values:  [ -180.6        -7710.46911494   392.04486532  -180.6       ]
New Q values:  [ -180.6        -7710.46911494   238.32410743  -180.6       ]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   273.68720432   236.55997028]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   273.68720432   236.55997028]
New Q values:  [ -281.736      -1150.91067548   203.46547624   236.55997028]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  3.15301982e+02]
------
Step:14, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  3.15301982e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.96488784e+02]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   203.46547624   236.55997028]
------
Step:15, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   498.20667909 -4790.28582218]
New Q values:  [-9594.56523706 -8069.05606225   498.20667909 -1733.91509406]
Reward: -1  Episode Reward:  35
xxxxx
xag x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         609.33078271   86.99637671    0.        ]
------
Step:16, Action:South
State  107
Old Q Values:  [-252.35169558 1536.09665506  241.60377856 -252.78192178]
New Q values:  [-252.35169558  840.1494659   241.60377856 -252.78192178]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 734.36934626    0.         -874.56082707 -178.98      ]
------
Step:17, Action:North
State  184
Old Q Values:  [ 264.76160535    0.         5605.67275206    0.        ]
New Q values:  [ 176.80187437    0.         5605.67275206    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:18, Action:East
State  104
Old Q Values:  [-8652.84        4221.40217736   272.1151506  -8652.84      ]
New Q values:  [-8652.84        4221.40217736   257.70806397 -8652.84      ]
Reward: -1  Episode Reward:  42
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   498.20667909 -1733.91509406]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   203.46547624   236.55997028]
New Q values:  [ -281.736      -1150.91067548   139.73282563   236.55997028]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.96488784e+02]
------
Step:20, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.96488784e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.48963505e+02]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   139.73282563   236.55997028]
------
Step:21, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   498.20667909 -1733.91509406]
New Q values:  [-9594.56523706 -8069.05606225   498.20667909  -511.36680281]
Reward: -1  Episode Reward:  39
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         609.33078271   86.99637671    0.        ]
------
Step:22, Action:South
State  104
Old Q Values:  [-8652.84        4221.40217736   257.70806397 -8652.84      ]
New Q values:  [-8652.84        3369.66269656   257.70806397 -8652.84      ]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[ 176.80187437    0.         5605.67275206    0.        ]
------
Step:23, Action:East
State  184
Old Q Values:  [ 176.80187437    0.         5605.67275206    0.        ]
New Q values:  [ 176.80187437    0.         2998.45602956    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  2522.62309578 1060.81190283  568.38654082]
------
Step:24, Action:South
State  200
Old Q Values:  [  62.8218634  2522.62309578 1060.81190283  568.38654082]
New Q values:  [  62.8218634  8738.50088463 1060.81190283  568.38654082]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799 13870.40561867 25766.83882106]
------
Step:25, Action:West
State  273
Old Q Values:  [ 4637.12819031  -168.92307549 -2638.82077121  1186.07413371]
New Q values:  [ 4637.12819031  -168.92307549 -2638.82077121 70465.03451935]
Reward: 100009  Episode Reward:  100045
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   498.20667909  -511.36680281]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   498.20667909  -511.36680281]
New Q values:  [-9594.56523706 -8069.05606225   490.23441701  -511.36680281]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   951.83915124 -2383.80019164   160.8614149 ]
------
Step:2, Action:South
State  136
Old Q Values:  [ -170.77177351   951.83915124 -2383.80019164   160.8614149 ]
New Q values:  [ -170.77177351   751.8780651  -2383.80019164   160.8614149 ]
Reward: 9  Episode Reward:  18
xxxxx
x  gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1219.14134868 -1606.42971403 -8896.20691497 -4124.36403175]
------
Step:3, Action:South
State  208
Old Q Values:  [ 1071.22423856  1255.59530218 -4584.50430574 -1713.91177491]
New Q values:  [ 1071.22423856  5520.43099913 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1520.28676224 -6442.16912869 -8192.20126966 16709.30959419]
------
Step:4, Action:West
State  288
Old Q Values:  [ 1520.28676224 -6442.16912869 -8192.20126966 16709.30959419]
New Q values:  [ 1520.28676224 -6442.16912869 -8192.20126966 27828.63419348]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4637.12819031  -168.92307549 -2638.82077121 70465.03451935]
------
Step:5, Action:West
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799 13870.40561867 25766.83882106]
New Q values:  [ 3303.34812759 -8521.23367799 13870.40561867 20297.34039429]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26569.27978506 18493.12311579 33284.01621954  1875.31501677]
------
Step:6, Action:North
State  257
Old Q Values:  [26569.27978506 18493.12311579 33284.01621954  1875.31501677]
New Q values:  [44915.24705884 18493.12311579 33284.01621954  1875.31501677]
Reward: 9  Episode Reward:  54
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738 114273.78381604      0.        ]
------
Step:7, Action:North
State  183
Old Q Values:  [1386.80134547 2335.80779431 2910.34816214  358.5166536 ]
New Q values:  [1841.01681811 2335.80779431 2910.34816214  358.5166536 ]
Reward: -1  Episode Reward:  53
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4289.6542664   520.46511977 -120.29354603]
------
Step:8, Action:South
State  110
Old Q Values:  [ -239.29051573 -2973.94429733   359.72346644  -180.6       ]
New Q values:  [-239.29051573  357.83179526  359.72346644 -180.6       ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:9, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562  6535.7653539      0.        ]
New Q values:  [16872.63888686 16101.90751562 64668.63042912     0.        ]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799 13870.40561867 20297.34039429]
------
Step:1, Action:East
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799 13870.40561867 20297.34039429]
New Q values:  [ 3303.34812759 -8521.23367799 13902.15250551 20297.34039429]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1520.28676224 -6442.16912869 -8192.20126966 27828.63419348]
------
Step:2, Action:West
State  288
Old Q Values:  [ 1520.28676224 -6442.16912869 -8192.20126966 27828.63419348]
New Q values:  [ 1520.28676224 -6442.16912869 -8192.20126966 17220.05579568]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799 13902.15250551 20297.34039429]
------
Step:3, Action:East
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799 13902.15250551 20297.34039429]
New Q values:  [ 3303.34812759 -8521.23367799 10726.27774091 20297.34039429]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1520.28676224 -6442.16912869 -8192.20126966 17220.05579568]
------
Step:4, Action:West
State  288
Old Q Values:  [ 1520.28676224 -6442.16912869 -8192.20126966 17220.05579568]
New Q values:  [ 1520.28676224 -6442.16912869 -8192.20126966 12976.62443656]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799 10726.27774091 20297.34039429]
------
Step:5, Action:East
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799 10726.27774091 20297.34039429]
New Q values:  [ 3303.34812759 -8521.23367799  8182.89842733 20297.34039429]
Reward: -1  Episode Reward:  5
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1520.28676224 -6442.16912869 -8192.20126966 12976.62443656]
------
Step:6, Action:West
State  288
Old Q Values:  [ 1520.28676224 -6442.16912869 -8192.20126966 12976.62443656]
New Q values:  [ 1520.28676224 -6442.16912869 -8192.20126966 11279.25189291]
Reward: -1  Episode Reward:  4
xxxxx
xg..x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799  8182.89842733 20297.34039429]
------
Step:7, Action:West
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799  8182.89842733 20297.34039429]
New Q values:  [ 3303.34812759 -8521.23367799  8182.89842733 16898.85556723]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xg..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -127.28874991 -5704.51612281 29268.39803172 -5679.36893145]
------
Step:8, Action:East
State  260
Old Q Values:  [ -127.28874991 -5704.51612281 29268.39803172 -5679.36893145]
New Q values:  [ -127.28874991 -5704.51612281 16776.41588286 -5679.36893145]
Reward: -1  Episode Reward:  2
xxxxx
x...x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799  8182.89842733 16898.85556723]
------
Step:9, Action:East
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799  8182.89842733 16898.85556723]
New Q values:  [ 3303.34812759 -8521.23367799  6656.3349388  16898.85556723]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1520.28676224 -6442.16912869 -8192.20126966 11279.25189291]
------
Step:10, Action:North
State  288
Old Q Values:  [ 1520.28676224 -6442.16912869 -8192.20126966 11279.25189291]
New Q values:  [ 4176.63579951 -6442.16912869 -8192.20126966 11279.25189291]
Reward: 9  Episode Reward:  10
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11877.07031536  1093.22762389   790.72804752  1050.85266124]
------
Step:11, Action:North
State  208
Old Q Values:  [ 1071.22423856  5520.43099913 -4584.50430574 -1713.91177491]
New Q values:  [12800.74946869  5520.43099913 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  19
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  2996.01294072  -180.00807518 41222.86591089]
------
Step:12, Action:West
State  130
Old Q Values:  [18222.84073342  2996.01294072  -180.00807518 41222.86591089]
New Q values:  [18222.84073342  2996.01294072  -180.00807518 43339.58981184]
Reward: 9  Episode Reward:  28
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 89483.47815827]
------
Step:13, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   139.73282563   236.55997028]
New Q values:  [ -281.736      -1150.91067548   139.73282563   207.94102804]
Reward: 9  Episode Reward:  37
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  357.83179526  359.72346644 -180.6       ]
------
Step:14, Action:East
State  104
Old Q Values:  [-8652.84        3369.66269656   257.70806397 -8652.84      ]
New Q values:  [-8652.84        3369.66269656   249.55355069 -8652.84      ]
Reward: -1  Episode Reward:  36
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   490.23441701  -511.36680281]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   139.73282563   207.94102804]
New Q values:  [ -281.736      -1150.91067548    99.98218163   207.94102804]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.48963505e+02]
------
Step:16, Action:West
State  136
Old Q Values:  [ -170.77177351   751.8780651  -2383.80019164   160.8614149 ]
New Q values:  [ -170.77177351   751.8780651  -2383.80019164   210.81489106]
Reward: -1  Episode Reward:  34
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   490.23441701  -511.36680281]
------
Step:17, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548    99.98218163   207.94102804]
New Q values:  [ -281.736      -1150.91067548    84.08192403   207.94102804]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.48963505e+02]
------
Step:18, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.48963505e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.21367710e+02]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548    84.08192403   207.94102804]
------
Step:19, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    84.08192403   207.94102804]
New Q values:  [ -281.736      -1150.91067548    84.08192403   190.49345115]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  357.83179526  359.72346644 -180.6       ]
------
Step:20, Action:East
State  107
Old Q Values:  [-252.35169558  840.1494659   241.60377856 -252.78192178]
New Q values:  [-252.35169558  840.1494659   153.18954677 -252.78192178]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548    84.08192403   190.49345115]
------
Step:21, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   490.23441701  -511.36680281]
New Q values:  [-9594.56523706 -8069.05606225   490.23441701   -22.34748631]
Reward: -1  Episode Reward:  29
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         609.33078271   86.99637671    0.        ]
------
Step:22, Action:South
State  108
Old Q Values:  [-8463.16477134  3762.38420191   196.50327584     0.        ]
New Q values:  [-8463.16477134  3774.35594955   196.50327584     0.        ]
Reward: 9  Episode Reward:  38
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  150.2741814   7546.67422929  5900.24740645 -4966.32149798]
------
Step:23, Action:South
State  180
Old Q Values:  [  150.2741814   7546.67422929  5900.24740645 -4966.32149798]
New Q values:  [  150.2741814   8050.99445658  5900.24740645 -4966.32149798]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -127.28874991 -5704.51612281 16776.41588286 -5679.36893145]
------
Step:24, Action:East
State  260
Old Q Values:  [ -127.28874991 -5704.51612281 16776.41588286 -5679.36893145]
New Q values:  [ -127.28874991 -5704.51612281 11779.62302331 -5679.36893145]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799  6656.3349388  16898.85556723]
------
Step:25, Action:East
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799  6656.3349388  16898.85556723]
New Q values:  [ 3303.34812759 -8521.23367799  6045.70954339 16898.85556723]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4176.63579951 -6442.16912869 -8192.20126966 11279.25189291]
------
Step:26, Action:North
State  288
Old Q Values:  [ 4176.63579951 -6442.16912869 -8192.20126966 11279.25189291]
New Q values:  [ 5233.17541441 -6442.16912869 -8192.20126966 11279.25189291]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11877.07031536  1093.22762389   790.72804752  1050.85266124]
------
Step:27, Action:North
State  210
Old Q Values:  [11877.07031536  1093.22762389   790.72804752  1050.85266124]
New Q values:  [4786.63843922 1093.22762389  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.21367710e+02]
------
Step:28, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  1.21367710e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  4.61178357e+02]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1377.43757731   911.78750939]
------
Step:29, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1377.43757731   911.78750939]
New Q values:  [ -253.44886264 -1902.20915811   688.72853811   911.78750939]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  4.61178357e+02]
------
Step:30, Action:West
State  136
Old Q Values:  [ -170.77177351   751.8780651  -2383.80019164   210.81489106]
New Q values:  [ -170.77177351   751.8780651  -2383.80019164   208.34220754]
Reward: -1  Episode Reward:  30
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:31, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   688.72853811   911.78750939]
New Q values:  [ -253.44886264 -1902.20915811   688.72853811  1651.01128368]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4289.6542664   520.46511977 -120.29354603]
------
Step:32, Action:South
State  111
Old Q Values:  [-177.44732869 4289.6542664   520.46511977 -120.29354603]
New Q values:  [-177.44732869 2588.3661552   520.46511977 -120.29354603]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1841.01681811 2335.80779431 2910.34816214  358.5166536 ]
------
Step:33, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562 64668.63042912     0.        ]
New Q values:  [16872.63888686 16101.90751562 87921.77645921     0.        ]
Reward: 100009  Episode Reward:  100037
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2588.3661552   520.46511977 -120.29354603]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094  1695.1600898  -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1276.77884261 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1083.76288259  445.64685634 1977.71602231  262.76946019]
------
Step:2, Action:East
State  180
Old Q Values:  [  150.2741814   8050.99445658  5900.24740645 -4966.32149798]
New Q values:  [  150.2741814   8050.99445658  9168.99283315 -4966.32149798]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2911.77257695 22698.31290191   492.34934406]
------
Step:3, Action:East
State  200
Old Q Values:  [  62.8218634  8738.50088463 1060.81190283  568.38654082]
New Q values:  [  62.8218634  8738.50088463  795.46716574  568.38654082]
Reward: 9  Episode Reward:  17
xxxxx
x g.x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1219.14134868 -1606.42971403 -8896.20691497 -4124.36403175]
------
Step:4, Action:North
State  216
Old Q Values:  [ 1219.14134868 -1606.42971403 -8896.20691497 -4124.36403175]
New Q values:  [-5281.380041   -1606.42971403 -8896.20691497 -4124.36403175]
Reward: -9991  Episode Reward:  -9974
xxxxx
x .gx
x   x
x...x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799  6045.70954339 16898.85556723]
------
Step:1, Action:West
State  276
Old Q Values:  [ 4445.87885529 -5807.06396197   855.18004824  3094.15708599]
New Q values:  [ 4445.87885529 -5807.06396197   855.18004824  4776.94974139]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -127.28874991 -5704.51612281 11779.62302331 -5679.36893145]
------
Step:2, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 2632.90166098  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 6122.21733456  123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799  6045.70954339 16898.85556723]
------
Step:3, Action:West
State  273
Old Q Values:  [ 4637.12819031  -168.92307549 -2638.82077121 70465.03451935]
New Q values:  [ 4637.12819031  -168.92307549 -2638.82077121 30022.07900811]
Reward: -1  Episode Reward:  7
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 6122.21733456  123.6214372 ]
------
Step:4, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 6122.21733456  123.6214372 ]
New Q values:  [ 1668.07688645    26.73544252 11454.91063626   123.6214372 ]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4637.12819031  -168.92307549 -2638.82077121 30022.07900811]
------
Step:5, Action:West
State  273
Old Q Values:  [ 4637.12819031  -168.92307549 -2638.82077121 30022.07900811]
New Q values:  [ 4637.12819031  -168.92307549 -2638.82077121 15444.70479412]
Reward: -1  Episode Reward:  5
xxxxx
x. .x
x...x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1668.07688645    26.73544252 11454.91063626   123.6214372 ]
------
Step:6, Action:East
State  261
Old Q Values:  [ 1668.07688645    26.73544252 11454.91063626   123.6214372 ]
New Q values:  [1668.07688645   26.73544252 3651.02092467  123.6214372 ]
Reward: -10001  Episode Reward:  -9996
xxxxx
x. .x
x...x
x g.x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 3651.02092467  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 3651.02092467  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 6099.2198081   123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4637.12819031  -168.92307549 -2638.82077121 15444.70479412]
------
Step:2, Action:West
State  273
Old Q Values:  [ 4637.12819031  -168.92307549 -2638.82077121 15444.70479412]
New Q values:  [ 4637.12819031  -168.92307549 -2638.82077121  8007.04786008]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 6099.2198081   123.6214372 ]
------
Step:3, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 6099.2198081   123.6214372 ]
New Q values:  [1668.07688645   26.73544252 3872.17284566  123.6214372 ]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4445.87885529 -5807.06396197   855.18004824  4776.94974139]
------
Step:4, Action:West
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799  6045.70954339 16898.85556723]
New Q values:  [ 3303.34812759 -8521.23367799  6045.70954339  7920.59408059]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 3872.17284566  123.6214372 ]
------
Step:5, Action:East
State  260
Old Q Values:  [ -127.28874991 -5704.51612281 11779.62302331 -5679.36893145]
New Q values:  [ -127.28874991 -5704.51612281  6144.33413174 -5679.36893145]
Reward: -1  Episode Reward:  5
xxxxx
x...x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4445.87885529 -5807.06396197   855.18004824  4776.94974139]
------
Step:6, Action:West
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799  6045.70954339  7920.59408059]
New Q values:  [ 3303.34812759 -8521.23367799  6045.70954339  -989.06212824]
Reward: -10001  Episode Reward:  -9996
xxxxx
x...x
x. .x
xg .x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4637.12819031  -168.92307549 -2638.82077121  8007.04786008]
------
Step:1, Action:West
State  273
Old Q Values:  [ 4637.12819031  -168.92307549 -2638.82077121  8007.04786008]
New Q values:  [ 4637.12819031  -168.92307549 -2638.82077121  4369.87099773]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 3872.17284566  123.6214372 ]
------
Step:2, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 3872.17284566  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 3361.98200128  123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3303.34812759 -8521.23367799  6045.70954339  -989.06212824]
------
Step:3, Action:East
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799  6045.70954339  -989.06212824]
New Q values:  [ 3303.34812759 -8521.23367799  5807.45938523  -989.06212824]
Reward: 9  Episode Reward:  17
xxxxx
x.g x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5233.17541441 -6442.16912869 -8192.20126966 11279.25189291]
------
Step:4, Action:West
State  288
Old Q Values:  [ 5233.17541441 -6442.16912869 -8192.20126966 11279.25189291]
New Q values:  [ 5233.17541441 -6442.16912869 -8192.20126966  6253.33857273]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g.x
x a x
xxxxx
Step:5, Action:North
State  272
Old Q Values:  [ 3303.34812759 -8521.23367799  5807.45938523  -989.06212824]
New Q values:  [-2937.0229334  -8521.23367799  5807.45938523  -989.06212824]
Reward: -10001  Episode Reward:  -9985
xxxxx
x.. x
x...x
x g x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4637.12819031  -168.92307549 -2638.82077121  4369.87099773]
------
Step:1, Action:North
State  273
Old Q Values:  [ 4637.12819031  -168.92307549 -2638.82077121  4369.87099773]
New Q values:  [ 4747.94984     -168.92307549 -2638.82077121  4369.87099773]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9625.66187958 -2072.09364767  1460.9765133 ]
------
Step:2, Action:South
State  195
Old Q Values:  [  38.85388605 4182.6894674  1652.41302943 1169.39963074]
New Q values:  [  38.85388605 3096.86073896 1652.41302943 1169.39963074]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4747.94984     -168.92307549 -2638.82077121  4369.87099773]
------
Step:3, Action:North
State  272
Old Q Values:  [-2937.0229334  -8521.23367799  5807.45938523  -989.06212824]
New Q values:  [  873.5151142  -8521.23367799  5807.45938523  -989.06212824]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x.a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  6.82974763e+03  1.03161518e+03]
------
Step:4, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  6.82974763e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  4.17329058e+03  1.03161518e+03]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4786.63843922 1093.22762389  790.72804752 1050.85266124]
------
Step:5, Action:North
State  210
Old Q Values:  [4786.63843922 1093.22762389  790.72804752 1050.85266124]
New Q values:  [14921.93231924  1093.22762389   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  2996.01294072  -180.00807518 43339.58981184]
------
Step:6, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  4.61178357e+02]
New Q values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  6.85174728e+02]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   688.72853811  1651.01128368]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    84.08192403   190.49345115]
New Q values:  [ -281.736      -1150.91067548    84.08192403   333.64222023]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  840.1494659   153.18954677 -252.78192178]
------
Step:8, Action:South
State  107
Old Q Values:  [-252.35169558  840.1494659   153.18954677 -252.78192178]
New Q values:  [-252.35169558 1189.83221522  153.18954677 -252.78192178]
Reward: 9  Episode Reward:  52
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         2827.90809619    0.        ]
------
Step:9, Action:East
State  185
Old Q Values:  [ 734.36934626    0.         -874.56082707 -178.98      ]
New Q values:  [ 734.36934626    0.          225.67357464 -178.98      ]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  1.92032635e+03 -5.74006972e+03  2.00341972e+02]
------
Step:10, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  1.92032635e+03 -5.74006972e+03  2.00341972e+02]
New Q values:  [ 2.33354578e+00  2.19191549e+03 -5.74006972e+03  2.00341972e+02]
Reward: -1  Episode Reward:  50
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 4747.94984     -168.92307549 -2638.82077121  4369.87099773]
------
Step:11, Action:North
State  273
Old Q Values:  [ 4747.94984     -168.92307549 -2638.82077121  4369.87099773]
New Q values:  [ 2556.15458379  -168.92307549 -2638.82077121  4369.87099773]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.19191549e+03 -5.74006972e+03  2.00341972e+02]
------
Step:12, Action:South
State  203
Old Q Values:  [3.60604218e+00 5.44407350e+03 3.50642584e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 3.48799070e+03 3.50642584e+03 9.06816004e+03]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 2556.15458379  -168.92307549 -2638.82077121  4369.87099773]
------
Step:13, Action:West
State  272
Old Q Values:  [  873.5151142  -8521.23367799  5807.45938523  -989.06212824]
New Q values:  [  873.5151142  -8521.23367799  5807.45938523 73084.34926635]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  873.5151142  -8521.23367799  5807.45938523 73084.34926635]
------
Step:1, Action:West
State  276
Old Q Values:  [ 4445.87885529 -5807.06396197   855.18004824  4776.94974139]
New Q values:  [ 4445.87885529 -5807.06396197   855.18004824  3759.48013608]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -127.28874991 -5704.51612281  6144.33413174 -5679.36893145]
------
Step:2, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 3361.98200128  123.6214372 ]
New Q values:  [1668.07688645   26.73544252 2677.9564571   123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4445.87885529 -5807.06396197   855.18004824  3759.48013608]
------
Step:3, Action:North
State  273
Old Q Values:  [ 2556.15458379  -168.92307549 -2638.82077121  4369.87099773]
New Q values:  [ 3915.56039739  -168.92307549 -2638.82077121  4369.87099773]
Reward: 9  Episode Reward:  17
xxxxx
x..gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9625.66187958 -2072.09364767  1460.9765133 ]
------
Step:4, Action:South
State  196
Old Q Values:  [-2469.90645144  2911.77257695 22698.31290191   492.34934406]
New Q values:  [-2469.90645144  2497.87268737 22698.31290191   492.34934406]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4445.87885529 -5807.06396197   855.18004824  3759.48013608]
------
Step:5, Action:North
State  276
Old Q Values:  [ 4445.87885529 -5807.06396197   855.18004824  3759.48013608]
New Q values:  [ 8587.24541269 -5807.06396197   855.18004824  3759.48013608]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2497.87268737 22698.31290191   492.34934406]
------
Step:6, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.47371742e+04 3.68525968e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 2.47371742e+04 5.31972871e+03 1.27673579e+04]
Reward: 9  Episode Reward:  24
xxxxx
x...x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12800.74946869  5520.43099913 -4584.50430574 -1713.91177491]
------
Step:7, Action:North
State  208
Old Q Values:  [12800.74946869  5520.43099913 -4584.50430574 -1713.91177491]
New Q values:  [41317.8717282   5520.43099913 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  33
xxxxx
xg.ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 11374.93691792  10380.76024543  -8652.84       120640.57313574]
------
Step:8, Action:West
State  128
Old Q Values:  [ 11374.93691792  10380.76024543  -8652.84       120640.57313574]
New Q values:  [11374.93691792 10380.76024543 -8652.84       75546.55817317]
Reward: -9991  Episode Reward:  -9958
xxxxx
x.g x
x   x
x  .x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548    84.08192403   333.64222023]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    84.08192403   333.64222023]
New Q values:  [ -281.736      -1150.91067548    84.08192403   246.77392803]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  357.83179526  359.72346644 -180.6       ]
------
Step:2, Action:East
State  110
Old Q Values:  [-239.29051573  357.83179526  359.72346644 -180.6       ]
New Q values:  [-239.29051573  357.83179526  217.32156498 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548    84.08192403   246.77392803]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    84.08192403   246.77392803]
New Q values:  [ -281.736      -1150.91067548    84.08192403   455.05923578]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1189.83221522  153.18954677 -252.78192178]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 2588.3661552   520.46511977 -120.29354603]
New Q values:  [-177.44732869 1913.85091073  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  16
xxxxx
x  .x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1841.01681811 2335.80779431 2910.34816214  358.5166536 ]
------
Step:5, Action:East
State  189
Old Q Values:  [  275.08817949  1905.69125353 11424.95354841   154.04646645]
New Q values:  [ 275.08817949 1905.69125353 5226.95606715  154.04646645]
Reward: -1  Episode Reward:  15
xxxxx
x  .x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.19191549e+03 -5.74006972e+03  2.00341972e+02]
------
Step:6, Action:South
State  197
Old Q Values:  [-5833.78831344   555.86236402 -4510.80210702   403.06255908]
New Q values:  [-5833.78831344  1710.07873061 -4510.80210702   403.06255908]
Reward: 9  Episode Reward:  24
xxxxx
x  gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 4.94111262e+03 3.86565271e+02]
------
Step:7, Action:East
State  273
Old Q Values:  [ 3915.56039739  -168.92307549 -2638.82077121  4369.87099773]
New Q values:  [3915.56039739 -168.92307549  825.87326334 4369.87099773]
Reward: 9  Episode Reward:  33
xxxxx
x  .x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5233.17541441 -6442.16912869 -8192.20126966  6253.33857273]
------
Step:8, Action:West
State  288
Old Q Values:  [ 5233.17541441 -6442.16912869 -8192.20126966  6253.33857273]
New Q values:  [ 5233.17541441 -6442.16912869 -8192.20126966  3983.06921409]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 4.94111262e+03 3.86565271e+02]
------
Step:9, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 3.86565271e+02]
New Q values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 9.63413046e+02]
Reward: 9  Episode Reward:  41
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 2677.9564571   123.6214372 ]
------
Step:10, Action:East
State  261
Old Q Values:  [1668.07688645   26.73544252 2677.9564571   123.6214372 ]
New Q values:  [1668.07688645   26.73544252 3646.75620665  123.6214372 ]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 8587.24541269 -5807.06396197   855.18004824  3759.48013608]
------
Step:11, Action:West
State  276
Old Q Values:  [ 8587.24541269 -5807.06396197   855.18004824  3759.48013608]
New Q values:  [ 8587.24541269 -5807.06396197   855.18004824  3346.49229395]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
xg .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -127.28874991 -5704.51612281  6144.33413174 -5679.36893145]
------
Step:12, Action:East
State  260
Old Q Values:  [ -127.28874991 -5704.51612281  6144.33413174 -5679.36893145]
New Q values:  [ -127.28874991 -5704.51612281  5033.3072765  -5679.36893145]
Reward: -1  Episode Reward:  38
xxxxx
xg .x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 8587.24541269 -5807.06396197   855.18004824  3346.49229395]
------
Step:13, Action:North
State  276
Old Q Values:  [ 8587.24541269 -5807.06396197   855.18004824  3346.49229395]
New Q values:  [10243.79203565 -5807.06396197   855.18004824  3346.49229395]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2497.87268737 22698.31290191   492.34934406]
------
Step:14, Action:East
State  196
Old Q Values:  [-2469.90645144  2497.87268737 22698.31290191   492.34934406]
New Q values:  [-2469.90645144  2497.87268737 21480.08667922   492.34934406]
Reward: 9  Episode Reward:  46
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[41317.8717282   5520.43099913 -4584.50430574 -1713.91177491]
------
Step:15, Action:North
State  208
Old Q Values:  [41317.8717282   5520.43099913 -4584.50430574 -1713.91177491]
New Q values:  [99196.51614323  5520.43099913 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100055
xxxxx
x gax
x   x
x   x
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1668.07688645   26.73544252 3646.75620665  123.6214372 ]
------
Step:1, Action:East
State  260
Old Q Values:  [ -127.28874991 -5704.51612281  5033.3072765  -5679.36893145]
New Q values:  [ -127.28874991 -5704.51612281  5091.8605213  -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[10243.79203565 -5807.06396197   855.18004824  3346.49229395]
------
Step:2, Action:North
State  276
Old Q Values:  [10243.79203565 -5807.06396197   855.18004824  3346.49229395]
New Q values:  [11524.06907742 -5807.06396197   855.18004824  3346.49229395]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.47371742e+04 5.31972871e+03 1.27673579e+04]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  9625.66187958 -2072.09364767  1460.9765133 ]
New Q values:  [-5922.26708831  5160.62605115 -2072.09364767  1460.9765133 ]
Reward: -1  Episode Reward:  17
xxxxx
x. gx
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549  825.87326334 4369.87099773]
------
Step:4, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549  825.87326334 4369.87099773]
New Q values:  [ 3915.56039739  -168.92307549   825.87326334 15221.92251674]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44915.24705884 18493.12311579 33284.01621954  1875.31501677]
------
Step:5, Action:North
State  261
Old Q Values:  [1668.07688645   26.73544252 3646.75620665  123.6214372 ]
New Q values:  [1545.73520322   26.73544252 3646.75620665  123.6214372 ]
Reward: 9  Episode Reward:  25
xxxxx
x. .x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1841.01681811 2335.80779431 2910.34816214  358.5166536 ]
------
Step:6, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738 114273.78381604      0.        ]
New Q values:  [109163.23336057  23344.73803738  47257.10134176      0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x. .x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5160.62605115 -2072.09364767  1460.9765133 ]
------
Step:7, Action:South
State  196
Old Q Values:  [-2469.90645144  2497.87268737 21480.08667922   492.34934406]
New Q values:  [-2469.90645144  4455.76979817 21480.08667922   492.34934406]
Reward: -1  Episode Reward:  23
xxxxx
x. .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[11524.06907742 -5807.06396197   855.18004824  3346.49229395]
------
Step:8, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 9.63413046e+02]
New Q values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 1.47879208e+03]
Reward: -1  Episode Reward:  22
xxxxx
x. .x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1545.73520322   26.73544252 3646.75620665  123.6214372 ]
------
Step:9, Action:East
State  261
Old Q Values:  [1545.73520322   26.73544252 3646.75620665  123.6214372 ]
New Q values:  [1545.73520322   26.73544252 4915.32320588  123.6214372 ]
Reward: -1  Episode Reward:  21
xxxxx
x. .x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[11524.06907742 -5807.06396197   855.18004824  3346.49229395]
------
Step:10, Action:West
State  276
Old Q Values:  [11524.06907742 -5807.06396197   855.18004824  3346.49229395]
New Q values:  [11524.06907742 -5807.06396197   855.18004824  2812.59387935]
Reward: -1  Episode Reward:  20
xxxxx
x. .x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1545.73520322   26.73544252 4915.32320588  123.6214372 ]
------
Step:11, Action:North
State  260
Old Q Values:  [ -127.28874991 -5704.51612281  5091.8605213  -5679.36893145]
New Q values:  [ 1496.49401422 -5704.51612281  5091.8605213  -5679.36893145]
Reward: -1  Episode Reward:  19
xxxxx
x. .x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:12, Action:East
State  180
Old Q Values:  [  150.2741814   8050.99445658  9168.99283315 -4966.32149798]
New Q values:  [  150.2741814   8050.99445658 10111.02313703 -4966.32149798]
Reward: -1  Episode Reward:  18
xxxxx
x. .x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4455.76979817 21480.08667922   492.34934406]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  4.17329058e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  6.15129593e+03  1.03161518e+03]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14921.93231924  1093.22762389   790.72804752  1050.85266124]
------
Step:14, Action:North
State  210
Old Q Values:  [14921.93231924  1093.22762389   790.72804752  1050.85266124]
New Q values:  [18976.04987125  1093.22762389   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  36
xxxxx
x. ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  2996.01294072  -180.00807518 43339.58981184]
------
Step:15, Action:West
State  130
Old Q Values:  [18222.84073342  2996.01294072  -180.00807518 43339.58981184]
New Q values:  [18222.84073342  2996.01294072  -180.00807518 44180.27937222]
Reward: -1  Episode Reward:  35
xxxxx
x.a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 89483.47815827]
------
Step:16, Action:West
State  126
Old Q Values:  [   0.          331.64678262 6806.92715127  473.06184855]
New Q values:  [   0.          331.64678262 6806.92715127  768.78001264]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1913.85091073  520.46511977 -120.29354603]
------
Step:17, Action:South
State  110
Old Q Values:  [-239.29051573  357.83179526  217.32156498 -180.6       ]
New Q values:  [ -239.29051573 -4755.83717745   217.32156498  -180.6       ]
Reward: -10001  Episode Reward:  -9957
xxxxx
x   x
xg  x
x  .x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   688.72853811  1651.01128368]
New Q values:  [ -253.44886264 -1902.20915811   688.72853811  1239.95978669]
Reward: 9  Episode Reward:  9
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1913.85091073  520.46511977 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1913.85091073  520.46511977 -120.29354603]
New Q values:  [-177.44732869 1644.04481293  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1841.01681811 2335.80779431 2910.34816214  358.5166536 ]
------
Step:3, Action:East
State  183
Old Q Values:  [1841.01681811 2335.80779431 2910.34816214  358.5166536 ]
New Q values:  [1841.01681811 2335.80779431 5810.94671508  358.5166536 ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:4, Action:East
State  200
Old Q Values:  [  62.8218634  8738.50088463  795.46716574  568.38654082]
New Q values:  [  62.8218634  8738.50088463 -158.34204791  568.38654082]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5281.380041   -1606.42971403 -8896.20691497 -4124.36403175]
------
Step:5, Action:South
State  216
Old Q Values:  [-5281.380041   -1606.42971403 -8896.20691497 -4124.36403175]
New Q values:  [-5281.380041     932.78073871 -8896.20691497 -4124.36403175]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5233.17541441 -6442.16912869 -8192.20126966  3983.06921409]
------
Step:6, Action:North
State  288
Old Q Values:  [ 5233.17541441 -6442.16912869 -8192.20126966  3983.06921409]
New Q values:  [ 4151.46044017 -6442.16912869 -8192.20126966  3983.06921409]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 158.55927744 6862.634248      0.         1847.21017375]
------
Step:7, Action:South
State  218
Old Q Values:  [ 158.55927744 6862.634248      0.         1847.21017375]
New Q values:  [ 158.55927744 3989.89183125    0.         1847.21017375]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4151.46044017 -6442.16912869 -8192.20126966  3983.06921409]
------
Step:8, Action:North
State  288
Old Q Values:  [ 4151.46044017 -6442.16912869 -8192.20126966  3983.06921409]
New Q values:  [ 1939.81839768 -6442.16912869 -8192.20126966  3983.06921409]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5281.380041     932.78073871 -8896.20691497 -4124.36403175]
------
Step:9, Action:South
State  216
Old Q Values:  [-5281.380041     932.78073871 -8896.20691497 -4124.36403175]
New Q values:  [-5281.380041    1567.43305971 -8896.20691497 -4124.36403175]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1939.81839768 -6442.16912869 -8192.20126966  3983.06921409]
------
Step:10, Action:West
State  288
Old Q Values:  [ 1939.81839768 -6442.16912869 -8192.20126966  3983.06921409]
New Q values:  [ 1939.81839768 -6442.16912869 -8192.20126966 23523.93246554]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  873.5151142  -8521.23367799  5807.45938523 73084.34926635]
------
Step:11, Action:West
State  272
Old Q Values:  [  873.5151142  -8521.23367799  5807.45938523 73084.34926635]
New Q values:  [   873.5151142   -8521.23367799   5807.45938523 102713.71382419]
Reward: 100009  Episode Reward:  100059
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  6.15129593e+03  1.03161518e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.47371742e+04 5.31972871e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 2.47371742e+04 3.18922463e+04 1.27673579e+04]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[99196.51614323  5520.43099913 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [99196.51614323  5520.43099913 -4584.50430574 -1713.91177491]
New Q values:  [39909.56987682  5520.43099913 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   751.8780651  -2383.80019164   208.34220754]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00 -8.11152887e+02 -3.22965309e-01  6.85174728e+02]
New Q values:  [ 8.43634063e+00  1.16478098e+04 -3.22965309e-01  6.85174728e+02]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[39909.56987682  5520.43099913 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  208
Old Q Values:  [39909.56987682  5520.43099913 -4584.50430574 -1713.91177491]
New Q values:  [19457.57089319  5520.43099913 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.16478098e+04 -3.22965309e-01  6.85174728e+02]
------
Step:5, Action:West
State  136
Old Q Values:  [ -170.77177351   751.8780651  -2383.80019164   208.34220754]
New Q values:  [ -170.77177351   751.8780651  -2383.80019164   213.35313412]
Reward: 9  Episode Reward:  25
xxxxx
x.agx
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:6, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   490.23441701   -22.34748631]
New Q values:  [-9594.56523706 -8069.05606225   490.23441701   179.26024029]
Reward: 9  Episode Reward:  34
xxxxx
xag x
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         609.33078271   86.99637671    0.        ]
------
Step:7, Action:South
State  105
Old Q Values:  [-180.6         609.33078271   86.99637671    0.        ]
New Q values:  [-180.6         469.44311696   86.99637671    0.        ]
Reward: 9  Episode Reward:  43
xxxxx
x  gx
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 734.36934626    0.          225.67357464 -178.98      ]
------
Step:8, Action:North
State  185
Old Q Values:  [ 734.36934626    0.          225.67357464 -178.98      ]
New Q values:  [ 433.98067359    0.          225.67357464 -178.98      ]
Reward: -1  Episode Reward:  42
xxxxx
xag x
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         469.44311696   86.99637671    0.        ]
------
Step:9, Action:South
State  104
Old Q Values:  [-8652.84        3369.66269656   249.55355069 -8652.84      ]
New Q values:  [-8652.84        2246.80188749   249.55355069 -8652.84      ]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[ 176.80187437    0.         2998.45602956    0.        ]
------
Step:10, Action:East
State  184
Old Q Values:  [ 176.80187437    0.         2998.45602956    0.        ]
New Q values:  [ 176.80187437    0.         3820.33267721    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xga x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  8738.50088463 -158.34204791  568.38654082]
------
Step:11, Action:South
State  206
Old Q Values:  [    0.         -1406.21014518  1309.6092569      0.        ]
New Q values:  [   0.         2894.13666515 1309.6092569     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[11524.06907742 -5807.06396197   855.18004824  2812.59387935]
------
Step:12, Action:North
State  272
Old Q Values:  [   873.5151142   -8521.23367799   5807.45938523 102713.71382419]
New Q values:  [  2970.35631107  -8521.23367799   5807.45938523 102713.71382419]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xga x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  8738.50088463 -158.34204791  568.38654082]
------
Step:13, Action:South
State  206
Old Q Values:  [   0.         2894.13666515 1309.6092569     0.        ]
New Q values:  [   0.         4614.27538929 1309.6092569     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[11524.06907742 -5807.06396197   855.18004824  2812.59387935]
------
Step:14, Action:North
State  272
Old Q Values:  [  2970.35631107  -8521.23367799   5807.45938523 102713.71382419]
New Q values:  [  3809.09278982  -8521.23367799   5807.45938523 102713.71382419]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xga x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  62.8218634  8738.50088463 -158.34204791  568.38654082]
------
Step:15, Action:South
State  200
Old Q Values:  [  62.8218634  8738.50088463 -158.34204791  568.38654082]
New Q values:  [   62.8218634  34308.91450111  -158.34204791   568.38654082]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  3809.09278982  -8521.23367799   5807.45938523 102713.71382419]
------
Step:16, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 1.47879208e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 2.07151379e+03]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1545.73520322   26.73544252 4915.32320588  123.6214372 ]
------
Step:17, Action:East
State  257
Old Q Values:  [44915.24705884 18493.12311579 33284.01621954  1875.31501677]
New Q values:  [44915.24705884 18493.12311579 17879.58324284  1875.31501677]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549   825.87326334 15221.92251674]
------
Step:18, Action:West
State  276
Old Q Values:  [11524.06907742 -5807.06396197   855.18004824  2812.59387935]
New Q values:  [11524.06907742 -5807.06396197   855.18004824  2599.0345135 ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1545.73520322   26.73544252 4915.32320588  123.6214372 ]
------
Step:19, Action:North
State  260
Old Q Values:  [ 1496.49401422 -5704.51612281  5091.8605213  -5679.36893145]
New Q values:  [ 1136.4587134  -5704.51612281  5091.8605213  -5679.36893145]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:20, Action:East
State  188
Old Q Values:  [-6523.78898263  3672.10034816  1762.3544145      0.        ]
New Q values:  [-6523.78898263  3672.10034816  1027.18354596     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.          772.36851598 1076.13926719  441.58769553]
------
Step:21, Action:East
State  204
Old Q Values:  [   0.          772.36851598 1076.13926719  441.58769553]
New Q values:  [  0.         772.36851598 900.08562479 441.58769553]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5281.380041    1567.43305971 -8896.20691497 -4124.36403175]
------
Step:22, Action:South
State  208
Old Q Values:  [19457.57089319  5520.43099913 -4584.50430574 -1713.91177491]
New Q values:  [19457.57089319 69270.75213931 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100048
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.47371742e+04 3.18922463e+04 1.27673579e+04]
------
Step:1, Action:East
State  200
Old Q Values:  [   62.8218634  34308.91450111  -158.34204791   568.38654082]
New Q values:  [   62.8218634  34308.91450111   412.29309875   568.38654082]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5281.380041    1567.43305971 -8896.20691497 -4124.36403175]
------
Step:2, Action:South
State  208
Old Q Values:  [19457.57089319 69270.75213931 -4584.50430574 -1713.91177491]
New Q values:  [19457.57089319 34770.88059539 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1939.81839768 -6442.16912869 -8192.20126966 23523.93246554]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1939.81839768 -6442.16912869 -8192.20126966 23523.93246554]
New Q values:  [ 1939.81839768 -6442.16912869 -8192.20126966 40229.08713347]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  3809.09278982  -8521.23367799   5807.45938523 102713.71382419]
------
Step:4, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549   825.87326334 15221.92251674]
New Q values:  [ 3915.56039739  -168.92307549   825.87326334 19568.74312435]
Reward: 9  Episode Reward:  36
xxxxx
x .gx
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44915.24705884 18493.12311579 17879.58324284  1875.31501677]
------
Step:5, Action:North
State  261
Old Q Values:  [1545.73520322   26.73544252 4915.32320588  123.6214372 ]
New Q values:  [1217.00888798   26.73544252 4915.32320588  123.6214372 ]
Reward: 9  Episode Reward:  45
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1083.76288259  445.64685634 1977.71602231  262.76946019]
------
Step:6, Action:East
State  180
Old Q Values:  [  150.2741814   8050.99445658 10111.02313703 -4966.32149798]
New Q values:  [  150.2741814   8050.99445658 13611.48315303 -4966.32149798]
Reward: -1  Episode Reward:  44
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.47371742e+04 3.18922463e+04 1.27673579e+04]
------
Step:7, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.47371742e+04 3.18922463e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 2.47371742e+04 2.31875627e+04 1.27673579e+04]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19457.57089319 34770.88059539 -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  208
Old Q Values:  [19457.57089319 34770.88059539 -4584.50430574 -1713.91177491]
New Q values:  [19457.57089319 25976.4783782  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  42
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1939.81839768 -6442.16912869 -8192.20126966 40229.08713347]
------
Step:9, Action:West
State  288
Old Q Values:  [ 1939.81839768 -6442.16912869 -8192.20126966 40229.08713347]
New Q values:  [ 1939.81839768 -6442.16912869 -8192.20126966 46905.14900065]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  3809.09278982  -8521.23367799   5807.45938523 102713.71382419]
------
Step:10, Action:West
State  272
Old Q Values:  [  3809.09278982  -8521.23367799   5807.45938523 102713.71382419]
New Q values:  [ 3809.09278982 -8521.23367799  5807.45938523 42559.48249144]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1217.00888798   26.73544252 4915.32320588  123.6214372 ]
------
Step:11, Action:East
State  261
Old Q Values:  [1217.00888798   26.73544252 4915.32320588  123.6214372 ]
New Q values:  [1217.00888798   26.73544252 7836.15221966  123.6214372 ]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549   825.87326334 19568.74312435]
------
Step:12, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549   825.87326334 19568.74312435]
New Q values:  [ 3915.56039739  -168.92307549   825.87326334 10177.74291564]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1217.00888798   26.73544252 7836.15221966  123.6214372 ]
------
Step:13, Action:East
State  261
Old Q Values:  [1217.00888798   26.73544252 7836.15221966  123.6214372 ]
New Q values:  [1217.00888798   26.73544252 6187.18376255  123.6214372 ]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549   825.87326334 10177.74291564]
------
Step:14, Action:West
State  272
Old Q Values:  [ 3809.09278982 -8521.23367799  5807.45938523 42559.48249144]
New Q values:  [ 3809.09278982 -8521.23367799  5807.45938523 18879.34812534]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1217.00888798   26.73544252 6187.18376255  123.6214372 ]
------
Step:15, Action:East
State  260
Old Q Values:  [ 1136.4587134  -5704.51612281  5091.8605213  -5679.36893145]
New Q values:  [ 1136.4587134  -5704.51612281  7699.94864612 -5679.36893145]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3809.09278982 -8521.23367799  5807.45938523 18879.34812534]
------
Step:16, Action:West
State  272
Old Q Values:  [ 3809.09278982 -8521.23367799  5807.45938523 18879.34812534]
New Q values:  [ 3809.09278982 -8521.23367799  5807.45938523  9861.12384397]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1136.4587134  -5704.51612281  7699.94864612 -5679.36893145]
------
Step:17, Action:East
State  261
Old Q Values:  [1217.00888798   26.73544252 6187.18376255  123.6214372 ]
New Q values:  [1217.00888798   26.73544252 5931.49422825  123.6214372 ]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[11524.06907742 -5807.06396197   855.18004824  2599.0345135 ]
------
Step:18, Action:North
State  272
Old Q Values:  [ 3809.09278982 -8521.23367799  5807.45938523  9861.12384397]
New Q values:  [ 8944.18937909 -8521.23367799  5807.45938523  9861.12384397]
Reward: -1  Episode Reward:  32
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.47371742e+04 2.31875627e+04 1.27673579e+04]
------
Step:19, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.47371742e+04 2.31875627e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.28526068e+04 2.31875627e+04 1.27673579e+04]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 8944.18937909 -8521.23367799  5807.45938523  9861.12384397]
------
Step:20, Action:West
State  272
Old Q Values:  [ 8944.18937909 -8521.23367799  5807.45938523  9861.12384397]
New Q values:  [ 8944.18937909 -8521.23367799  5807.45938523  5723.29780606]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1217.00888798   26.73544252 5931.49422825  123.6214372 ]
------
Step:21, Action:East
State  261
Old Q Values:  [1217.00888798   26.73544252 5931.49422825  123.6214372 ]
New Q values:  [1217.00888798   26.73544252 5829.21841453  123.6214372 ]
Reward: -1  Episode Reward:  29
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[11524.06907742 -5807.06396197   855.18004824  2599.0345135 ]
------
Step:22, Action:North
State  272
Old Q Values:  [ 8944.18937909 -8521.23367799  5807.45938523  5723.29780606]
New Q values:  [ 4533.34456451 -8521.23367799  5807.45938523  5723.29780606]
Reward: -10001  Episode Reward:  -9972
xxxxx
x ..x
x g x
x   x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549   825.87326334 10177.74291564]
------
Step:1, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  5807.45938523  5723.29780606]
New Q values:  [ 4533.34456451 -8521.23367799  5807.45938523  4043.48464678]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1217.00888798   26.73544252 5829.21841453  123.6214372 ]
------
Step:2, Action:East
State  261
Old Q Values:  [1217.00888798   26.73544252 5829.21841453  123.6214372 ]
New Q values:  [1217.00888798   26.73544252 4073.32518138  123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  5807.45938523  4043.48464678]
------
Step:3, Action:East
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  5807.45938523  4043.48464678]
New Q values:  [ 4533.34456451 -8521.23367799 16399.92845429  4043.48464678]
Reward: 9  Episode Reward:  17
xxxxx
x.. x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1939.81839768 -6442.16912869 -8192.20126966 46905.14900065]
------
Step:4, Action:West
State  288
Old Q Values:  [ 1939.81839768 -6442.16912869 -8192.20126966 46905.14900065]
New Q values:  [ 1939.81839768 -6442.16912869 -8192.20126966 23681.43813655]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799 16399.92845429  4043.48464678]
------
Step:5, Action:East
State  273
Old Q Values:  [ 3915.56039739  -168.92307549   825.87326334 10177.74291564]
New Q values:  [ 3915.56039739  -168.92307549  7434.1807463  10177.74291564]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1939.81839768 -6442.16912869 -8192.20126966 23681.43813655]
------
Step:6, Action:West
State  288
Old Q Values:  [ 1939.81839768 -6442.16912869 -8192.20126966 23681.43813655]
New Q values:  [ 1939.81839768 -6442.16912869 -8192.20126966 12525.29812931]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  7434.1807463  10177.74291564]
------
Step:7, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799 16399.92845429  4043.48464678]
New Q values:  [ 4533.34456451 -8521.23367799 16399.92845429 15091.36797636]
Reward: -1  Episode Reward:  13
xxxxx
x.g x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44915.24705884 18493.12311579 17879.58324284  1875.31501677]
------
Step:8, Action:North
State  261
Old Q Values:  [1217.00888798   26.73544252 4073.32518138  123.6214372 ]
New Q values:  [1085.51836189   26.73544252 4073.32518138  123.6214372 ]
Reward: 9  Episode Reward:  22
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1083.76288259  445.64685634 1977.71602231  262.76946019]
------
Step:9, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738  47257.10134176      0.        ]
New Q values:  [109163.23336057  23344.73803738  25864.50934957      0.        ]
Reward: 9  Episode Reward:  31
xxxxx
x.g x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.28526068e+04 2.31875627e+04 1.27673579e+04]
------
Step:10, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.28526068e+04 2.31875627e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.28526068e+04 1.70733686e+04 1.27673579e+04]
Reward: 9  Episode Reward:  40
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19457.57089319 25976.4783782  -4584.50430574 -1713.91177491]
------
Step:11, Action:South
State  208
Old Q Values:  [19457.57089319 25976.4783782  -4584.50430574 -1713.91177491]
New Q values:  [19457.57089319 14147.58079007 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  39
xxxxx
x.. x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1939.81839768 -6442.16912869 -8192.20126966 12525.29812931]
------
Step:12, Action:West
State  288
Old Q Values:  [ 1939.81839768 -6442.16912869 -8192.20126966 12525.29812931]
New Q values:  [ 1939.81839768 -6442.16912869 -8192.20126966  8062.84212641]
Reward: -1  Episode Reward:  38
xxxxx
x.. x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  7434.1807463  10177.74291564]
------
Step:13, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  7434.1807463  10177.74291564]
New Q values:  [ 3915.56039739  -168.92307549  7434.1807463  17545.07128391]
Reward: -1  Episode Reward:  37
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44915.24705884 18493.12311579 17879.58324284  1875.31501677]
------
Step:14, Action:North
State  257
Old Q Values:  [44915.24705884 18493.12311579 17879.58324284  1875.31501677]
New Q values:  [44342.0317613  18493.12311579 17879.58324284  1875.31501677]
Reward: -1  Episode Reward:  36
xxxxx
x.. x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562 87921.77645921     0.        ]
------
Step:15, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562 87921.77645921     0.        ]
New Q values:  [16872.63888686 16101.90751562 37013.49936224     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x.. x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  6.15129593e+03  1.03161518e+03]
------
Step:16, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  6.15129593e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  8.15273333e+03  1.03161518e+03]
Reward: -1  Episode Reward:  34
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[18976.04987125  1093.22762389   790.72804752  1050.85266124]
------
Step:17, Action:North
State  208
Old Q Values:  [19457.57089319 14147.58079007 -4584.50430574 -1713.91177491]
New Q values:  [21036.51216894 14147.58079007 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  33
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  2996.01294072  -180.00807518 44180.27937222]
------
Step:18, Action:West
State  130
Old Q Values:  [18222.84073342  2996.01294072  -180.00807518 44180.27937222]
New Q values:  [18222.84073342  2996.01294072  -180.00807518 44522.55519637]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  18106.71132503 89483.47815827]
------
Step:19, Action:West
State  114
Old Q Values:  [ -180.6         3557.6642036  18106.71132503 89483.47815827]
New Q values:  [  -180.6          3557.6642036   18106.71132503 130541.82944947]
Reward: 100009  Episode Reward:  100051
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21036.51216894 14147.58079007 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [21036.51216894 14147.58079007 -4584.50430574 -1713.91177491]
New Q values:  [21036.51216894  8083.28495395 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1939.81839768 -6442.16912869 -8192.20126966  8062.84212641]
------
Step:2, Action:West
State  288
Old Q Values:  [ 1939.81839768 -6442.16912869 -8192.20126966  8062.84212641]
New Q values:  [ 1939.81839768 -6442.16912869 -8192.20126966  8150.51538685]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799 16399.92845429 15091.36797636]
------
Step:3, Action:East
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799 16399.92845429 15091.36797636]
New Q values:  [ 4533.34456451 -8521.23367799  9004.52599777 15091.36797636]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x.. x
x. ax
xxxxx
Step:4, Action:North
State  288
Old Q Values:  [ 1939.81839768 -6442.16912869 -8192.20126966  8150.51538685]
New Q values:  [ 7086.28100975 -6442.16912869 -8192.20126966  8150.51538685]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21036.51216894  8083.28495395 -4584.50430574 -1713.91177491]
------
Step:5, Action:South
State  208
Old Q Values:  [21036.51216894  8083.28495395 -4584.50430574 -1713.91177491]
New Q values:  [21036.51216894  5677.86859764 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  15
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7086.28100975 -6442.16912869 -8192.20126966  8150.51538685]
------
Step:6, Action:West
State  288
Old Q Values:  [ 7086.28100975 -6442.16912869 -8192.20126966  8150.51538685]
New Q values:  [ 7086.28100975 -6442.16912869 -8192.20126966  8523.12753991]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  7434.1807463  17545.07128391]
------
Step:7, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  9004.52599777 15091.36797636]
New Q values:  [ 4533.34456451 -8521.23367799  9004.52599777 19344.55671893]
Reward: 9  Episode Reward:  23
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44342.0317613  18493.12311579 17879.58324284  1875.31501677]
------
Step:8, Action:North
State  261
Old Q Values:  [1085.51836189   26.73544252 4073.32518138  123.6214372 ]
New Q values:  [1032.92215145   26.73544252 4073.32518138  123.6214372 ]
Reward: 9  Episode Reward:  32
xxxxx
x..gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1083.76288259  445.64685634 1977.71602231  262.76946019]
------
Step:9, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738  25864.50934957      0.        ]
New Q values:  [109163.23336057  23344.73803738  15473.21431902      0.        ]
Reward: 9  Episode Reward:  41
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.28526068e+04 1.70733686e+04 1.27673579e+04]
------
Step:10, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.28526068e+04 1.70733686e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.28526068e+04 1.31397011e+04 1.27673579e+04]
Reward: -1  Episode Reward:  40
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21036.51216894  5677.86859764 -4584.50430574 -1713.91177491]
------
Step:11, Action:North
State  208
Old Q Values:  [21036.51216894  5677.86859764 -4584.50430574 -1713.91177491]
New Q values:  [21770.77142649  5677.86859764 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  39
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  2996.01294072  -180.00807518 44522.55519637]
------
Step:12, Action:West
State  130
Old Q Values:  [18222.84073342  2996.01294072  -180.00807518 44522.55519637]
New Q values:  [18222.84073342  2996.01294072  -180.00807518 56976.97091339]
Reward: 9  Episode Reward:  48
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   18106.71132503 130541.82944947]
------
Step:13, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   18106.71132503 130541.82944947]
New Q values:  [  -180.6          3557.6642036   18106.71132503 125392.73541853]
Reward: 100009  Episode Reward:  100057
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7086.28100975 -6442.16912869 -8192.20126966  8523.12753991]
------
Step:1, Action:West
State  288
Old Q Values:  [ 7086.28100975 -6442.16912869 -8192.20126966  8523.12753991]
New Q values:  [ 7086.28100975 -6442.16912869 -8192.20126966  9218.01803165]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  9004.52599777 19344.55671893]
------
Step:2, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  9004.52599777 19344.55671893]
New Q values:  [ 4533.34456451 -8521.23367799  9004.52599777  8965.22024199]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1032.92215145   26.73544252 4073.32518138  123.6214372 ]
------
Step:3, Action:East
State  261
Old Q Values:  [1032.92215145   26.73544252 4073.32518138  123.6214372 ]
New Q values:  [1032.92215145   26.73544252 4330.08787188  123.6214372 ]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  9004.52599777  8965.22024199]
------
Step:4, Action:East
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  7434.1807463  17545.07128391]
New Q values:  [ 3915.56039739  -168.92307549  5738.47770801 17545.07128391]
Reward: -1  Episode Reward:  16
xxxxx
x .gx
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7086.28100975 -6442.16912869 -8192.20126966  9218.01803165]
------
Step:5, Action:West
State  288
Old Q Values:  [ 7086.28100975 -6442.16912869 -8192.20126966  9218.01803165]
New Q values:  [ 7086.28100975 -6442.16912869 -8192.20126966  8950.12859783]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  5738.47770801 17545.07128391]
------
Step:6, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  5738.47770801 17545.07128391]
New Q values:  [3915.56039739 -168.92307549 5738.47770801 8316.45487513]
Reward: -1  Episode Reward:  14
xxxxx
x .gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1032.92215145   26.73544252 4330.08787188  123.6214372 ]
------
Step:7, Action:East
State  261
Old Q Values:  [1032.92215145   26.73544252 4330.08787188  123.6214372 ]
New Q values:  [1032.92215145   26.73544252 4432.79294808  123.6214372 ]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  9004.52599777  8965.22024199]
------
Step:8, Action:East
State  273
Old Q Values:  [3915.56039739 -168.92307549 5738.47770801 8316.45487513]
New Q values:  [3915.56039739 -168.92307549 4979.82966255 8316.45487513]
Reward: -1  Episode Reward:  12
xxxxx
x .gx
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7086.28100975 -6442.16912869 -8192.20126966  8950.12859783]
------
Step:9, Action:West
State  288
Old Q Values:  [ 7086.28100975 -6442.16912869 -8192.20126966  8950.12859783]
New Q values:  [ 7086.28100975 -6442.16912869 -8192.20126966  6074.38790167]
Reward: -1  Episode Reward:  11
xxxxx
x .gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 8316.45487513]
------
Step:10, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  9004.52599777  8965.22024199]
New Q values:  [ 4533.34456451 -8521.23367799  9004.52599777  4915.32598122]
Reward: -1  Episode Reward:  10
xxxxx
x g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1032.92215145   26.73544252 4432.79294808  123.6214372 ]
------
Step:11, Action:East
State  261
Old Q Values:  [1032.92215145   26.73544252 4432.79294808  123.6214372 ]
New Q values:  [1032.92215145   26.73544252 4267.45364177  123.6214372 ]
Reward: -1  Episode Reward:  9
xxxxx
x .gx
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 8316.45487513]
------
Step:12, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  9004.52599777  4915.32598122]
New Q values:  [ 4533.34456451 -8521.23367799  9004.52599777  3245.76648502]
Reward: -1  Episode Reward:  8
xxxxx
x g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1032.92215145   26.73544252 4267.45364177  123.6214372 ]
------
Step:13, Action:East
State  260
Old Q Values:  [ 1136.4587134  -5704.51612281  7699.94864612 -5679.36893145]
New Q values:  [ 1136.4587134  -5704.51612281  5780.73725778 -5679.36893145]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  9004.52599777  3245.76648502]
------
Step:14, Action:East
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  9004.52599777  3245.76648502]
New Q values:  [ 4533.34456451 -8521.23367799  5727.09470203  3245.76648502]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7086.28100975 -6442.16912869 -8192.20126966  6074.38790167]
------
Step:15, Action:North
State  288
Old Q Values:  [ 7086.28100975 -6442.16912869 -8192.20126966  6074.38790167]
New Q values:  [ 9371.14383185 -6442.16912869 -8192.20126966  6074.38790167]
Reward: 9  Episode Reward:  15
xxxxx
x ..x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21770.77142649  5677.86859764 -4584.50430574 -1713.91177491]
------
Step:16, Action:North
State  208
Old Q Values:  [21770.77142649  5677.86859764 -4584.50430574 -1713.91177491]
New Q values:  [12208.05151306  5677.86859764 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  24
xxxxx
x .ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.16478098e+04 -3.22965309e-01  6.85174728e+02]
------
Step:17, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.16478098e+04 -3.22965309e-01  6.85174728e+02]
New Q values:  [ 8.43634063e+00  1.16478098e+04 -3.22965309e-01  6.51457827e+02]
Reward: 9  Episode Reward:  33
xxxxx
x a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   688.72853811  1239.95978669]
------
Step:18, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    84.08192403   455.05923578]
New Q values:  [ -281.736      -1150.91067548    84.08192403   538.37335888]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1189.83221522  153.18954677 -252.78192178]
------
Step:19, Action:South
State  110
Old Q Values:  [ -239.29051573 -4755.83717745   217.32156498  -180.6       ]
New Q values:  [-239.29051573 -348.92535679  217.32156498 -180.6       ]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:20, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562 37013.49936224     0.        ]
New Q values:  [16872.63888686 16101.90751562 77256.61974473     0.        ]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.16478098e+04 -3.22965309e-01  6.51457827e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.16478098e+04 -3.22965309e-01  6.51457827e+02]
New Q values:  [ 8.43634063e+00  8.32693938e+03 -3.22965309e-01  6.51457827e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12208.05151306  5677.86859764 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [12208.05151306  5677.86859764 -4584.50430574 -1713.91177491]
New Q values:  [ 7380.70241838  5677.86859764 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  8.32693938e+03 -3.22965309e-01  6.51457827e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  8.32693938e+03 -3.22965309e-01  6.51457827e+02]
New Q values:  [ 8.43634063e+00  8.32693938e+03 -3.22965309e-01  6.37971067e+02]
Reward: 9  Episode Reward:  17
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   688.72853811  1239.95978669]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    84.08192403   538.37335888]
New Q values:  [ -281.736      -1150.91067548    84.08192403   577.69900812]
Reward: 9  Episode Reward:  26
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1189.83221522  153.18954677 -252.78192178]
------
Step:5, Action:South
State  107
Old Q Values:  [-252.35169558 1189.83221522  153.18954677 -252.78192178]
New Q values:  [-252.35169558 2224.61690061  153.18954677 -252.78192178]
Reward: 9  Episode Reward:  35
xxxxx
x   x
xa. x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1841.01681811 2335.80779431 5810.94671508  358.5166536 ]
------
Step:6, Action:East
State  185
Old Q Values:  [ 433.98067359    0.          225.67357464 -178.98      ]
New Q values:  [ 433.98067359    0.          753.24407764 -178.98      ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.19191549e+03 -5.74006972e+03  2.00341972e+02]
------
Step:7, Action:South
State  200
Old Q Values:  [   62.8218634  34308.91450111   412.29309875   568.38654082]
New Q values:  [   62.8218634  15441.09421105   412.29309875   568.38654082]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  5727.09470203  3245.76648502]
------
Step:8, Action:East
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  5727.09470203  3245.76648502]
New Q values:  [ 4533.34456451 -8521.23367799  5107.58103037  3245.76648502]
Reward: 9  Episode Reward:  52
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9371.14383185 -6442.16912869 -8192.20126966  6074.38790167]
------
Step:9, Action:North
State  288
Old Q Values:  [ 9371.14383185 -6442.16912869 -8192.20126966  6074.38790167]
New Q values:  [ 4218.08745065 -6442.16912869 -8192.20126966  6074.38790167]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5281.380041    1567.43305971 -8896.20691497 -4124.36403175]
------
Step:10, Action:South
State  216
Old Q Values:  [-5281.380041    1567.43305971 -8896.20691497 -4124.36403175]
New Q values:  [-5281.380041    2448.68959439 -8896.20691497 -4124.36403175]
Reward: -1  Episode Reward:  50
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4218.08745065 -6442.16912869 -8192.20126966  6074.38790167]
------
Step:11, Action:West
State  288
Old Q Values:  [ 4218.08745065 -6442.16912869 -8192.20126966  6074.38790167]
New Q values:  [ 4218.08745065 -6442.16912869 -8192.20126966  3961.42946978]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  5107.58103037  3245.76648502]
------
Step:12, Action:East
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  5107.58103037  3245.76648502]
New Q values:  [ 4533.34456451 -8521.23367799  3307.85864734  3245.76648502]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4218.08745065 -6442.16912869 -8192.20126966  3961.42946978]
------
Step:13, Action:North
State  288
Old Q Values:  [ 4218.08745065 -6442.16912869 -8192.20126966  3961.42946978]
New Q values:  [ 2421.24185858 -6442.16912869 -8192.20126966  3961.42946978]
Reward: -1  Episode Reward:  47
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[-5281.380041    2448.68959439 -8896.20691497 -4124.36403175]
------
Step:14, Action:South
State  216
Old Q Values:  [-5281.380041    2448.68959439 -8896.20691497 -4124.36403175]
New Q values:  [-5281.380041    2167.30467869 -8896.20691497 -4124.36403175]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2421.24185858 -6442.16912869 -8192.20126966  3961.42946978]
------
Step:15, Action:West
State  288
Old Q Values:  [ 2421.24185858 -6442.16912869 -8192.20126966  3961.42946978]
New Q values:  [ 2421.24185858 -6442.16912869 -8192.20126966  2943.97515726]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  3307.85864734  3245.76648502]
------
Step:16, Action:East
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  3307.85864734  3245.76648502]
New Q values:  [ 4533.34456451 -8521.23367799  2205.73600612  3245.76648502]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2421.24185858 -6442.16912869 -8192.20126966  2943.97515726]
------
Step:17, Action:North
State  288
Old Q Values:  [ 2421.24185858 -6442.16912869 -8192.20126966  2943.97515726]
New Q values:  [ 2164.86429281 -6442.16912869 -8192.20126966  2943.97515726]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 158.55927744 3989.89183125    0.         1847.21017375]
------
Step:18, Action:West
State  216
Old Q Values:  [-5281.380041    2167.30467869 -8896.20691497 -4124.36403175]
New Q values:  [-5281.380041    2167.30467869 -8896.20691497  -992.77096491]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.19191549e+03 -5.74006972e+03  2.00341972e+02]
------
Step:19, Action:South
State  203
Old Q Values:  [3.60604218e+00 3.48799070e+03 3.50642584e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 3.88953274e+03 3.50642584e+03 9.06816004e+03]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 8316.45487513]
------
Step:20, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549 4979.82966255 8316.45487513]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 76634.59147844]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1276.77884261 -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094  1276.77884261 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1109.42634374 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1083.76288259  445.64685634 1977.71602231  262.76946019]
------
Step:2, Action:East
State  189
Old Q Values:  [ 275.08817949 1905.69125353 5226.95606715  154.04646645]
New Q values:  [ 275.08817949 1905.69125353 2753.75707465  154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.19191549e+03 -5.74006972e+03  2.00341972e+02]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  4455.76979817 21480.08667922   492.34934406]
New Q values:  [-2469.90645144  5244.9286425  21480.08667922   492.34934406]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[11524.06907742 -5807.06396197   855.18004824  2599.0345135 ]
------
Step:4, Action:West
State  276
Old Q Values:  [11524.06907742 -5807.06396197   855.18004824  2599.0345135 ]
New Q values:  [11524.06907742 -5807.06396197   855.18004824  2779.23498274]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1136.4587134  -5704.51612281  5780.73725778 -5679.36893145]
------
Step:5, Action:East
State  260
Old Q Values:  [ 1136.4587134  -5704.51612281  5780.73725778 -5679.36893145]
New Q values:  [ 1136.4587134  -5704.51612281  5768.91562634 -5679.36893145]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[11524.06907742 -5807.06396197   855.18004824  2779.23498274]
------
Step:6, Action:North
State  276
Old Q Values:  [11524.06907742 -5807.06396197   855.18004824  2779.23498274]
New Q values:  [ 9250.43508119 -5807.06396197   855.18004824  2779.23498274]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
------
Step:7, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  1.54713582e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  1.18867582e+04  0.00000000e+00]
Reward: 9  Episode Reward:  43
xxxxx
x  .x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[18976.04987125  1093.22762389   790.72804752  1050.85266124]
------
Step:8, Action:North
State  216
Old Q Values:  [-5281.380041    2167.30467869 -8896.20691497  -992.77096491]
New Q values:  [  390.92979676  2167.30467869 -8896.20691497  -992.77096491]
Reward: 9  Episode Reward:  52
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  8.32693938e+03 -3.22965309e-01  6.37971067e+02]
------
Step:9, Action:South
State  136
Old Q Values:  [ -170.77177351   751.8780651  -2383.80019164   213.35313412]
New Q values:  [ -170.77177351   950.34262965 -2383.80019164   213.35313412]
Reward: -1  Episode Reward:  51
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  390.92979676  2167.30467869 -8896.20691497  -992.77096491]
------
Step:10, Action:South
State  208
Old Q Values:  [ 7380.70241838  5677.86859764 -4584.50430574 -1713.91177491]
New Q values:  [ 7380.70241838 63159.73998623 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100060
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2164.86429281 -6442.16912869 -8192.20126966  2943.97515726]
------
Step:1, Action:West
State  288
Old Q Values:  [ 2164.86429281 -6442.16912869 -8192.20126966  2943.97515726]
New Q values:  [ 2164.86429281 -6442.16912869 -8192.20126966  2542.99343226]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  2205.73600612  3245.76648502]
------
Step:2, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  2205.73600612  3245.76648502]
New Q values:  [ 4533.34456451 -8521.23367799  2205.73600612 14606.3161224 ]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44342.0317613  18493.12311579 17879.58324284  1875.31501677]
------
Step:3, Action:North
State  260
Old Q Values:  [ 1136.4587134  -5704.51612281  5768.91562634 -5679.36893145]
New Q values:  [ 4543.42843127 -5704.51612281  5768.91562634 -5679.36893145]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  150.2741814   8050.99445658 13611.48315303 -4966.32149798]
------
Step:4, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738  15473.21431902      0.        ]
New Q values:  [109163.23336057  23344.73803738  10136.59605449      0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x.g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.28526068e+04 1.31397011e+04 1.27673579e+04]
------
Step:5, Action:East
State  193
Old Q Values:  [-5922.26708831  5160.62605115 -2072.09364767  1460.9765133 ]
New Q values:  [-5922.26708831  5160.62605115 18118.4845368   1460.9765133 ]
Reward: -1  Episode Reward:  35
xxxxx
x..gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7380.70241838 63159.73998623 -4584.50430574 -1713.91177491]
------
Step:6, Action:South
State  208
Old Q Values:  [ 7380.70241838 63159.73998623 -4584.50430574 -1713.91177491]
New Q values:  [ 7380.70241838 26026.19402417 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  34
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2164.86429281 -6442.16912869 -8192.20126966  2542.99343226]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2164.86429281 -6442.16912869 -8192.20126966  2542.99343226]
New Q values:  [ 2164.86429281 -6442.16912869 -8192.20126966  5398.49220962]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  2205.73600612 14606.3161224 ]
------
Step:8, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  2205.73600612 14606.3161224 ]
New Q values:  [ 4533.34456451 -8521.23367799  2205.73600612  7572.60113686]
Reward: -1  Episode Reward:  32
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4543.42843127 -5704.51612281  5768.91562634 -5679.36893145]
------
Step:9, Action:East
State  261
Old Q Values:  [1032.92215145   26.73544252 4267.45364177  123.6214372 ]
New Q values:  [1032.92215145   26.73544252 3978.16179777  123.6214372 ]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  2205.73600612  7572.60113686]
------
Step:10, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  2205.73600612  7572.60113686]
New Q values:  [ 4533.34456451 -8521.23367799  2205.73600612  4221.88899407]
Reward: -1  Episode Reward:  30
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1032.92215145   26.73544252 3978.16179777  123.6214372 ]
------
Step:11, Action:North
State  261
Old Q Values:  [1032.92215145   26.73544252 3978.16179777  123.6214372 ]
New Q values:  [1005.88366727   26.73544252 3978.16179777  123.6214372 ]
Reward: -1  Episode Reward:  29
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1083.76288259  445.64685634 1977.71602231  262.76946019]
------
Step:12, Action:North
State  181
Old Q Values:  [1083.76288259  445.64685634 1977.71602231  262.76946019]
New Q values:  [ 855.20665396  445.64685634 1977.71602231  262.76946019]
Reward: 9  Episode Reward:  38
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1387.67166975  238.35800069    0.        ]
------
Step:13, Action:South
State  103
Old Q Values:  [ 221.30610858 1387.67166975  238.35800069    0.        ]
New Q values:  [ 221.30610858 2297.75268242  238.35800069    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1841.01681811 2335.80779431 5810.94671508  358.5166536 ]
------
Step:14, Action:East
State  183
Old Q Values:  [1841.01681811 2335.80779431 5810.94671508  358.5166536 ]
New Q values:  [1841.01681811 2335.80779431 4769.59868587  358.5166536 ]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  8.15273333e+03  1.03161518e+03]
------
Step:15, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.28526068e+04 1.31397011e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.28526068e+04 1.30631386e+04 1.27673579e+04]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7380.70241838 26026.19402417 -4584.50430574 -1713.91177491]
------
Step:16, Action:South
State  208
Old Q Values:  [ 7380.70241838 26026.19402417 -4584.50430574 -1713.91177491]
New Q values:  [ 7380.70241838 12029.42527255 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2164.86429281 -6442.16912869 -8192.20126966  5398.49220962]
------
Step:17, Action:West
State  288
Old Q Values:  [ 2164.86429281 -6442.16912869 -8192.20126966  5398.49220962]
New Q values:  [ 2164.86429281 -6442.16912869 -8192.20126966  3518.8002532 ]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  2205.73600612  4221.88899407]
------
Step:18, Action:West
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  2205.73600612  4221.88899407]
New Q values:  [ 4533.34456451 -8521.23367799  2205.73600612  3418.83028553]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4543.42843127 -5704.51612281  5768.91562634 -5679.36893145]
------
Step:19, Action:East
State  260
Old Q Values:  [ 4543.42843127 -5704.51612281  5768.91562634 -5679.36893145]
New Q values:  [ 4543.42843127 -5704.51612281  3666.96961989 -5679.36893145]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4533.34456451 -8521.23367799  2205.73600612  3418.83028553]
------
Step:20, Action:North
State  272
Old Q Values:  [ 4533.34456451 -8521.23367799  2205.73600612  3418.83028553]
New Q values:  [ 5731.67941873 -8521.23367799  2205.73600612  3418.83028553]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.28526068e+04 1.30631386e+04 1.27673579e+04]
------
Step:21, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.28526068e+04 1.30631386e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.28526068e+04 8.83348304e+03 1.27673579e+04]
Reward: -1  Episode Reward:  29
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7380.70241838 12029.42527255 -4584.50430574 -1713.91177491]
------
Step:22, Action:South
State  208
Old Q Values:  [ 7380.70241838 12029.42527255 -4584.50430574 -1713.91177491]
New Q values:  [ 7380.70241838  5866.81018498 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2164.86429281 -6442.16912869 -8192.20126966  3518.8002532 ]
------
Step:23, Action:West
State  288
Old Q Values:  [ 2164.86429281 -6442.16912869 -8192.20126966  3518.8002532 ]
New Q values:  [ 2164.86429281 -6442.16912869 -8192.20126966  3126.4239269 ]
Reward: -1  Episode Reward:  27
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5731.67941873 -8521.23367799  2205.73600612  3418.83028553]
------
Step:24, Action:North
State  276
Old Q Values:  [ 9250.43508119 -5807.06396197   855.18004824  2779.23498274]
New Q values:  [10143.60003624 -5807.06396197   855.18004824  2779.23498274]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  5244.9286425  21480.08667922   492.34934406]
------
Step:25, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.28526068e+04 8.83348304e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 1.28526068e+04 5.74700394e+03 1.27673579e+04]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7380.70241838  5866.81018498 -4584.50430574 -1713.91177491]
------
Step:26, Action:North
State  210
Old Q Values:  [18976.04987125  1093.22762389   790.72804752  1050.85266124]
New Q values:  [24688.91122251  1093.22762389   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  34
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  2996.01294072  -180.00807518 56976.97091339]
------
Step:27, Action:West
State  130
Old Q Values:  [18222.84073342  2996.01294072  -180.00807518 56976.97091339]
New Q values:  [ 18222.84073342   2996.01294072   -180.00807518 121796.96194807]
Reward: 100009  Episode Reward:  100043
xxxxx
x a x
x   x
x  gx
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.28526068e+04 5.74700394e+03 1.27673579e+04]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144  5244.9286425  21480.08667922   492.34934406]
New Q values:  [-2469.90645144  5146.45146787 21480.08667922   492.34934406]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[10143.60003624 -5807.06396197   855.18004824  2779.23498274]
------
Step:2, Action:North
State  272
Old Q Values:  [ 5731.67941873 -8521.23367799  2205.73600612  3418.83028553]
New Q values:  [ 6147.85381871 -8521.23367799  2205.73600612  3418.83028553]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.a.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.28526068e+04 5.74700394e+03 1.27673579e+04]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  5146.45146787 21480.08667922   492.34934406]
New Q values:  [-2469.90645144  5101.06059802 21480.08667922   492.34934406]
Reward: -1  Episode Reward:  7
xxxxx
xg .x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[10143.60003624 -5807.06396197   855.18004824  2779.23498274]
------
Step:4, Action:North
State  272
Old Q Values:  [ 6147.85381871 -8521.23367799  2205.73600612  3418.83028553]
New Q values:  [ 6314.32357871 -8521.23367799  2205.73600612  3418.83028553]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x.a.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.28526068e+04 5.74700394e+03 1.27673579e+04]
------
Step:5, Action:South
State  193
Old Q Values:  [-5922.26708831  5160.62605115 18118.4845368   1460.9765133 ]
New Q values:  [-5922.26708831 25054.02786399 18118.4845368   1460.9765133 ]
Reward: -1  Episode Reward:  5
xxxxx
x. gx
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 76634.59147844]
------
Step:6, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 76634.59147844]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 43961.84611976]
Reward: 9  Episode Reward:  14
xxxxx
x. gx
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44342.0317613  18493.12311579 17879.58324284  1875.31501677]
------
Step:7, Action:North
State  261
Old Q Values:  [1005.88366727   26.73544252 3978.16179777  123.6214372 ]
New Q values:  [1001.0682736    26.73544252 3978.16179777  123.6214372 ]
Reward: 9  Episode Reward:  23
xxxxx
x.g.x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 855.20665396  445.64685634 1977.71602231  262.76946019]
------
Step:8, Action:East
State  180
Old Q Values:  [  150.2741814   8050.99445658 13611.48315303 -4966.32149798]
New Q values:  [  150.2741814   8050.99445658 11888.01926498 -4966.32149798]
Reward: -1  Episode Reward:  22
xxxxx
xg .x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  5101.06059802 21480.08667922   492.34934406]
------
Step:9, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.28526068e+04 5.74700394e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 1.28526068e+04 4.51841230e+03 1.27673579e+04]
Reward: 9  Episode Reward:  31
xxxxx
x.g.x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7380.70241838  5866.81018498 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  216
Old Q Values:  [  390.92979676  2167.30467869 -8896.20691497  -992.77096491]
New Q values:  [  446.8747076   2167.30467869 -8896.20691497  -992.77096491]
Reward: 9  Episode Reward:  40
xxxxx
xg ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   950.34262965 -2383.80019164   213.35313412]
------
Step:11, Action:South
State  130
Old Q Values:  [ 18222.84073342   2996.01294072   -180.00807518 121796.96194807]
New Q values:  [ 18222.84073342   3412.0159018    -180.00807518 121796.96194807]
Reward: -1  Episode Reward:  39
xxxxx
x.  x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7380.70241838  5866.81018498 -4584.50430574 -1713.91177491]
------
Step:12, Action:North
State  216
Old Q Values:  [  446.8747076   2167.30467869 -8896.20691497  -992.77096491]
New Q values:  [  463.25267193  2167.30467869 -8896.20691497  -992.77096491]
Reward: -1  Episode Reward:  38
xxxxx
xg ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   950.34262965 -2383.80019164   213.35313412]
------
Step:13, Action:South
State  130
Old Q Values:  [ 18222.84073342   3412.0159018    -180.00807518 121796.96194807]
New Q values:  [ 18222.84073342   3578.41708624   -180.00807518 121796.96194807]
Reward: -1  Episode Reward:  37
xxxxx
x.  x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7380.70241838  5866.81018498 -4584.50430574 -1713.91177491]
------
Step:14, Action:North
State  216
Old Q Values:  [  463.25267193  2167.30467869 -8896.20691497  -992.77096491]
New Q values:  [  469.80385767  2167.30467869 -8896.20691497  -992.77096491]
Reward: -1  Episode Reward:  36
xxxxx
xg ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   950.34262965 -2383.80019164   213.35313412]
------
Step:15, Action:South
State  128
Old Q Values:  [11374.93691792 10380.76024543 -8652.84       75546.55817317]
New Q values:  [11374.93691792  6365.91482369 -8652.84       75546.55817317]
Reward: -1  Episode Reward:  35
xxxxx
x.g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7380.70241838  5866.81018498 -4584.50430574 -1713.91177491]
------
Step:16, Action:North
State  208
Old Q Values:  [ 7380.70241838  5866.81018498 -4584.50430574 -1713.91177491]
New Q values:  [19615.6484193   5866.81018498 -4584.50430574 -1713.91177491]
Reward: -10001  Episode Reward:  -9966
xxxxx
x. gx
x   x
x  .x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2164.86429281 -6442.16912869 -8192.20126966  3126.4239269 ]
------
Step:1, Action:West
State  288
Old Q Values:  [ 2164.86429281 -6442.16912869 -8192.20126966  3126.4239269 ]
New Q values:  [ 2164.86429281 -6442.16912869 -8192.20126966 14444.52340669]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 43961.84611976]
------
Step:2, Action:West
State  272
Old Q Values:  [ 6314.32357871 -8521.23367799  2205.73600612  3418.83028553]
New Q values:  [ 6314.32357871 -8521.23367799  2205.73600612 14675.5416426 ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44342.0317613  18493.12311579 17879.58324284  1875.31501677]
------
Step:3, Action:North
State  260
Old Q Values:  [ 4543.42843127 -5704.51612281  3666.96961989 -5679.36893145]
New Q values:  [ -610.822848   -5704.51612281  3666.96961989 -5679.36893145]
Reward: -9991  Episode Reward:  -9973
xxxxx
x.. x
xg..x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 25054.02786399 18118.4845368   1460.9765133 ]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831 25054.02786399 18118.4845368   1460.9765133 ]
New Q values:  [-5922.26708831 23215.56498153 18118.4845368   1460.9765133 ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 43961.84611976]
------
Step:2, Action:West
State  272
Old Q Values:  [ 6314.32357871 -8521.23367799  2205.73600612 14675.5416426 ]
New Q values:  [ 6314.32357871 -8521.23367799  2205.73600612 19178.22618543]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44342.0317613  18493.12311579 17879.58324284  1875.31501677]
------
Step:3, Action:North
State  261
Old Q Values:  [1001.0682736    26.73544252 3978.16179777  123.6214372 ]
New Q values:  [ 999.14211613   26.73544252 3978.16179777  123.6214372 ]
Reward: 9  Episode Reward:  27
xxxxx
x..gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 855.20665396  445.64685634 1977.71602231  262.76946019]
------
Step:4, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738  10136.59605449      0.        ]
New Q values:  [109163.23336057  23344.73803738   7909.82047302      0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x.g x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.28526068e+04 4.51841230e+03 1.27673579e+04]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.28526068e+04 4.51841230e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 1.08939106e+04 4.51841230e+03 1.27673579e+04]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6314.32357871 -8521.23367799  2205.73600612 19178.22618543]
------
Step:6, Action:West
State  272
Old Q Values:  [ 6314.32357871 -8521.23367799  2205.73600612 19178.22618543]
New Q values:  [ 6314.32357871 -8521.23367799  2205.73600612  8770.78136014]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -610.822848   -5704.51612281  3666.96961989 -5679.36893145]
------
Step:7, Action:East
State  260
Old Q Values:  [ -610.822848   -5704.51612281  3666.96961989 -5679.36893145]
New Q values:  [ -610.822848   -5704.51612281  4097.422256   -5679.36893145]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6314.32357871 -8521.23367799  2205.73600612  8770.78136014]
------
Step:8, Action:North
State  272
Old Q Values:  [ 6314.32357871 -8521.23367799  2205.73600612  8770.78136014]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612  8770.78136014]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  8.15273333e+03  1.03161518e+03]
------
Step:9, Action:East
State  195
Old Q Values:  [  38.85388605 3096.86073896 1652.41302943 1169.39963074]
New Q values:  [  38.85388605 3096.86073896 8073.03857853 1169.39963074]
Reward: 9  Episode Reward:  31
xxxxx
x.. x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[24688.91122251  1093.22762389   790.72804752  1050.85266124]
------
Step:10, Action:North
State  208
Old Q Values:  [19615.6484193   5866.81018498 -4584.50430574 -1713.91177491]
New Q values:  [44384.74795214  5866.81018498 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  30
xxxxx
x..ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 18222.84073342   3578.41708624   -180.00807518 121796.96194807]
------
Step:11, Action:West
State  128
Old Q Values:  [11374.93691792  6365.91482369 -8652.84       75546.55817317]
New Q values:  [11374.93691792  6365.91482369 -8652.84       51527.53475761]
Reward: 9  Episode Reward:  39
xxxxx
x.agx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.          2847.11106414 -5999.38454759 71011.70496116]
------
Step:12, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 4292.78893337 1281.45211411]
New Q values:  [   0.         1166.51141701 4292.78893337  850.80874877]
Reward: 9  Episode Reward:  48
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1109.42634374 -2165.66138672  -180.6       ]
------
Step:13, Action:South
State  108
Old Q Values:  [-8463.16477134  3774.35594955   196.50327584     0.        ]
New Q values:  [-8463.16477134  2610.77248427   196.50327584     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  3672.10034816  1027.18354596     0.        ]
------
Step:14, Action:South
State  188
Old Q Values:  [-6523.78898263  3672.10034816  1027.18354596     0.        ]
New Q values:  [-6523.78898263  2697.46681606  1027.18354596     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -610.822848   -5704.51612281  4097.422256   -5679.36893145]
------
Step:15, Action:East
State  260
Old Q Values:  [ -610.822848   -5704.51612281  4097.422256   -5679.36893145]
New Q values:  [ -610.822848   -5704.51612281  4681.44891327 -5679.36893145]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[10143.60003624 -5807.06396197   855.18004824  2779.23498274]
------
Step:16, Action:North
State  276
Old Q Values:  [10143.60003624 -5807.06396197   855.18004824  2779.23498274]
New Q values:  [ 4326.86570193 -5807.06396197   855.18004824  2779.23498274]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[  0.         772.36851598 900.08562479 441.58769553]
------
Step:17, Action:East
State  204
Old Q Values:  [  0.         772.36851598 900.08562479 441.58769553]
New Q values:  [   0.          772.36851598 1009.62565352  441.58769553]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  469.80385767  2167.30467869 -8896.20691497  -992.77096491]
------
Step:18, Action:South
State  208
Old Q Values:  [44384.74795214  5866.81018498 -4584.50430574 -1713.91177491]
New Q values:  [44384.74795214 66685.481096   -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 999.14211613   26.73544252 3978.16179777  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [ 999.14211613   26.73544252 3978.16179777  123.6214372 ]
New Q values:  [  999.14211613    26.73544252 14785.21855504   123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 43961.84611976]
------
Step:2, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 43961.84611976]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 22019.70401442]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  999.14211613    26.73544252 14785.21855504   123.6214372 ]
------
Step:3, Action:East
State  261
Old Q Values:  [  999.14211613    26.73544252 14785.21855504   123.6214372 ]
New Q values:  [  999.14211613    26.73544252 12519.39862634   123.6214372 ]
Reward: -1  Episode Reward:  7
xxxxx
x. .x
x...x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 22019.70401442]
------
Step:4, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612  8770.78136014]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612  7263.53213196]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  999.14211613    26.73544252 12519.39862634   123.6214372 ]
------
Step:5, Action:North
State  260
Old Q Values:  [ -610.822848   -5704.51612281  4681.44891327 -5679.36893145]
New Q values:  [ 1309.08037499 -5704.51612281  4681.44891327 -5679.36893145]
Reward: 9  Episode Reward:  15
xxxxx
x. .x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:6, Action:East
State  180
Old Q Values:  [  150.2741814   8050.99445658 11888.01926498 -4966.32149798]
New Q values:  [  150.2741814   8050.99445658 11204.63370976 -4966.32149798]
Reward: 9  Episode Reward:  24
xxxxx
x. .x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  5101.06059802 21480.08667922   492.34934406]
------
Step:7, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.08939106e+04 4.51841230e+03 1.27673579e+04]
New Q values:  [3.89777037e-01 1.08939106e+04 2.18184092e+04 1.27673579e+04]
Reward: 9  Episode Reward:  33
xxxxx
x. .x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[44384.74795214 66685.481096   -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  210
Old Q Values:  [24688.91122251  1093.22762389   790.72804752  1050.85266124]
New Q values:  [24688.91122251  4776.04807156   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  42
xxxxx
x. .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2164.86429281 -6442.16912869 -8192.20126966 14444.52340669]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2164.86429281 -6442.16912869 -8192.20126966 14444.52340669]
New Q values:  [ 8272.01908388 -6442.16912869 -8192.20126966 14444.52340669]
Reward: -1  Episode Reward:  41
xxxxx
x. .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[24688.91122251  4776.04807156   790.72804752  1050.85266124]
------
Step:10, Action:North
State  210
Old Q Values:  [24688.91122251  4776.04807156   790.72804752  1050.85266124]
New Q values:  [46420.05307343  4776.04807156   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  50
xxxxx
x. ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 18222.84073342   3578.41708624   -180.00807518 121796.96194807]
------
Step:11, Action:West
State  130
Old Q Values:  [ 18222.84073342   3578.41708624   -180.00807518 121796.96194807]
New Q values:  [18222.84073342  3578.41708624  -180.00807518 86336.00540478]
Reward: -1  Episode Reward:  49
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   18106.71132503 125392.73541853]
------
Step:12, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   18106.71132503 125392.73541853]
New Q values:  [  -180.6          3557.6642036   18106.71132503 144905.53235357]
Reward: 100009  Episode Reward:  100058
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1109.42634374 -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094  1109.42634374 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1042.48534419 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 855.20665396  445.64685634 1977.71602231  262.76946019]
------
Step:2, Action:East
State  189
Old Q Values:  [ 275.08817949 1905.69125353 2753.75707465  154.04646645]
New Q values:  [ 275.08817949 1905.69125353 1764.47747765  154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
x  gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  2.19191549e+03 -5.74006972e+03  2.00341972e+02]
------
Step:3, Action:South
State  197
Old Q Values:  [-5833.78831344  1710.07873061 -4510.80210702   403.06255908]
New Q values:  [-5833.78831344  2171.76527724 -4510.80210702   403.06255908]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 4.94111262e+03 2.07151379e+03]
------
Step:4, Action:East
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 4.94111262e+03 2.07151379e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.15202069e+02 2.07151379e+03]
Reward: -9991  Episode Reward:  -9964
xxxxx
x  .x
x  .x
x. gx
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[44384.74795214 66685.481096   -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [44384.74795214 66685.481096   -4584.50430574 -1713.91177491]
New Q values:  [44384.74795214 31012.94946041 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8272.01908388 -6442.16912869 -8192.20126966 14444.52340669]
------
Step:2, Action:West
State  288
Old Q Values:  [ 8272.01908388 -6442.16912869 -8192.20126966 14444.52340669]
New Q values:  [ 8272.01908388 -6442.16912869 -8192.20126966  7962.26900226]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4970.94943132 -8521.23367799  2205.73600612  7263.53213196]
------
Step:3, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612  7263.53213196]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612 16213.42238117]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44342.0317613  18493.12311579 17879.58324284  1875.31501677]
------
Step:4, Action:North
State  260
Old Q Values:  [ 1309.08037499 -5704.51612281  4681.44891327 -5679.36893145]
New Q values:  [ 2077.04166418 -5704.51612281  4681.44891327 -5679.36893145]
Reward: 9  Episode Reward:  36
xxxxx
x...x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [1841.01681811 2335.80779431 4769.59868587  358.5166536 ]
New Q values:  [1841.01681811 2335.80779431 4353.05947418  358.5166536 ]
Reward: -1  Episode Reward:  35
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  8.15273333e+03  1.03161518e+03]
------
Step:6, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  8.15273333e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  1.71865093e+04  1.03161518e+03]
Reward: -1  Episode Reward:  34
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[46420.05307343  4776.04807156   790.72804752  1050.85266124]
------
Step:7, Action:North
State  210
Old Q Values:  [46420.05307343  4776.04807156   790.72804752  1050.85266124]
New Q values:  [44474.22285081  4776.04807156   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  43
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3578.41708624  -180.00807518 86336.00540478]
------
Step:8, Action:West
State  130
Old Q Values:  [18222.84073342  3578.41708624  -180.00807518 86336.00540478]
New Q values:  [18222.84073342  3578.41708624  -180.00807518 78011.46186798]
Reward: 9  Episode Reward:  52
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   18106.71132503 144905.53235357]
------
Step:9, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   18106.71132503 144905.53235357]
New Q values:  [  -180.6          3557.6642036   18106.71132503 131138.21658017]
Reward: 100009  Episode Reward:  100061
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 2.18184092e+04 1.27673579e+04]
------
Step:1, Action:East
State  200
Old Q Values:  [   62.8218634  15441.09421105   412.29309875   568.38654082]
New Q values:  [   62.8218634  15441.09421105   820.50864311   568.38654082]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  469.80385767  2167.30467869 -8896.20691497  -992.77096491]
------
Step:2, Action:South
State  208
Old Q Values:  [44384.74795214 31012.94946041 -4584.50430574 -1713.91177491]
New Q values:  [44384.74795214 14892.18550933 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8272.01908388 -6442.16912869 -8192.20126966  7962.26900226]
------
Step:3, Action:North
State  288
Old Q Values:  [ 8272.01908388 -6442.16912869 -8192.20126966  7962.26900226]
New Q values:  [16623.63201919 -6442.16912869 -8192.20126966  7962.26900226]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[44384.74795214 14892.18550933 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  216
Old Q Values:  [  469.80385767  2167.30467869 -8896.20691497  -992.77096491]
New Q values:  [  478.42433196  2167.30467869 -8896.20691497  -992.77096491]
Reward: 9  Episode Reward:  26
xxxxx
x.gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   950.34262965 -2383.80019164   213.35313412]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351   950.34262965 -2383.80019164   213.35313412]
New Q values:  [ -170.77177351  1029.72845547 -2383.80019164   213.35313412]
Reward: -1  Episode Reward:  25
xxxxx
xg. x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  2167.30467869 -8896.20691497  -992.77096491]
------
Step:6, Action:South
State  216
Old Q Values:  [  478.42433196  2167.30467869 -8896.20691497  -992.77096491]
New Q values:  [  478.42433196  5853.41147723 -8896.20691497  -992.77096491]
Reward: -1  Episode Reward:  24
xxxxx
x.g x
x   x
x..ax
xxxxx
Step:7, Action:North
State  288
Old Q Values:  [16623.63201919 -6442.16912869 -8192.20126966  7962.26900226]
New Q values:  [19964.27719332 -6442.16912869 -8192.20126966  7962.26900226]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[44384.74795214 14892.18550933 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  210
Old Q Values:  [44474.22285081  4776.04807156   790.72804752  1050.85266124]
New Q values:  [41192.52770072  4776.04807156   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  22
xxxxx
x..ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3578.41708624  -180.00807518 78011.46186798]
------
Step:9, Action:West
State  130
Old Q Values:  [18222.84073342  3578.41708624  -180.00807518 78011.46186798]
New Q values:  [18222.84073342  3578.41708624  -180.00807518 70551.44972124]
Reward: 9  Episode Reward:  31
xxxxx
x.a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   18106.71132503 131138.21658017]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    84.08192403   577.69900812]
New Q values:  [ -281.736      -1150.91067548    84.08192403   307.97683547]
Reward: 9  Episode Reward:  40
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:11, Action:East
State  104
Old Q Values:  [-8652.84        2246.80188749   249.55355069 -8652.84      ]
New Q values:  [-8652.84        2246.80188749   246.29174538 -8652.84      ]
Reward: -1  Episode Reward:  39
xxxxx
xga x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   490.23441701   179.26024029]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548    84.08192403   307.97683547]
New Q values:  [ -281.736      -1150.91067548  2531.11458277   307.97683547]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  8.32693938e+03 -3.22965309e-01  6.37971067e+02]
------
Step:13, Action:South
State  136
Old Q Values:  [ -170.77177351  1029.72845547 -2383.80019164   213.35313412]
New Q values:  [ -170.77177351  2167.31482536 -2383.80019164   213.35313412]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  5853.41147723 -8896.20691497  -992.77096491]
------
Step:14, Action:South
State  216
Old Q Values:  [  478.42433196  5853.41147723 -8896.20691497  -992.77096491]
New Q values:  [  478.42433196  8330.04774889 -8896.20691497  -992.77096491]
Reward: -1  Episode Reward:  36
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19964.27719332 -6442.16912869 -8192.20126966  7962.26900226]
------
Step:15, Action:North
State  288
Old Q Values:  [19964.27719332 -6442.16912869 -8192.20126966  7962.26900226]
New Q values:  [10484.12520199 -6442.16912869 -8192.20126966  7962.26900226]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  8330.04774889 -8896.20691497  -992.77096491]
------
Step:16, Action:South
State  216
Old Q Values:  [  478.42433196  8330.04774889 -8896.20691497  -992.77096491]
New Q values:  [  478.42433196  6476.65666015 -8896.20691497  -992.77096491]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10484.12520199 -6442.16912869 -8192.20126966  7962.26900226]
------
Step:17, Action:West
State  288
Old Q Values:  [10484.12520199 -6442.16912869 -8192.20126966  7962.26900226]
New Q values:  [10484.12520199 -6442.16912869 -8192.20126966  8054.33431526]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4970.94943132 -8521.23367799  2205.73600612 16213.42238117]
------
Step:18, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612 16213.42238117]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612 79793.37848086]
Reward: 100009  Episode Reward:  100052
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  999.14211613    26.73544252 12519.39862634   123.6214372 ]
------
Step:1, Action:East
State  260
Old Q Values:  [ 2077.04166418 -5704.51612281  4681.44891327 -5679.36893145]
New Q values:  [ 2077.04166418 -5704.51612281  3176.03927589 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4326.86570193 -5807.06396197   855.18004824  2779.23498274]
------
Step:2, Action:North
State  276
Old Q Values:  [ 4326.86570193 -5807.06396197   855.18004824  2779.23498274]
New Q values:  [ 8281.66905565 -5807.06396197   855.18004824  2779.23498274]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 2.18184092e+04 1.27673579e+04]
------
Step:3, Action:East
State  193
Old Q Values:  [-5922.26708831 23215.56498153 18118.4845368   1460.9765133 ]
New Q values:  [-5922.26708831 23215.56498153 20568.21820036  1460.9765133 ]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x. ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[44384.74795214 14892.18550933 -4584.50430574 -1713.91177491]
------
Step:4, Action:South
State  208
Old Q Values:  [44384.74795214 14892.18550933 -4584.50430574 -1713.91177491]
New Q values:  [44384.74795214  9107.51176433 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  36
xxxxx
x. .x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10484.12520199 -6442.16912869 -8192.20126966  8054.33431526]
------
Step:5, Action:West
State  288
Old Q Values:  [10484.12520199 -6442.16912869 -8192.20126966  8054.33431526]
New Q values:  [10484.12520199 -6442.16912869 -8192.20126966 27159.14727036]
Reward: -1  Episode Reward:  35
xxxxx
x. .x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4970.94943132 -8521.23367799  2205.73600612 79793.37848086]
------
Step:6, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 22019.70401442]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 22109.89113416]
Reward: -1  Episode Reward:  34
xxxxx
x. .x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44342.0317613  18493.12311579 17879.58324284  1875.31501677]
------
Step:7, Action:North
State  257
Old Q Values:  [44342.0317613  18493.12311579 17879.58324284  1875.31501677]
New Q values:  [50491.18271269 18493.12311579 17879.58324284  1875.31501677]
Reward: 9  Episode Reward:  43
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738   7909.82047302      0.        ]
------
Step:8, Action:North
State  181
Old Q Values:  [ 855.20665396  445.64685634 1977.71602231  262.76946019]
New Q values:  [1466.88189305  445.64685634 1977.71602231  262.76946019]
Reward: 9  Episode Reward:  52
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         3731.33077154    0.            0.        ]
------
Step:9, Action:South
State  103
Old Q Values:  [ 221.30610858 2297.75268242  238.35800069    0.        ]
New Q values:  [ 221.30610858 1511.81587966  238.35800069    0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x  .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1466.88189305  445.64685634 1977.71602231  262.76946019]
------
Step:10, Action:North
State  183
Old Q Values:  [1841.01681811 2335.80779431 4353.05947418  358.5166536 ]
New Q values:  [1189.35149114 2335.80779431 4353.05947418  358.5166536 ]
Reward: -1  Episode Reward:  50
xxxxx
xa .x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1511.81587966  238.35800069    0.        ]
------
Step:11, Action:South
State  103
Old Q Values:  [ 221.30610858 1511.81587966  238.35800069    0.        ]
New Q values:  [ 221.30610858 1197.44115856  238.35800069    0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x  .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1466.88189305  445.64685634 1977.71602231  262.76946019]
------
Step:12, Action:North
State  181
Old Q Values:  [1466.88189305  445.64685634 1977.71602231  262.76946019]
New Q values:  [ 945.38510479  445.64685634 1977.71602231  262.76946019]
Reward: -1  Episode Reward:  48
xxxxx
xa .x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1197.44115856  238.35800069    0.        ]
------
Step:13, Action:South
State  103
Old Q Values:  [ 221.30610858 1197.44115856  238.35800069    0.        ]
New Q values:  [ 221.30610858 1784.29430568  238.35800069    0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x  .x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1189.35149114 2335.80779431 4353.05947418  358.5166536 ]
------
Step:14, Action:East
State  183
Old Q Values:  [1189.35149114 2335.80779431 4353.05947418  358.5166536 ]
New Q values:  [1189.35149114 2335.80779431 5306.65125817  358.5166536 ]
Reward: -1  Episode Reward:  46
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.18867582e+04  0.00000000e+00]
------
Step:15, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  1.18867582e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
Reward: -1  Episode Reward:  45
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[41192.52770072  4776.04807156   790.72804752  1050.85266124]
------
Step:16, Action:North
State  208
Old Q Values:  [44384.74795214  9107.51176433 -4584.50430574 -1713.91177491]
New Q values:  [98924.73409723  9107.51176433 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100054
xxxxx
x  ax
xg  x
x   x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10484.12520199 -6442.16912869 -8192.20126966 27159.14727036]
------
Step:1, Action:West
State  288
Old Q Values:  [10484.12520199 -6442.16912869 -8192.20126966 27159.14727036]
New Q values:  [10484.12520199 -6442.16912869 -8192.20126966 17502.02624839]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 22109.89113416]
------
Step:2, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612 79793.37848086]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612 35678.57098024]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  999.14211613    26.73544252 12519.39862634   123.6214372 ]
------
Step:3, Action:North
State  260
Old Q Values:  [ 2077.04166418 -5704.51612281  3176.03927589 -5679.36893145]
New Q values:  [ 2384.22617986 -5704.51612281  3176.03927589 -5679.36893145]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:4, Action:East
State  180
Old Q Values:  [  150.2741814   8050.99445658 11204.63370976 -4966.32149798]
New Q values:  [  150.2741814   8050.99445658 11032.77625878 -4966.32149798]
Reward: 9  Episode Reward:  36
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 2.18184092e+04 1.27673579e+04]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.08939106e+04 2.18184092e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.08939106e+04 3.84041839e+04 1.27673579e+04]
Reward: -1  Episode Reward:  35
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[98924.73409723  9107.51176433 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  208
Old Q Values:  [98924.73409723  9107.51176433 -4584.50430574 -1713.91177491]
New Q values:  [55033.55406618  9107.51176433 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  44
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[11374.93691792  6365.91482369 -8652.84       51527.53475761]
------
Step:7, Action:North
State  128
Old Q Values:  [11374.93691792  6365.91482369 -8652.84       51527.53475761]
New Q values:  [19827.63519445  6365.91482369 -8652.84       51527.53475761]
Reward: -301  Episode Reward:  -257
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[19827.63519445  6365.91482369 -8652.84       51527.53475761]
------
Step:8, Action:West
State  128
Old Q Values:  [19827.63519445  6365.91482369 -8652.84       51527.53475761]
New Q values:  [19827.63519445  6365.91482369 -8652.84       47901.34282192]
Reward: -9991  Episode Reward:  -10248
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  8.32693938e+03 -3.22965309e-01  6.37971067e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  8.32693938e+03 -3.22965309e-01  6.37971067e+02]
New Q values:  [ 8.43634063e+00  1.56939341e+04 -3.22965309e-01  6.37971067e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[41192.52770072  4776.04807156   790.72804752  1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [41192.52770072  4776.04807156   790.72804752  1050.85266124]
New Q values:  [21184.59129862  4776.04807156   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.56939341e+04 -3.22965309e-01  6.37971067e+02]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.56939341e+04 -3.22965309e-01  6.37971067e+02]
New Q values:  [ 8.43634063e+00  1.26323510e+04 -3.22965309e-01  6.37971067e+02]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21184.59129862  4776.04807156   790.72804752  1050.85266124]
------
Step:4, Action:North
State  208
Old Q Values:  [55033.55406618  9107.51176433 -4584.50430574 -1713.91177491]
New Q values:  [25802.52693068  9107.51176433 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.26323510e+04 -3.22965309e-01  6.37971067e+02]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351  2167.31482536 -2383.80019164   213.35313412]
New Q values:  [ -170.77177351  2809.32292819 -2383.80019164   213.35313412]
Reward: -1  Episode Reward:  5
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  6476.65666015 -8896.20691497  -992.77096491]
------
Step:6, Action:South
State  208
Old Q Values:  [25802.52693068  9107.51176433 -4584.50430574 -1713.91177491]
New Q values:  [25802.52693068  8899.01258025 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  14
xxxxx
x..gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10484.12520199 -6442.16912869 -8192.20126966 17502.02624839]
------
Step:7, Action:West
State  288
Old Q Values:  [10484.12520199 -6442.16912869 -8192.20126966 17502.02624839]
New Q values:  [10484.12520199 -6442.16912869 -8192.20126966 13639.1778396 ]
Reward: 9  Episode Reward:  23
xxxxx
x.. x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 22109.89113416]
------
Step:8, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612 35678.57098024]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612 29424.1832059 ]
Reward: 9  Episode Reward:  32
xxxxx
x.. x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[50491.18271269 18493.12311579 17879.58324284  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [50491.18271269 18493.12311579 17879.58324284  1875.31501677]
New Q values:  [52950.84309325 18493.12311579 17879.58324284  1875.31501677]
Reward: 9  Episode Reward:  41
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738   7909.82047302      0.        ]
------
Step:10, Action:North
State  180
Old Q Values:  [  150.2741814   8050.99445658 11032.77625878 -4966.32149798]
New Q values:  [-3331.57111805  8050.99445658 11032.77625878 -4966.32149798]
Reward: -9991  Episode Reward:  -9950
xxxxx
xg. x
x   x
x   x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21184.59129862  4776.04807156   790.72804752  1050.85266124]
------
Step:1, Action:North
State  210
Old Q Values:  [21184.59129862  4776.04807156   790.72804752  1050.85266124]
New Q values:  [12268.94182365  4776.04807156   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.26323510e+04 -3.22965309e-01  6.37971067e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.26323510e+04 -3.22965309e-01  6.37971067e+02]
New Q values:  [ 8.43634063e+00  8.73302295e+03 -3.22965309e-01  6.37971067e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12268.94182365  4776.04807156   790.72804752  1050.85266124]
------
Step:3, Action:North
State  208
Old Q Values:  [25802.52693068  8899.01258025 -4584.50430574 -1713.91177491]
New Q values:  [12940.31765808  8899.01258025 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  8.73302295e+03 -3.22965309e-01  6.37971067e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  8.73302295e+03 -3.22965309e-01  6.37971067e+02]
New Q values:  [ 8.43634063e+00  7.37470448e+03 -3.22965309e-01  6.37971067e+02]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12940.31765808  8899.01258025 -4584.50430574 -1713.91177491]
------
Step:5, Action:North
State  210
Old Q Values:  [12268.94182365  4776.04807156   790.72804752  1050.85266124]
New Q values:  [7119.38807301 4776.04807156  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.37470448e+03 -3.22965309e-01  6.37971067e+02]
------
Step:6, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.37470448e+03 -3.22965309e-01  6.37971067e+02]
New Q values:  [ 8.43634063e+00  6.83137709e+03 -3.22965309e-01  6.37971067e+02]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12940.31765808  8899.01258025 -4584.50430574 -1713.91177491]
------
Step:7, Action:North
State  208
Old Q Values:  [12940.31765808  8899.01258025 -4584.50430574 -1713.91177491]
New Q values:  [ 7224.94018988  8899.01258025 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  3
xxxxx
x..ax
x..gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  6.83137709e+03 -3.22965309e-01  6.37971067e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  6.83137709e+03 -3.22965309e-01  6.37971067e+02]
New Q values:  [ 8.43634063e+00  6.83137709e+03 -3.22965309e-01  1.01992280e+03]
Reward: 9  Episode Reward:  12
xxxxx
x.a x
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2531.11458277   307.97683547]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2531.11458277   307.97683547]
New Q values:  [ -281.736      -1150.91067548  3061.25895976   307.97683547]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  6.83137709e+03 -3.22965309e-01  1.01992280e+03]
------
Step:10, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  6.83137709e+03 -3.22965309e-01  1.01992280e+03]
New Q values:  [ 8.43634063e+00  5.40165461e+03 -3.22965309e-01  1.01992280e+03]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7224.94018988  8899.01258025 -4584.50430574 -1713.91177491]
------
Step:11, Action:South
State  208
Old Q Values:  [ 7224.94018988  8899.01258025 -4584.50430574 -1713.91177491]
New Q values:  [ 7224.94018988  7656.75838398 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  19
xxxxx
x.g x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10484.12520199 -6442.16912869 -8192.20126966 13639.1778396 ]
------
Step:12, Action:West
State  288
Old Q Values:  [10484.12520199 -6442.16912869 -8192.20126966 13639.1778396 ]
New Q values:  [10484.12520199 -6442.16912869 -8192.20126966 14288.32609761]
Reward: 9  Episode Reward:  28
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4970.94943132 -8521.23367799  2205.73600612 29424.1832059 ]
------
Step:13, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612 29424.1832059 ]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612 27654.32621034]
Reward: -1  Episode Reward:  27
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[52950.84309325 18493.12311579 17879.58324284  1875.31501677]
------
Step:14, Action:North
State  260
Old Q Values:  [ 2384.22617986 -5704.51612281  3176.03927589 -5679.36893145]
New Q values:  [ 4268.92334958 -5704.51612281  3176.03927589 -5679.36893145]
Reward: 9  Episode Reward:  36
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-3331.57111805  8050.99445658 11032.77625878 -4966.32149798]
------
Step:15, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738   7909.82047302      0.        ]
New Q values:  [109163.23336057  23344.73803738  14690.58336791      0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 3.84041839e+04 1.27673579e+04]
------
Step:16, Action:East
State  193
Old Q Values:  [-5922.26708831 23215.56498153 20568.21820036  1460.9765133 ]
New Q values:  [-5922.26708831 23215.56498153 10523.71479534  1460.9765133 ]
Reward: -1  Episode Reward:  44
xxxxx
x. gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7224.94018988  7656.75838398 -4584.50430574 -1713.91177491]
------
Step:17, Action:South
State  208
Old Q Values:  [ 7224.94018988  7656.75838398 -4584.50430574 -1713.91177491]
New Q values:  [ 7224.94018988  7348.60118288 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  43
xxxxx
x.g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10484.12520199 -6442.16912869 -8192.20126966 14288.32609761]
------
Step:18, Action:West
State  288
Old Q Values:  [10484.12520199 -6442.16912869 -8192.20126966 14288.32609761]
New Q values:  [10484.12520199 -6442.16912869 -8192.20126966 12347.69777929]
Reward: -1  Episode Reward:  42
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 22109.89113416]
------
Step:19, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612 27654.32621034]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612 26946.38341211]
Reward: -1  Episode Reward:  41
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[52950.84309325 18493.12311579 17879.58324284  1875.31501677]
------
Step:20, Action:North
State  257
Old Q Values:  [52950.84309325 18493.12311579 17879.58324284  1875.31501677]
New Q values:  [53928.70724547 18493.12311579 17879.58324284  1875.31501677]
Reward: -1  Episode Reward:  40
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  14690.58336791      0.        ]
------
Step:21, Action:North
State  176
Old Q Values:  [ 76485.61294353   1621.55095326 110360.20137974      0.        ]
New Q values:  [103770.24881615   1621.55095326 110360.20137974      0.        ]
Reward: 100009  Episode Reward:  100049
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  5.40165461e+03 -3.22965309e-01  1.01992280e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  5.40165461e+03 -3.22965309e-01  1.01992280e+03]
New Q values:  [ 8.43634063e+00  4.37064220e+03 -3.22965309e-01  1.01992280e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7224.94018988  7348.60118288 -4584.50430574 -1713.91177491]
------
Step:2, Action:South
State  210
Old Q Values:  [7119.38807301 4776.04807156  790.72804752 1050.85266124]
New Q values:  [7119.38807301 5620.12856241  790.72804752 1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10484.12520199 -6442.16912869 -8192.20126966 12347.69777929]
------
Step:3, Action:North
State  288
Old Q Values:  [10484.12520199 -6442.16912869 -8192.20126966 12347.69777929]
New Q values:  [ 6328.8665027  -6442.16912869 -8192.20126966 12347.69777929]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7119.38807301 5620.12856241  790.72804752 1050.85266124]
------
Step:4, Action:North
State  210
Old Q Values:  [7119.38807301 5620.12856241  790.72804752 1050.85266124]
New Q values:  [4158.34788882 5620.12856241  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.37064220e+03 -3.22965309e-01  1.01992280e+03]
------
Step:5, Action:South
State  130
Old Q Values:  [18222.84073342  3578.41708624  -180.00807518 70551.44972124]
New Q values:  [18222.84073342  3116.80540322  -180.00807518 70551.44972124]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4158.34788882 5620.12856241  790.72804752 1050.85266124]
------
Step:6, Action:South
State  210
Old Q Values:  [4158.34788882 5620.12856241  790.72804752 1050.85266124]
New Q values:  [4158.34788882 5951.76075875  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6328.8665027  -6442.16912869 -8192.20126966 12347.69777929]
------
Step:7, Action:North
State  288
Old Q Values:  [ 6328.8665027  -6442.16912869 -8192.20126966 12347.69777929]
New Q values:  [ 4735.52695594 -6442.16912869 -8192.20126966 12347.69777929]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7224.94018988  7348.60118288 -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  208
Old Q Values:  [ 7224.94018988  7348.60118288 -4584.50430574 -1713.91177491]
New Q values:  [ 7224.94018988  6643.14980694 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4735.52695594 -6442.16912869 -8192.20126966 12347.69777929]
------
Step:9, Action:West
State  288
Old Q Values:  [ 4735.52695594 -6442.16912869 -8192.20126966 12347.69777929]
New Q values:  [ 4735.52695594 -6442.16912869 -8192.20126966 13022.39413535]
Reward: -1  Episode Reward:  11
xxxxx
xg. x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4970.94943132 -8521.23367799  2205.73600612 26946.38341211]
------
Step:10, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612 26946.38341211]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612 26962.56553848]
Reward: 9  Episode Reward:  20
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[53928.70724547 18493.12311579 17879.58324284  1875.31501677]
------
Step:11, Action:North
State  257
Old Q Values:  [53928.70724547 18493.12311579 17879.58324284  1875.31501677]
New Q values:  [54325.85290636 18493.12311579 17879.58324284  1875.31501677]
Reward: 9  Episode Reward:  29
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  14690.58336791      0.        ]
------
Step:12, Action:North
State  181
Old Q Values:  [ 945.38510479  445.64685634 1977.71602231  262.76946019]
New Q values:  [ 918.84233362  445.64685634 1977.71602231  262.76946019]
Reward: 9  Episode Reward:  38
xxxxx
xa. x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1784.29430568  238.35800069    0.        ]
------
Step:13, Action:South
State  103
Old Q Values:  [ 221.30610858 1784.29430568  238.35800069    0.        ]
New Q values:  [ 221.30610858 2305.11309972  238.35800069    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x . x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1189.35149114 2335.80779431 5306.65125817  358.5166536 ]
------
Step:14, Action:East
State  181
Old Q Values:  [ 918.84233362  445.64685634 1977.71602231  262.76946019]
New Q values:  [ 918.84233362  445.64685634 7761.15590338  262.76946019]
Reward: 9  Episode Reward:  46
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 23215.56498153 10523.71479534  1460.9765133 ]
------
Step:15, Action:South
State  193
Old Q Values:  [-5922.26708831 23215.56498153 10523.71479534  1460.9765133 ]
New Q values:  [-5922.26708831 15918.59333286 10523.71479534  1460.9765133 ]
Reward: -1  Episode Reward:  45
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 22109.89113416]
------
Step:16, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 22109.89113416]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 12599.17604156]
Reward: -1  Episode Reward:  44
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  999.14211613    26.73544252 12519.39862634   123.6214372 ]
------
Step:17, Action:East
State  261
Old Q Values:  [  999.14211613    26.73544252 12519.39862634   123.6214372 ]
New Q values:  [  999.14211613    26.73544252 13095.92911208   123.6214372 ]
Reward: -1  Episode Reward:  43
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4970.94943132 -8521.23367799  2205.73600612 26962.56553848]
------
Step:18, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612 26962.56553848]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612 12065.10322027]
Reward: -1  Episode Reward:  42
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4268.92334958 -5704.51612281  3176.03927589 -5679.36893145]
------
Step:19, Action:East
State  260
Old Q Values:  [ 4268.92334958 -5704.51612281  3176.03927589 -5679.36893145]
New Q values:  [ 4268.92334958 -5704.51612281  4889.34667644 -5679.36893145]
Reward: -1  Episode Reward:  41
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4970.94943132 -8521.23367799  2205.73600612 12065.10322027]
------
Step:20, Action:West
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612 12065.10322027]
New Q values:  [ 4970.94943132 -8521.23367799  2205.73600612  6292.24529104]
Reward: -1  Episode Reward:  40
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4268.92334958 -5704.51612281  4889.34667644 -5679.36893145]
------
Step:21, Action:East
State  260
Old Q Values:  [ 4268.92334958 -5704.51612281  4889.34667644 -5679.36893145]
New Q values:  [ 4268.92334958 -5704.51612281  3842.81225789 -5679.36893145]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4970.94943132 -8521.23367799  2205.73600612  6292.24529104]
------
Step:22, Action:North
State  272
Old Q Values:  [ 4970.94943132 -8521.23367799  2205.73600612  6292.24529104]
New Q values:  [13509.03495123 -8521.23367799  2205.73600612  6292.24529104]
Reward: -1  Episode Reward:  38
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 3.84041839e+04 1.27673579e+04]
------
Step:23, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.08939106e+04 3.84041839e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.08939106e+04 1.75285556e+04 1.27673579e+04]
Reward: -1  Episode Reward:  37
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7224.94018988  6643.14980694 -4584.50430574 -1713.91177491]
------
Step:24, Action:North
State  208
Old Q Values:  [ 7224.94018988  6643.14980694 -4584.50430574 -1713.91177491]
New Q values:  [17259.77892253  6643.14980694 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  36
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[19827.63519445  6365.91482369 -8652.84       47901.34282192]
------
Step:25, Action:North
State  128
Old Q Values:  [19827.63519445  6365.91482369 -8652.84       47901.34282192]
New Q values:  [22120.85692436  6365.91482369 -8652.84       47901.34282192]
Reward: -301  Episode Reward:  -265
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[22120.85692436  6365.91482369 -8652.84       47901.34282192]
------
Step:26, Action:West
State  128
Old Q Values:  [22120.85692436  6365.91482369 -8652.84       47901.34282192]
New Q values:  [ 22120.85692436   6365.91482369  -8652.84       106450.86604764]
Reward: 90009  Episode Reward:  89744
xxxxx
x g x
x   x
x   x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17259.77892253  6643.14980694 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [17259.77892253  6643.14980694 -4584.50430574 -1713.91177491]
New Q values:  [17259.77892253  6569.37816338 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4735.52695594 -6442.16912869 -8192.20126966 13022.39413535]
------
Step:2, Action:West
State  288
Old Q Values:  [ 4735.52695594 -6442.16912869 -8192.20126966 13022.39413535]
New Q values:  [ 4735.52695594 -6442.16912869 -8192.20126966  9267.06813951]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[13509.03495123 -8521.23367799  2205.73600612  6292.24529104]
------
Step:3, Action:West
State  272
Old Q Values:  [13509.03495123 -8521.23367799  2205.73600612  6292.24529104]
New Q values:  [13509.03495123 -8521.23367799  2205.73600612 18820.05398832]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[54325.85290636 18493.12311579 17879.58324284  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [  999.14211613    26.73544252 13095.92911208   123.6214372 ]
New Q values:  [ 2733.40361747    26.73544252 13095.92911208   123.6214372 ]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 918.84233362  445.64685634 7761.15590338  262.76946019]
------
Step:5, Action:East
State  177
Old Q Values:  [109163.23336057  23344.73803738  14690.58336791      0.        ]
New Q values:  [109163.23336057  23344.73803738  11140.20003573      0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 1.75285556e+04 1.27673579e+04]
------
Step:6, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.08939106e+04 1.75285556e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.08939106e+04 1.21887559e+04 1.27673579e+04]
Reward: -1  Episode Reward:  44
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17259.77892253  6569.37816338 -4584.50430574 -1713.91177491]
------
Step:7, Action:North
State  208
Old Q Values:  [17259.77892253  6569.37816338 -4584.50430574 -1713.91177491]
New Q values:  [38838.5713833   6569.37816338 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  43
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 22120.85692436   6365.91482369  -8652.84       106450.86604764]
------
Step:8, Action:North
State  128
Old Q Values:  [ 22120.85692436   6365.91482369  -8652.84       106450.86604764]
New Q values:  [ 40603.00258404   6365.91482369  -8652.84       106450.86604764]
Reward: -301  Episode Reward:  -258
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 40603.00258404   6365.91482369  -8652.84       106450.86604764]
------
Step:9, Action:West
State  130
Old Q Values:  [18222.84073342  3116.80540322  -180.00807518 70551.44972124]
New Q values:  [18222.84073342  3116.80540322  -180.00807518 67567.44486255]
Reward: 9  Episode Reward:  -249
xxxxx
x.a x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   18106.71132503 131138.21658017]
------
Step:10, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   18106.71132503 131138.21658017]
New Q values:  [  -180.6          3557.6642036   18106.71132503 125631.2902708 ]
Reward: 100009  Episode Reward:  99760
xxxxx
xa  x
x   x
xg  x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4735.52695594 -6442.16912869 -8192.20126966  9267.06813951]
------
Step:1, Action:West
State  288
Old Q Values:  [ 4735.52695594 -6442.16912869 -8192.20126966  9267.06813951]
New Q values:  [ 4735.52695594 -6442.16912869 -8192.20126966  9358.2434523 ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[13509.03495123 -8521.23367799  2205.73600612 18820.05398832]
------
Step:2, Action:West
State  272
Old Q Values:  [13509.03495123 -8521.23367799  2205.73600612 18820.05398832]
New Q values:  [13509.03495123 -8521.23367799  2205.73600612  8814.0986002 ]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4268.92334958 -5704.51612281  3842.81225789 -5679.36893145]
------
Step:3, Action:North
State  260
Old Q Values:  [ 4268.92334958 -5704.51612281  3842.81225789 -5679.36893145]
New Q values:  [ -977.19778254 -5704.51612281  3842.81225789 -5679.36893145]
Reward: -9991  Episode Reward:  -9973
xxxxx
x ..x
xg..x
x   x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2733.40361747    26.73544252 13095.92911208   123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [ 2733.40361747    26.73544252 13095.92911208   123.6214372 ]
New Q values:  [2733.40361747   26.73544252 9023.5244573   123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 12599.17604156]
------
Step:2, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 12599.17604156]
New Q values:  [3915.56039739 -168.92307549 4979.82966255 7746.12775382]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x...x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2733.40361747   26.73544252 9023.5244573   123.6214372 ]
------
Step:3, Action:East
State  261
Old Q Values:  [2733.40361747   26.73544252 9023.5244573   123.6214372 ]
New Q values:  [2733.40361747   26.73544252 1661.52026829  123.6214372 ]
Reward: -10001  Episode Reward:  -9993
xxxxx
x.. x
x...x
x g.x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[38838.5713833   6569.37816338 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [38838.5713833   6569.37816338 -4584.50430574 -1713.91177491]
New Q values:  [38838.5713833   5440.62430104 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4735.52695594 -6442.16912869 -8192.20126966  9358.2434523 ]
------
Step:2, Action:West
State  288
Old Q Values:  [ 4735.52695594 -6442.16912869 -8192.20126966  9358.2434523 ]
New Q values:  [ 4735.52695594 -6442.16912869 -8192.20126966  7801.40786629]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[13509.03495123 -8521.23367799  2205.73600612  8814.0986002 ]
------
Step:3, Action:West
State  272
Old Q Values:  [13509.03495123 -8521.23367799  2205.73600612  8814.0986002 ]
New Q values:  [13509.03495123 -8521.23367799  2205.73600612  4683.88311745]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -977.19778254 -5704.51612281  3842.81225789 -5679.36893145]
------
Step:4, Action:East
State  257
Old Q Values:  [54325.85290636 18493.12311579 17879.58324284  1875.31501677]
New Q values:  [54325.85290636 18493.12311579 11203.9437825   1875.31501677]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[13509.03495123 -8521.23367799  2205.73600612  4683.88311745]
------
Step:5, Action:West
State  272
Old Q Values:  [13509.03495123 -8521.23367799  2205.73600612  4683.88311745]
New Q values:  [13509.03495123 -8521.23367799  2205.73600612  3025.79692434]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -977.19778254 -5704.51612281  3842.81225789 -5679.36893145]
------
Step:6, Action:East
State  260
Old Q Values:  [ -977.19778254 -5704.51612281  3842.81225789 -5679.36893145]
New Q values:  [ -977.19778254 -5704.51612281  5589.23538852 -5679.36893145]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[13509.03495123 -8521.23367799  2205.73600612  3025.79692434]
------
Step:7, Action:North
State  272
Old Q Values:  [13509.03495123 -8521.23367799  2205.73600612  3025.79692434]
New Q values:  [ 9239.2213531  -8521.23367799  2205.73600612  3025.79692434]
Reward: 9  Episode Reward:  33
xxxxx
x.. x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 1.21887559e+04 1.27673579e+04]
------
Step:8, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  1.71865093e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  8.65953193e+03  1.03161518e+03]
Reward: -1  Episode Reward:  32
xxxxx
x.. x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4158.34788882 5951.76075875  790.72804752 1050.85266124]
------
Step:9, Action:South
State  210
Old Q Values:  [4158.34788882 5951.76075875  790.72804752 1050.85266124]
New Q values:  [4158.34788882 4720.52666339  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  31
xxxxx
x.. x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4735.52695594 -6442.16912869 -8192.20126966  7801.40786629]
------
Step:10, Action:West
State  288
Old Q Values:  [ 4735.52695594 -6442.16912869 -8192.20126966  7801.40786629]
New Q values:  [ 4735.52695594 -6442.16912869 -8192.20126966  5891.72955245]
Reward: -1  Episode Reward:  30
xxxxx
x.. x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9239.2213531  -8521.23367799  2205.73600612  3025.79692434]
------
Step:11, Action:North
State  272
Old Q Values:  [ 9239.2213531  -8521.23367799  2205.73600612  3025.79692434]
New Q values:  [ 6292.94812015 -8521.23367799  2205.73600612  3025.79692434]
Reward: -1  Episode Reward:  29
xxxxx
x.. x
x.a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  8.65953193e+03  1.03161518e+03]
------
Step:12, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  8.65953193e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  4.87937077e+03  1.03161518e+03]
Reward: -1  Episode Reward:  28
xxxxx
x.. x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4158.34788882 4720.52666339  790.72804752 1050.85266124]
------
Step:13, Action:South
State  208
Old Q Values:  [38838.5713833   5440.62430104 -4584.50430574 -1713.91177491]
New Q values:  [38838.5713833   3943.16858615 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  27
xxxxx
x.. x
x.g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4735.52695594 -6442.16912869 -8192.20126966  5891.72955245]
------
Step:14, Action:West
State  288
Old Q Values:  [ 4735.52695594 -6442.16912869 -8192.20126966  5891.72955245]
New Q values:  [ 4735.52695594 -6442.16912869 -8192.20126966  4243.97625702]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6292.94812015 -8521.23367799  2205.73600612  3025.79692434]
------
Step:15, Action:North
State  272
Old Q Values:  [ 6292.94812015 -8521.23367799  2205.73600612  3025.79692434]
New Q values:  [ 6346.78662067 -8521.23367799  2205.73600612  3025.79692434]
Reward: -1  Episode Reward:  25
xxxxx
xg. x
x.a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 1.21887559e+04 1.27673579e+04]
------
Step:16, Action:West
State  192
Old Q Values:  [3.89777037e-01 1.08939106e+04 1.21887559e+04 1.27673579e+04]
New Q values:  [3.89777037e-01 1.08939106e+04 1.21887559e+04 3.22204036e+04]
Reward: -9991  Episode Reward:  -9966
xxxxx
x.. x
xg  x
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.37064220e+03 -3.22965309e-01  1.01992280e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.37064220e+03 -3.22965309e-01  1.01992280e+03]
New Q values:  [ 8.43634063e+00  1.34052283e+04 -3.22965309e-01  1.01992280e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[38838.5713833   3943.16858615 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [38838.5713833   3943.16858615 -4584.50430574 -1713.91177491]
New Q values:  [19556.39704166  3943.16858615 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.34052283e+04 -3.22965309e-01  1.01992280e+03]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351  2809.32292819 -2383.80019164   213.35313412]
New Q values:  [ -170.77177351  6990.04828377 -2383.80019164   213.35313412]
Reward: -1  Episode Reward:  7
xxxxx
xg. x
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19556.39704166  3943.16858615 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  208
Old Q Values:  [19556.39704166  3943.16858615 -4584.50430574 -1713.91177491]
New Q values:  [11843.52730501  3943.16858615 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.34052283e+04 -3.22965309e-01  1.01992280e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.34052283e+04 -3.22965309e-01  1.01992280e+03]
New Q values:  [ 8.43634063e+00  8.91454951e+03 -3.22965309e-01  1.01992280e+03]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11843.52730501  3943.16858615 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  210
Old Q Values:  [4158.34788882 4720.52666339  790.72804752 1050.85266124]
New Q values:  [4337.10400831 4720.52666339  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  4
xxxxx
x..ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  8.91454951e+03 -3.22965309e-01  1.01992280e+03]
------
Step:7, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  8.91454951e+03 -3.22965309e-01  1.01992280e+03]
New Q values:  [ 8.43634063e+00  7.11827800e+03 -3.22965309e-01  1.01992280e+03]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11843.52730501  3943.16858615 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  208
Old Q Values:  [11843.52730501  3943.16858615 -4584.50430574 -1713.91177491]
New Q values:  [ 6833.82540714  3943.16858615 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  2
xxxxx
x.gax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6990.04828377 -2383.80019164   213.35313412]
------
Step:9, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.11827800e+03 -3.22965309e-01  1.01992280e+03]
New Q values:  [ 8.43634063e+00  4.89685882e+03 -3.22965309e-01  1.01992280e+03]
Reward: -1  Episode Reward:  1
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6833.82540714  3943.16858615 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  210
Old Q Values:  [4337.10400831 4720.52666339  790.72804752 1050.85266124]
New Q values:  [3203.29924939 4720.52666339  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  0
xxxxx
x..ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.89685882e+03 -3.22965309e-01  1.01992280e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.89685882e+03 -3.22965309e-01  1.01992280e+03]
New Q values:  [ 8.43634063e+00  4.00829115e+03 -3.22965309e-01  1.01992280e+03]
Reward: -1  Episode Reward:  -1
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6833.82540714  3943.16858615 -4584.50430574 -1713.91177491]
------
Step:12, Action:North
State  208
Old Q Values:  [ 6833.82540714  3943.16858615 -4584.50430574 -1713.91177491]
New Q values:  [ 3935.41750792  3943.16858615 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -2
xxxxx
x..ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.00829115e+03 -3.22965309e-01  1.01992280e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  4.00829115e+03 -3.22965309e-01  1.01992280e+03]
New Q values:  [ 8.43634063e+00  4.00829115e+03 -3.22965309e-01  7.85357057e+02]
Reward: 9  Episode Reward:  7
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   688.72853811  1239.95978669]
------
Step:14, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  3061.25895976   307.97683547]
New Q values:  [ -281.736      -1150.91067548  3061.25895976   795.97580437]
Reward: 9  Episode Reward:  16
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2224.61690061  153.18954677 -252.78192178]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558 2224.61690061  153.18954677 -252.78192178]
New Q values:  [-252.35169558 1121.21998354  153.18954677 -252.78192178]
Reward: 9  Episode Reward:  25
xxxxx
x   x
xag x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 433.98067359    0.          753.24407764 -178.98      ]
------
Step:16, Action:North
State  180
Old Q Values:  [-3331.57111805  8050.99445658 11032.77625878 -4966.32149798]
New Q values:  [-1268.03197772  8050.99445658 11032.77625878 -4966.32149798]
Reward: -1  Episode Reward:  24
xxxxx
xa  x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 -348.92535679  217.32156498 -180.6       ]
------
Step:17, Action:East
State  108
Old Q Values:  [-8463.16477134  2610.77248427   196.50327584     0.        ]
New Q values:  [-8463.16477134  2610.77248427   225.07163544     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
xga x
x . x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   490.23441701   179.26024029]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3061.25895976   795.97580437]
New Q values:  [ -281.736      -1150.91067548  2426.39092897   795.97580437]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.00829115e+03 -3.22965309e-01  7.85357057e+02]
------
Step:19, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.00829115e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  3.54571346e+03 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  6476.65666015 -8896.20691497  -992.77096491]
------
Step:20, Action:South
State  208
Old Q Values:  [ 3935.41750792  3943.16858615 -4584.50430574 -1713.91177491]
New Q values:  [ 3935.41750792  3003.32552124 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  30
xxxxx
x g x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4735.52695594 -6442.16912869 -8192.20126966  4243.97625702]
------
Step:21, Action:North
State  288
Old Q Values:  [ 4735.52695594 -6442.16912869 -8192.20126966  4243.97625702]
New Q values:  [ 3074.23603475 -6442.16912869 -8192.20126966  4243.97625702]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
x .ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3935.41750792  3003.32552124 -4584.50430574 -1713.91177491]
------
Step:22, Action:North
State  208
Old Q Values:  [ 3935.41750792  3003.32552124 -4584.50430574 -1713.91177491]
New Q values:  [ 2637.28104061  3003.32552124 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.54571346e+03 -3.22965309e-01  7.85357057e+02]
------
Step:23, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.54571346e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  3.36068238e+03 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  6476.65666015 -8896.20691497  -992.77096491]
------
Step:24, Action:South
State  208
Old Q Values:  [ 2637.28104061  3003.32552124 -4584.50430574 -1713.91177491]
New Q values:  [ 2637.28104061  2473.9230856  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3074.23603475 -6442.16912869 -8192.20126966  4243.97625702]
------
Step:25, Action:West
State  288
Old Q Values:  [ 3074.23603475 -6442.16912869 -8192.20126966  4243.97625702]
New Q values:  [ 3074.23603475 -6442.16912869 -8192.20126966  3601.02648901]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6346.78662067 -8521.23367799  2205.73600612  3025.79692434]
------
Step:26, Action:North
State  272
Old Q Values:  [ 6346.78662067 -8521.23367799  2205.73600612  3025.79692434]
New Q values:  [ 4007.92587953 -8521.23367799  2205.73600612  3025.79692434]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  4.87937077e+03  1.03161518e+03]
------
Step:27, Action:East
State  200
Old Q Values:  [   62.8218634  15441.09421105   820.50864311   568.38654082]
New Q values:  [   62.8218634  15441.09421105  2270.60045529   568.38654082]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xg ax
x.  x
xxxxx
Step:28, Action:West
State  216
Old Q Values:  [  478.42433196  6476.65666015 -8896.20691497  -992.77096491]
New Q values:  [  478.42433196  6476.65666015 -8896.20691497 -1765.38012265]
Reward: -10001  Episode Reward:  -9968
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.36068238e+03 -3.22965309e-01  7.85357057e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.36068238e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  2.76583095e+03 -3.22965309e-01  7.85357057e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3203.29924939 4720.52666339  790.72804752 1050.85266124]
------
Step:2, Action:South
State  210
Old Q Values:  [3203.29924939 4720.52666339  790.72804752 1050.85266124]
New Q values:  [ 3203.29924939 -3032.08138794   790.72804752  1050.85266124]
Reward: -10001  Episode Reward:  -9992
xxxxx
x.. x
x.. x
x..gx
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.76583095e+03 -3.22965309e-01  7.85357057e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.76583095e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  2.07272216e+03 -3.22965309e-01  7.85357057e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3203.29924939 -3032.08138794   790.72804752  1050.85266124]
------
Step:2, Action:North
State  208
Old Q Values:  [ 2637.28104061  2473.9230856  -4584.50430574 -1713.91177491]
New Q values:  [ 1676.12906287  2473.9230856  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.07272216e+03 -3.22965309e-01  7.85357057e+02]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351  6990.04828377 -2383.80019164   213.35313412]
New Q values:  [ -170.77177351  3537.59623919 -2383.80019164   213.35313412]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1676.12906287  2473.9230856  -4584.50430574 -1713.91177491]
------
Step:4, Action:South
State  208
Old Q Values:  [ 1676.12906287  2473.9230856  -4584.50430574 -1713.91177491]
New Q values:  [ 1676.12906287  2069.27718094 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  6
xxxxx
xg. x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3074.23603475 -6442.16912869 -8192.20126966  3601.02648901]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3074.23603475 -6442.16912869 -8192.20126966  3601.02648901]
New Q values:  [ 3074.23603475 -6442.16912869 -8192.20126966  2648.18835946]
Reward: 9  Episode Reward:  15
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4007.92587953 -8521.23367799  2205.73600612  3025.79692434]
------
Step:6, Action:North
State  272
Old Q Values:  [ 4007.92587953 -8521.23367799  2205.73600612  3025.79692434]
New Q values:  [11274.69142503 -8521.23367799  2205.73600612  3025.79692434]
Reward: 9  Episode Reward:  24
xxxxx
xg. x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 1.21887559e+04 3.22204036e+04]
------
Step:7, Action:West
State  192
Old Q Values:  [3.89777037e-01 1.08939106e+04 1.21887559e+04 3.22204036e+04]
New Q values:  [3.89777037e-01 1.08939106e+04 1.21887559e+04 4.00016218e+04]
Reward: -9991  Episode Reward:  -9967
xxxxx
x.. x
xg  x
x.  x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[11274.69142503 -8521.23367799  2205.73600612  3025.79692434]
------
Step:1, Action:North
State  276
Old Q Values:  [ 8281.66905565 -5807.06396197   855.18004824  2779.23498274]
New Q values:  [15318.55417572 -5807.06396197   855.18004824  2779.23498274]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 1.21887559e+04 4.00016218e+04]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  4.87937077e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  2.91813808e+03  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3203.29924939 -3032.08138794   790.72804752  1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [ 3203.29924939 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [21556.95315852 -3032.08138794   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3116.80540322  -180.00807518 67567.44486255]
------
Step:4, Action:West
State  130
Old Q Values:  [18222.84073342  3116.80540322  -180.00807518 67567.44486255]
New Q values:  [18222.84073342  3116.80540322  -180.00807518 66033.15152773]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  2426.39092897   795.97580437]
New Q values:  [ -281.736      -1150.91067548  2426.39092897   660.15631681]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1121.21998354  153.18954677 -252.78192178]
------
Step:6, Action:South
State  110
Old Q Values:  [-239.29051573 -348.92535679  217.32156498 -180.6       ]
New Q values:  [-239.29051573  404.29096499  217.32156498 -180.6       ]
Reward: 9  Episode Reward:  54
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:7, Action:East
State  188
Old Q Values:  [-6523.78898263  2697.46681606  1027.18354596     0.        ]
New Q values:  [-6523.78898263  2697.46681606   713.16111444     0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.          772.36851598 1009.62565352  441.58769553]
------
Step:8, Action:East
State  206
Old Q Values:  [   0.         4614.27538929 1309.6092569     0.        ]
New Q values:  [   0.         4614.27538929 1720.21125214    0.        ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 158.55927744 3989.89183125    0.         1847.21017375]
------
Step:9, Action:South
State  208
Old Q Values:  [ 1676.12906287  2069.27718094 -4584.50430574 -1713.91177491]
New Q values:  [ 1676.12906287 61755.3816828  -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2426.39092897   660.15631681]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2426.39092897   660.15631681]
New Q values:  [ -281.736      -1150.91067548  1597.77301822   660.15631681]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.07272216e+03 -3.22965309e-01  7.85357057e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.07272216e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  2.77748586e+03 -3.22965309e-01  7.85357057e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  6476.65666015 -8896.20691497 -1765.38012265]
------
Step:3, Action:South
State  216
Old Q Values:  [  478.42433196  6476.65666015 -8896.20691497 -1765.38012265]
New Q values:  [  478.42433196  3518.33347449 -8896.20691497 -1765.38012265]
Reward: 9  Episode Reward:  27
xxxxx
x.  x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3074.23603475 -6442.16912869 -8192.20126966  2648.18835946]
------
Step:4, Action:North
State  288
Old Q Values:  [ 3074.23603475 -6442.16912869 -8192.20126966  2648.18835946]
New Q values:  [19755.70891874 -6442.16912869 -8192.20126966  2648.18835946]
Reward: -1  Episode Reward:  26
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1676.12906287 61755.3816828  -4584.50430574 -1713.91177491]
------
Step:5, Action:South
State  216
Old Q Values:  [  478.42433196  3518.33347449 -8896.20691497 -1765.38012265]
New Q values:  [  478.42433196  7333.44606542 -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19755.70891874 -6442.16912869 -8192.20126966  2648.18835946]
------
Step:6, Action:North
State  288
Old Q Values:  [19755.70891874 -6442.16912869 -8192.20126966  2648.18835946]
New Q values:  [10101.71738712 -6442.16912869 -8192.20126966  2648.18835946]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  7333.44606542 -8896.20691497 -1765.38012265]
------
Step:7, Action:South
State  216
Old Q Values:  [  478.42433196  7333.44606542 -8896.20691497 -1765.38012265]
New Q values:  [  478.42433196  5963.2936423  -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10101.71738712 -6442.16912869 -8192.20126966  2648.18835946]
------
Step:8, Action:North
State  288
Old Q Values:  [10101.71738712 -6442.16912869 -8192.20126966  2648.18835946]
New Q values:  [ 5829.07504754 -6442.16912869 -8192.20126966  2648.18835946]
Reward: -1  Episode Reward:  22
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  5963.2936423  -8896.20691497 -1765.38012265]
------
Step:9, Action:South
State  208
Old Q Values:  [ 1676.12906287 61755.3816828  -4584.50430574 -1713.91177491]
New Q values:  [ 1676.12906287 26450.27518738 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5829.07504754 -6442.16912869 -8192.20126966  2648.18835946]
------
Step:10, Action:North
State  288
Old Q Values:  [ 5829.07504754 -6442.16912869 -8192.20126966  2648.18835946]
New Q values:  [10266.11257523 -6442.16912869 -8192.20126966  2648.18835946]
Reward: -1  Episode Reward:  20
xxxxx
x. gx
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1676.12906287 26450.27518738 -4584.50430574 -1713.91177491]
------
Step:11, Action:South
State  208
Old Q Values:  [ 1676.12906287 26450.27518738 -4584.50430574 -1713.91177491]
New Q values:  [ 1676.12906287 13659.34384752 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10266.11257523 -6442.16912869 -8192.20126966  2648.18835946]
------
Step:12, Action:West
State  288
Old Q Values:  [10266.11257523 -6442.16912869 -8192.20126966  2648.18835946]
New Q values:  [10266.11257523 -6442.16912869 -8192.20126966  3388.51366993]
Reward: 9  Episode Reward:  28
xxxxx
x.  x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 7746.12775382]
------
Step:13, Action:West
State  272
Old Q Values:  [11274.69142503 -8521.23367799  2205.73600612  3025.79692434]
New Q values:  [11274.69142503 -8521.23367799  2205.73600612 17513.47464165]
Reward: 9  Episode Reward:  37
xxxxx
x.  x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[54325.85290636 18493.12311579 11203.9437825   1875.31501677]
------
Step:14, Action:North
State  257
Old Q Values:  [54325.85290636 18493.12311579 11203.9437825   1875.31501677]
New Q values:  [44912.72708596 18493.12311579 11203.9437825   1875.31501677]
Reward: 9  Episode Reward:  46
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562 77256.61974473     0.        ]
------
Step:15, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562 77256.61974473     0.        ]
New Q values:  [16872.63888686 16101.90751562 31777.48932284     0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  2.91813808e+03  1.03161518e+03]
------
Step:16, Action:East
State  195
Old Q Values:  [  38.85388605 3096.86073896 8073.03857853 1169.39963074]
New Q values:  [  38.85388605 3096.86073896 9695.70137897 1169.39963074]
Reward: -1  Episode Reward:  44
xxxxx
x.  x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21556.95315852 -3032.08138794   790.72804752  1050.85266124]
------
Step:17, Action:North
State  208
Old Q Values:  [ 1676.12906287 13659.34384752 -4584.50430574 -1713.91177491]
New Q values:  [20479.79708347 13659.34384752 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  43
xxxxx
x. ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3116.80540322  -180.00807518 66033.15152773]
------
Step:18, Action:West
State  130
Old Q Values:  [18222.84073342  3116.80540322  -180.00807518 66033.15152773]
New Q values:  [18222.84073342  3116.80540322  -180.00807518 65413.43419381]
Reward: -1  Episode Reward:  42
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
------
Step:19, Action:West
State  115
Old Q Values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.30002579e+05]
New Q values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.46749470e+05]
Reward: 100009  Episode Reward:  100051
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 1.21887559e+04 4.00016218e+04]
------
Step:1, Action:West
State  196
Old Q Values:  [-2469.90645144  5101.06059802 21480.08667922   492.34934406]
New Q values:  [-2469.90645144  5101.06059802 21480.08667922  3512.17261526]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-1268.03197772  8050.99445658 11032.77625878 -4966.32149798]
------
Step:2, Action:East
State  180
Old Q Values:  [-1268.03197772  8050.99445658 11032.77625878 -4966.32149798]
New Q values:  [-1268.03197772  8050.99445658 10856.53650728 -4966.32149798]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  5101.06059802 21480.08667922  3512.17261526]
------
Step:3, Action:East
State  200
Old Q Values:  [   62.8218634  15441.09421105  2270.60045529   568.38654082]
New Q values:  [   62.8218634  15441.09421105  2702.62827481   568.38654082]
Reward: 9  Episode Reward:  17
xxxxx
x. .x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  5963.2936423  -8896.20691497 -1765.38012265]
------
Step:4, Action:South
State  208
Old Q Values:  [20479.79708347 13659.34384752 -4584.50430574 -1713.91177491]
New Q values:  [20479.79708347  8548.97131158 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  26
xxxxx
x. .x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10266.11257523 -6442.16912869 -8192.20126966  3388.51366993]
------
Step:5, Action:West
State  288
Old Q Values:  [10266.11257523 -6442.16912869 -8192.20126966  3388.51366993]
New Q values:  [10266.11257523 -6442.16912869 -8192.20126966  6614.84786047]
Reward: 9  Episode Reward:  35
xxxxx
x. .x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[11274.69142503 -8521.23367799  2205.73600612 17513.47464165]
------
Step:6, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549 4979.82966255 7746.12775382]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 16577.66922731]
Reward: 9  Episode Reward:  44
xxxxx
x. .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[44912.72708596 18493.12311579 11203.9437825   1875.31501677]
------
Step:7, Action:North
State  257
Old Q Values:  [44912.72708596 18493.12311579 11203.9437825   1875.31501677]
New Q values:  [50713.46084256 18493.12311579 11203.9437825   1875.31501677]
Reward: -1  Episode Reward:  43
xxxxx
x. .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[109163.23336057  23344.73803738  11140.20003573      0.        ]
------
Step:8, Action:North
State  181
Old Q Values:  [ 918.84233362  445.64685634 7761.15590338  262.76946019]
New Q values:  [1064.47086336  445.64685634 7761.15590338  262.76946019]
Reward: 9  Episode Reward:  52
xxxxx
xa .x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2305.11309972  238.35800069    0.        ]
------
Step:9, Action:South
State  103
Old Q Values:  [ 221.30610858 2305.11309972  238.35800069    0.        ]
New Q values:  [ 221.30610858 2513.44061734  238.35800069    0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x  .x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1189.35149114 2335.80779431 5306.65125817  358.5166536 ]
------
Step:10, Action:East
State  181
Old Q Values:  [1064.47086336  445.64685634 7761.15590338  262.76946019]
New Q values:  [1064.47086336  445.64685634 3755.39194453  262.76946019]
Reward: -1  Episode Reward:  50
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  2171.76527724 -4510.80210702   403.06255908]
------
Step:11, Action:South
State  196
Old Q Values:  [-2469.90645144  5101.06059802 21480.08667922  3512.17261526]
New Q values:  [-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
Reward: -1  Episode Reward:  49
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[15318.55417572 -5807.06396197   855.18004824  2779.23498274]
------
Step:12, Action:West
State  276
Old Q Values:  [15318.55417572 -5807.06396197   855.18004824  2779.23498274]
New Q values:  [15318.55417572 -5807.06396197   855.18004824  1931.11507833]
Reward: -1  Episode Reward:  48
xxxxx
x  .x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2733.40361747   26.73544252 1661.52026829  123.6214372 ]
------
Step:13, Action:North
State  261
Old Q Values:  [2733.40361747   26.73544252 1661.52026829  123.6214372 ]
New Q values:  [2219.37903034   26.73544252 1661.52026829  123.6214372 ]
Reward: -1  Episode Reward:  47
xxxxx
x  .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1064.47086336  445.64685634 3755.39194453  262.76946019]
------
Step:14, Action:North
State  183
Old Q Values:  [1189.35149114 2335.80779431 5306.65125817  358.5166536 ]
New Q values:  [1229.17278166 2335.80779431 5306.65125817  358.5166536 ]
Reward: -1  Episode Reward:  46
xxxxx
xa .x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2513.44061734  238.35800069    0.        ]
------
Step:15, Action:South
State  103
Old Q Values:  [ 221.30610858 2513.44061734  238.35800069    0.        ]
New Q values:  [ 221.30610858 2131.39383029  238.35800069    0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x  .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1064.47086336  445.64685634 3755.39194453  262.76946019]
------
Step:16, Action:North
State  180
Old Q Values:  [-1268.03197772  8050.99445658 10856.53650728 -4966.32149798]
New Q values:  [ -122.403451    8050.99445658 10856.53650728 -4966.32149798]
Reward: -1  Episode Reward:  44
xxxxx
xa .x
xg  x
x   x
xxxxx
Step:17, Action:East
State  102
Old Q Values:  [-180.6        1284.69780031    5.16       -180.6       ]
New Q values:  [-180.6        1284.69780031  522.17020433 -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x a.x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ 617.06804554 1735.68734778    0.          503.49427758]
------
Step:18, Action:South
State  118
Old Q Values:  [ 617.06804554 1735.68734778    0.          503.49427758]
New Q values:  [ 617.06804554 5827.23341958    0.          503.49427758]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:19, Action:East
State  199
Old Q Values:  [  14.86214194  479.07551978 3204.13160084 1915.70494401]
New Q values:  [  14.86214194  479.07551978 7748.13858789 1915.70494401]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21556.95315852 -3032.08138794   790.72804752  1050.85266124]
------
Step:20, Action:North
State  210
Old Q Values:  [21556.95315852 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [88252.21152155 -3032.08138794   790.72804752  1050.85266124]
Reward: 100009  Episode Reward:  100050
xxxxx
x  ax
x   x
x  gx
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -122.403451    8050.99445658 10856.53650728 -4966.32149798]
------
Step:1, Action:East
State  181
Old Q Values:  [1064.47086336  445.64685634 3755.39194453  262.76946019]
New Q values:  [1064.47086336  445.64685634 7951.58278158  262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:2, Action:East
State  201
Old Q Values:  [ 2.33354578e+00  2.19191549e+03 -5.74006972e+03  2.00341972e+02]
New Q values:  [   2.33354578 2191.91549263 -501.63979658  200.3419716 ]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  5963.2936423  -8896.20691497 -1765.38012265]
------
Step:3, Action:South
State  216
Old Q Values:  [  478.42433196  5963.2936423  -8896.20691497 -1765.38012265]
New Q values:  [  478.42433196  5470.55122949 -8896.20691497 -1765.38012265]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10266.11257523 -6442.16912869 -8192.20126966  6614.84786047]
------
Step:4, Action:North
State  288
Old Q Values:  [10266.11257523 -6442.16912869 -8192.20126966  6614.84786047]
New Q values:  [ 5747.01039894 -6442.16912869 -8192.20126966  6614.84786047]
Reward: -1  Episode Reward:  26
xxxxx
x .gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  478.42433196  5470.55122949 -8896.20691497 -1765.38012265]
------
Step:5, Action:South
State  216
Old Q Values:  [  478.42433196  5470.55122949 -8896.20691497 -1765.38012265]
New Q values:  [  478.42433196  4172.07484994 -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  25
xxxxx
x g.x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5747.01039894 -6442.16912869 -8192.20126966  6614.84786047]
------
Step:6, Action:West
State  288
Old Q Values:  [ 5747.01039894 -6442.16912869 -8192.20126966  6614.84786047]
New Q values:  [ 5747.01039894 -6442.16912869 -8192.20126966  7624.63991238]
Reward: 9  Episode Reward:  34
xxxxx
x .gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 16577.66922731]
------
Step:7, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 16577.66922731]
New Q values:  [3915.56039739 -168.92307549 4979.82966255 7302.28140003]
Reward: 9  Episode Reward:  43
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2219.37903034   26.73544252 1661.52026829  123.6214372 ]
------
Step:8, Action:North
State  261
Old Q Values:  [2219.37903034   26.73544252 1661.52026829  123.6214372 ]
New Q values:  [2479.14698959   26.73544252 1661.52026829  123.6214372 ]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
xa  x
x  gx
xxxxx
Step:9, Action:West
State  183
Old Q Values:  [1229.17278166 2335.80779431 5306.65125817  358.5166536 ]
New Q values:  [1229.17278166 2335.80779431 5306.65125817 1554.80203889]
Reward: -301  Episode Reward:  -259
xxxxx
x ..x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1229.17278166 2335.80779431 5306.65125817 1554.80203889]
------
Step:10, Action:East
State  183
Old Q Values:  [1229.17278166 2335.80779431 5306.65125817 1554.80203889]
New Q values:  [1229.17278166 2335.80779431 5030.77091696 1554.80203889]
Reward: -1  Episode Reward:  -260
xxxxx
x ..x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 3096.86073896 9695.70137897 1169.39963074]
------
Step:11, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  2.91813808e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  2.76423187e+04  1.03161518e+03]
Reward: -1  Episode Reward:  -261
xxxxx
x ..x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[88252.21152155 -3032.08138794   790.72804752  1050.85266124]
------
Step:12, Action:North
State  208
Old Q Values:  [20479.79708347  8548.97131158 -4584.50430574 -1713.91177491]
New Q values:  [27821.34909153  8548.97131158 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  -252
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3116.80540322  -180.00807518 65413.43419381]
------
Step:13, Action:West
State  128
Old Q Values:  [ 40603.00258404   6365.91482369  -8652.84       106450.86604764]
New Q values:  [ 40603.00258404   6365.91482369  -8652.84       129870.67533793]
Reward: 90009  Episode Reward:  89757
xxxxx
x g x
x   x
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.77748586e+03 -3.22965309e-01  7.85357057e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.77748586e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  2.75920578e+04 -3.22965309e-01  7.85357057e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[88252.21152155 -3032.08138794   790.72804752  1050.85266124]
------
Step:2, Action:North
State  208
Old Q Values:  [27821.34909153  8548.97131158 -4584.50430574 -1713.91177491]
New Q values:  [19405.55697678  8548.97131158 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.75920578e+04 -3.22965309e-01  7.85357057e+02]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.75920578e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  3.75118866e+04 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x..ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[88252.21152155 -3032.08138794   790.72804752  1050.85266124]
------
Step:4, Action:North
State  208
Old Q Values:  [19405.55697678  8548.97131158 -4584.50430574 -1713.91177491]
New Q values:  [19015.18876372  8548.97131158 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.75118866e+04 -3.22965309e-01  7.85357057e+02]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351  3537.59623919 -2383.80019164   213.35313412]
New Q values:  [ -170.77177351  7118.99512479 -2383.80019164   213.35313412]
Reward: -1  Episode Reward:  5
xxxxx
xg. x
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19015.18876372  8548.97131158 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  208
Old Q Values:  [19015.18876372  8548.97131158 -4584.50430574 -1713.91177491]
New Q values:  [ 9741.17404292  8548.97131158 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  4
xxxxx
x.gax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  7118.99512479 -2383.80019164   213.35313412]
------
Step:7, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.75118866e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  1.79265068e+04 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9741.17404292  8548.97131158 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  208
Old Q Values:  [ 9741.17404292  8548.97131158 -4584.50430574 -1713.91177491]
New Q values:  [ 6031.56815461  8548.97131158 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  2
xxxxx
x.gax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  7118.99512479 -2383.80019164   213.35313412]
------
Step:9, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.79265068e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  9.73469413e+03 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  1
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6031.56815461  8548.97131158 -4584.50430574 -1713.91177491]
------
Step:10, Action:South
State  208
Old Q Values:  [ 6031.56815461  8548.97131158 -4584.50430574 -1713.91177491]
New Q values:  [ 6031.56815461  5712.38049835 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  10
xxxxx
x.. x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5747.01039894 -6442.16912869 -8192.20126966  7624.63991238]
------
Step:11, Action:West
State  288
Old Q Values:  [ 5747.01039894 -6442.16912869 -8192.20126966  7624.63991238]
New Q values:  [ 5747.01039894 -6442.16912869 -8192.20126966  8303.29835745]
Reward: -1  Episode Reward:  9
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[11274.69142503 -8521.23367799  2205.73600612 17513.47464165]
------
Step:12, Action:West
State  272
Old Q Values:  [11274.69142503 -8521.23367799  2205.73600612 17513.47464165]
New Q values:  [11274.69142503 -8521.23367799  2205.73600612  8687.56047321]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -977.19778254 -5704.51612281  5589.23538852 -5679.36893145]
------
Step:13, Action:East
State  256
Old Q Values:  [33845.23946664 10195.91087381 10063.72056248   644.94785455]
New Q values:  [33845.23946664 10195.91087381  7407.2956525    644.94785455]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x.. x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[11274.69142503 -8521.23367799  2205.73600612  8687.56047321]
------
Step:14, Action:North
State  272
Old Q Values:  [11274.69142503 -8521.23367799  2205.73600612  8687.56047321]
New Q values:  [12807.97217693 -8521.23367799  2205.73600612  8687.56047321]
Reward: 9  Episode Reward:  26
xxxxx
x.. x
x.a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  2.76423187e+04  1.03161518e+03]
------
Step:15, Action:East
State  195
Old Q Values:  [  38.85388605 3096.86073896 9695.70137897 1169.39963074]
New Q values:  [   38.85388605  3096.86073896 30353.34400805  1169.39963074]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x. ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[88252.21152155 -3032.08138794   790.72804752  1050.85266124]
------
Step:16, Action:North
State  210
Old Q Values:  [88252.21152155 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [54924.31486676 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  24
xxxxx
x..ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3116.80540322  -180.00807518 65413.43419381]
------
Step:17, Action:West
State  130
Old Q Values:  [18222.84073342  3116.80540322  -180.00807518 65413.43419381]
New Q values:  [18222.84073342  3116.80540322  -180.00807518 63860.16075876]
Reward: 9  Episode Reward:  33
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   18106.71132503 125631.2902708 ]
------
Step:18, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   18106.71132503 125631.2902708 ]
New Q values:  [ -180.6         3557.6642036  18106.71132503 63428.51974706]
Reward: 9  Episode Reward:  42
xxxxx
xa  x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SW
[    0.         43902.01212913 28308.61746576     0.        ]
------
Step:19, Action:East
State  107
Old Q Values:  [-252.35169558 1121.21998354  153.18954677 -252.78192178]
New Q values:  [-252.35169558 1121.21998354  540.00772417 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1597.77301822   660.15631681]
------
Step:20, Action:East
State  114
Old Q Values:  [ -180.6         3557.6642036  18106.71132503 63428.51974706]
New Q values:  [ -180.6         3557.6642036  26400.13275764 63428.51974706]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3116.80540322  -180.00807518 63860.16075876]
------
Step:21, Action:West
State  136
Old Q Values:  [ -170.77177351  7118.99512479 -2383.80019164   213.35313412]
New Q values:  [ -170.77177351  7118.99512479 -2383.80019164   231.81157875]
Reward: -1  Episode Reward:  39
xxxxx
xga x
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225   490.23441701   179.26024029]
------
Step:22, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   490.23441701   179.26024029]
New Q values:  [-9594.56523706 -8069.05606225  2331.19230424   179.26024029]
Reward: -1  Episode Reward:  38
xxxxx
x gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  7118.99512479 -2383.80019164   231.81157875]
------
Step:23, Action:South
State  136
Old Q Values:  [ -170.77177351  7118.99512479 -2383.80019164   231.81157875]
New Q values:  [ -170.77177351  4656.4684963  -2383.80019164   231.81157875]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6031.56815461  5712.38049835 -4584.50430574 -1713.91177491]
------
Step:24, Action:North
State  208
Old Q Values:  [ 6031.56815461  5712.38049835 -4584.50430574 -1713.91177491]
New Q values:  [ 3808.96781073  5712.38049835 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  36
xxxxx
x gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  4656.4684963  -2383.80019164   231.81157875]
------
Step:25, Action:South
State  136
Old Q Values:  [ -170.77177351  4656.4684963  -2383.80019164   231.81157875]
New Q values:  [ -170.77177351  3575.70154802 -2383.80019164   231.81157875]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3808.96781073  5712.38049835 -4584.50430574 -1713.91177491]
------
Step:26, Action:South
State  208
Old Q Values:  [ 3808.96781073  5712.38049835 -4584.50430574 -1713.91177491]
New Q values:  [ 3808.96781073  4775.34170657 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x.  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5747.01039894 -6442.16912869 -8192.20126966  8303.29835745]
------
Step:27, Action:West
State  288
Old Q Values:  [ 5747.01039894 -6442.16912869 -8192.20126966  8303.29835745]
New Q values:  [ 5747.01039894 -6442.16912869 -8192.20126966  5511.40376299]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 7302.28140003]
------
Step:28, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549 4979.82966255 7302.28140003]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 18134.35081278]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[50713.46084256 18493.12311579 11203.9437825   1875.31501677]
------
Step:29, Action:North
State  257
Old Q Values:  [50713.46084256 18493.12311579 11203.9437825   1875.31501677]
New Q values:  [113039.75434519  18493.12311579  11203.9437825    1875.31501677]
Reward: 100009  Episode Reward:  100041
xxxxx
x  gx
xa  x
x   x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3808.96781073  4775.34170657 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [ 3808.96781073  4775.34170657 -4584.50430574 -1713.91177491]
New Q values:  [ 3808.96781073  3639.63980231 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5747.01039894 -6442.16912869 -8192.20126966  5511.40376299]
------
Step:2, Action:North
State  288
Old Q Values:  [ 5747.01039894 -6442.16912869 -8192.20126966  5511.40376299]
New Q values:  [ 3440.8945028  -6442.16912869 -8192.20126966  5511.40376299]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3808.96781073  3639.63980231 -4584.50430574 -1713.91177491]
------
Step:3, Action:North
State  208
Old Q Values:  [ 3808.96781073  3639.63980231 -4584.50430574 -1713.91177491]
New Q values:  [20687.03535192  3639.63980231 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  17
xxxxx
x..ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3116.80540322  -180.00807518 63860.16075876]
------
Step:4, Action:West
State  136
Old Q Values:  [ -170.77177351  3575.70154802 -2383.80019164   231.81157875]
New Q values:  [ -170.77177351  3575.70154802 -2383.80019164   797.48232277]
Reward: 9  Episode Reward:  26
xxxxx
xga x
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2331.19230424   179.26024029]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1597.77301822   660.15631681]
New Q values:  [ -281.736      -1150.91067548  3558.91744656   660.15631681]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  9.73469413e+03 -3.22965309e-01  7.85357057e+02]
------
Step:6, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  9.73469413e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  2.03705721e+04 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[54924.31486676 -3032.08138794   790.72804752  1050.85266124]
------
Step:7, Action:North
State  216
Old Q Values:  [  478.42433196  4172.07484994 -8896.20691497 -1765.38012265]
New Q values:  [ 6301.9413665   4172.07484994 -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.03705721e+04 -3.22965309e-01  7.85357057e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.03705721e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  2.46249233e+04 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[54924.31486676 -3032.08138794   790.72804752  1050.85266124]
------
Step:9, Action:North
State  210
Old Q Values:  [54924.31486676 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [29356.6029382  -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.46249233e+04 -3.22965309e-01  7.85357057e+02]
------
Step:10, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.46249233e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  1.86563502e+04 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[29356.6029382  -3032.08138794   790.72804752  1050.85266124]
------
Step:11, Action:North
State  210
Old Q Values:  [29356.6029382  -3032.08138794   790.72804752  1050.85266124]
New Q values:  [17338.94623632 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  19
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.86563502e+04 -3.22965309e-01  7.85357057e+02]
------
Step:12, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.86563502e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  1.36680507e+04 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  18
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20687.03535192  3639.63980231 -4584.50430574 -1713.91177491]
------
Step:13, Action:North
State  216
Old Q Values:  [ 6301.9413665   4172.07484994 -8896.20691497 -1765.38012265]
New Q values:  [ 6620.59175269  4172.07484994 -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.36680507e+04 -3.22965309e-01  7.85357057e+02]
------
Step:14, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.36680507e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  1.16727309e+04 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20687.03535192  3639.63980231 -4584.50430574 -1713.91177491]
------
Step:15, Action:North
State  216
Old Q Values:  [ 6620.59175269  4172.07484994 -8896.20691497 -1765.38012265]
New Q values:  [ 6149.45596518  4172.07484994 -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.16727309e+04 -3.22965309e-01  7.85357057e+02]
------
Step:16, Action:South
State  136
Old Q Values:  [ -170.77177351  3575.70154802 -2383.80019164   797.48232277]
New Q values:  [ -170.77177351  3274.51740876 -2383.80019164   797.48232277]
Reward: -1  Episode Reward:  14
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6149.45596518  4172.07484994 -8896.20691497 -1765.38012265]
------
Step:17, Action:North
State  216
Old Q Values:  [ 6149.45596518  4172.07484994 -8896.20691497 -1765.38012265]
New Q values:  [ 5961.00165018  4172.07484994 -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.16727309e+04 -3.22965309e-01  7.85357057e+02]
------
Step:18, Action:South
State  136
Old Q Values:  [ -170.77177351  3274.51740876 -2383.80019164   797.48232277]
New Q values:  [ -170.77177351  3097.50745856 -2383.80019164   797.48232277]
Reward: -1  Episode Reward:  12
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 5961.00165018  4172.07484994 -8896.20691497 -1765.38012265]
------
Step:19, Action:North
State  216
Old Q Values:  [ 5961.00165018  4172.07484994 -8896.20691497 -1765.38012265]
New Q values:  [ 5885.61992418  4172.07484994 -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  11
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.16727309e+04 -3.22965309e-01  7.85357057e+02]
------
Step:20, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.16727309e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  1.08746030e+04 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  10
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[20687.03535192  3639.63980231 -4584.50430574 -1713.91177491]
------
Step:21, Action:North
State  208
Old Q Values:  [20687.03535192  3639.63980231 -4584.50430574 -1713.91177491]
New Q values:  [ 9203.46637834  3639.63980231 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  9
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3097.50745856 -2383.80019164   797.48232277]
------
Step:22, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.08746030e+04 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  7.11028110e+03 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9203.46637834  3639.63980231 -4584.50430574 -1713.91177491]
------
Step:23, Action:North
State  216
Old Q Values:  [ 5885.61992418  4172.07484994 -8896.20691497 -1765.38012265]
New Q values:  [ 4486.73229865  4172.07484994 -8896.20691497 -1765.38012265]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.11028110e+03 -3.22965309e-01  7.85357057e+02]
------
Step:24, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.11028110e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  5.60455235e+03 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  6
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9203.46637834  3639.63980231 -4584.50430574 -1713.91177491]
------
Step:25, Action:North
State  208
Old Q Values:  [ 9203.46637834  3639.63980231 -4584.50430574 -1713.91177491]
New Q values:  [ 5362.15225698  3639.63980231 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  5
xxxxx
x. ax
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  5.60455235e+03 -3.22965309e-01  7.85357057e+02]
------
Step:26, Action:West
State  136
Old Q Values:  [ -170.77177351  3097.50745856 -2383.80019164   797.48232277]
New Q values:  [ -170.77177351  3097.50745856 -2383.80019164   443.00918022]
Reward: -1  Episode Reward:  4
xxxxx
x.agx
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:27, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   688.72853811  1239.95978669]
New Q values:  [ -253.44886264 -1902.20915811   688.72853811   837.74990974]
Reward: 9  Episode Reward:  13
xxxxx
xa  x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1121.21998354  540.00772417 -252.78192178]
------
Step:28, Action:South
State  107
Old Q Values:  [-252.35169558 1121.21998354  540.00772417 -252.78192178]
New Q values:  [-252.35169558  679.86121671  540.00772417 -252.78192178]
Reward: 9  Episode Reward:  22
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 433.98067359    0.          753.24407764 -178.98      ]
------
Step:29, Action:North
State  184
Old Q Values:  [ 176.80187437    0.         3820.33267721    0.        ]
New Q values:  [ 141.61798197    0.         3820.33267721    0.        ]
Reward: -1  Episode Reward:  21
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:30, Action:East
State  104
Old Q Values:  [-8652.84        2246.80188749   246.29174538 -8652.84      ]
New Q values:  [-8652.84        2246.80188749   797.27438942 -8652.84      ]
Reward: -1  Episode Reward:  20
xxxxx
xga x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2331.19230424   179.26024029]
------
Step:31, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2331.19230424   179.26024029]
New Q values:  [-9594.56523706 -8069.05606225  1861.12915926   179.26024029]
Reward: -1  Episode Reward:  19
xxxxx
x gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3097.50745856 -2383.80019164   443.00918022]
------
Step:32, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  5.60455235e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  3.58724063e+03 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4486.73229865  4172.07484994 -8896.20691497 -1765.38012265]
------
Step:33, Action:North
State  218
Old Q Values:  [ 158.55927744 3989.89183125    0.         1847.21017375]
New Q values:  [1138.99590011 3989.89183125    0.         1847.21017375]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.58724063e+03 -3.22965309e-01  7.85357057e+02]
------
Step:34, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.58724063e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  2.63126380e+03 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1138.99590011 3989.89183125    0.         1847.21017375]
------
Step:35, Action:West
State  216
Old Q Values:  [ 4486.73229865  4172.07484994 -8896.20691497 -1765.38012265]
New Q values:  [ 4486.73229865  4172.07484994 -8896.20691497   -49.17740127]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  200.3419716 ]
------
Step:36, Action:South
State  200
Old Q Values:  [   62.8218634  15441.09421105  2702.62827481   568.38654082]
New Q values:  [   62.8218634  10024.2293375   2702.62827481   568.38654082]
Reward: 9  Episode Reward:  24
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[12807.97217693 -8521.23367799  2205.73600612  8687.56047321]
------
Step:37, Action:West
State  272
Old Q Values:  [12807.97217693 -8521.23367799  2205.73600612  8687.56047321]
New Q values:  [12807.97217693 -8521.23367799  2205.73600612 73633.99602928]
Reward: 100009  Episode Reward:  100033
xxxxx
x   x
xg  x
xa  x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5362.15225698  3639.63980231 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [ 5362.15225698  3639.63980231 -4584.50430574 -1713.91177491]
New Q values:  [ 5362.15225698  3114.67704982 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3440.8945028  -6442.16912869 -8192.20126966  5511.40376299]
------
Step:2, Action:West
State  288
Old Q Values:  [ 3440.8945028  -6442.16912869 -8192.20126966  5511.40376299]
New Q values:  [ 3440.8945028  -6442.16912869 -8192.20126966 24300.16031398]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[12807.97217693 -8521.23367799  2205.73600612 73633.99602928]
------
Step:3, Action:West
State  272
Old Q Values:  [12807.97217693 -8521.23367799  2205.73600612 73633.99602928]
New Q values:  [12807.97217693 -8521.23367799  2205.73600612 63370.92471527]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[113039.75434519  18493.12311579  11203.9437825    1875.31501677]
------
Step:4, Action:North
State  260
Old Q Values:  [ -977.19778254 -5704.51612281  5589.23538852 -5679.36893145]
New Q values:  [ 1162.53040117 -5704.51612281  5589.23538852 -5679.36893145]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:5, Action:East
State  176
Old Q Values:  [103770.24881615   1621.55095326 110360.20137974      0.        ]
New Q values:  [103770.24881615   1621.55095326  56149.96710536      0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x.. x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.08939106e+04 1.21887559e+04 4.00016218e+04]
------
Step:6, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  2.76423187e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  1.62580113e+04  1.03161518e+03]
Reward: -1  Episode Reward:  44
xxxxx
x.. x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[17338.94623632 -3032.08138794   790.72804752  1050.85266124]
------
Step:7, Action:North
State  208
Old Q Values:  [ 5362.15225698  3114.67704982 -4584.50430574 -1713.91177491]
New Q values:  [21302.30913042  3114.67704982 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  43
xxxxx
x..ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[18222.84073342  3116.80540322  -180.00807518 63860.16075876]
------
Step:8, Action:West
State  128
Old Q Values:  [ 40603.00258404   6365.91482369  -8652.84       129870.67533793]
New Q values:  [40603.00258404  6365.91482369 -8652.84       85238.59905405]
Reward: 9  Episode Reward:  52
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[     0.           3629.92591876  32120.21206907 110949.76306292]
------
Step:9, Action:East
State  112
Old Q Values:  [     0.           3629.92591876  32120.21206907 110949.76306292]
New Q values:  [     0.           3629.92591876  38419.06454384 110949.76306292]
Reward: -1  Episode Reward:  51
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[40603.00258404  6365.91482369 -8652.84       85238.59905405]
------
Step:10, Action:North
State  130
Old Q Values:  [18222.84073342  3116.80540322  -180.00807518 63860.16075876]
New Q values:  [26266.584521    3116.80540322  -180.00807518 63860.16075876]
Reward: -301  Episode Reward:  -250
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    3116.80540322  -180.00807518 63860.16075876]
------
Step:11, Action:West
State  130
Old Q Values:  [26266.584521    3116.80540322  -180.00807518 63860.16075876]
New Q values:  [26266.584521    3116.80540322  -180.00807518 44572.02022762]
Reward: -1  Episode Reward:  -251
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  26400.13275764 63428.51974706]
------
Step:12, Action:West
State  114
Old Q Values:  [ -180.6         3557.6642036  26400.13275764 63428.51974706]
New Q values:  [  -180.6          3557.6642036   26400.13275764 120119.84608498]
Reward: 100009  Episode Reward:  99758
xxxxx
xa  x
x g x
x   x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   688.72853811   837.74990974]
New Q values:  [ -253.44886264 -1902.20915811   688.72853811   833.71340777]
Reward: 9  Episode Reward:  9
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1644.04481293  520.46511977 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1644.04481293  520.46511977 -120.29354603]
New Q values:  [-177.44732869 1234.72530123  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1905.69125353 1764.47747765  154.04646645]
------
Step:3, Action:South
State  181
Old Q Values:  [1064.47086336  445.64685634 7951.58278158  262.76946019]
New Q values:  [1064.47086336  927.40283941 7951.58278158  262.76946019]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2479.14698959   26.73544252 1661.52026829  123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [2479.14698959   26.73544252 1661.52026829  123.6214372 ]
New Q values:  [1562.7661719    26.73544252 1661.52026829  123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1905.69125353 1764.47747765  154.04646645]
------
Step:5, Action:South
State  183
Old Q Values:  [1229.17278166 2335.80779431 5030.77091696 1554.80203889]
New Q values:  [1229.17278166 1432.17919821 5030.77091696 1554.80203889]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1562.7661719    26.73544252 1661.52026829  123.6214372 ]
------
Step:6, Action:North
State  261
Old Q Values:  [1562.7661719    26.73544252 1661.52026829  123.6214372 ]
New Q values:  [2133.73774385   26.73544252 1661.52026829  123.6214372 ]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1229.17278166 1432.17919821 5030.77091696 1554.80203889]
------
Step:7, Action:East
State  189
Old Q Values:  [ 275.08817949 1905.69125353 1764.47747765  154.04646645]
New Q values:  [ 275.08817949 1905.69125353 1368.76563885  154.04646645]
Reward: 9  Episode Reward:  33
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  200.3419716 ]
------
Step:8, Action:South
State  197
Old Q Values:  [-5833.78831344  2171.76527724 -4510.80210702   403.06255908]
New Q values:  [-5833.78831344  1495.56024905 -4510.80210702   403.06255908]
Reward: 9  Episode Reward:  42
xxxxx
x  gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.15202069e+02 2.07151379e+03]
------
Step:9, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.15202069e+02 2.07151379e+03]
New Q values:  [   1.64433       0.          315.20206867 1468.12684069]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2133.73774385   26.73544252 1661.52026829  123.6214372 ]
------
Step:10, Action:North
State  261
Old Q Values:  [2133.73774385   26.73544252 1661.52026829  123.6214372 ]
New Q values:  [2362.12637263   26.73544252 1661.52026829  123.6214372 ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1229.17278166 1432.17919821 5030.77091696 1554.80203889]
------
Step:11, Action:East
State  183
Old Q Values:  [1229.17278166 1432.17919821 5030.77091696 1554.80203889]
New Q values:  [1229.17278166 1432.17919821 7145.26684725 1554.80203889]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:12, Action:East
State  204
Old Q Values:  [   0.          772.36851598 1009.62565352  441.58769553]
New Q values:  [   0.          772.36851598 1755.269951    441.58769553]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 4486.73229865  4172.07484994 -8896.20691497   -49.17740127]
------
Step:13, Action:North
State  216
Old Q Values:  [ 4486.73229865  4172.07484994 -8896.20691497   -49.17740127]
New Q values:  [ 2583.47205993  4172.07484994 -8896.20691497   -49.17740127]
Reward: -1  Episode Reward:  47
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.63126380e+03 -3.22965309e-01  7.85357057e+02]
------
Step:14, Action:West
State  130
Old Q Values:  [26266.584521    3116.80540322  -180.00807518 44572.02022762]
New Q values:  [26266.584521    3116.80540322  -180.00807518 61853.04897998]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.46749470e+05]
------
Step:15, Action:West
State  127
Old Q Values:  [  0.           1.67014986 384.73306724 831.13599391]
New Q values:  [  0.           1.67014986 384.73306724 702.27198793]
Reward: -1  Episode Reward:  45
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1234.72530123  520.46511977 -120.29354603]
------
Step:16, Action:South
State  99
Old Q Values:  [     0.        115810.1272872  38863.4580325      0.       ]
New Q values:  [    0.         55856.69771173 38863.4580325      0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562 31777.48932284     0.        ]
------
Step:17, Action:East
State  189
Old Q Values:  [ 275.08817949 1905.69125353 1368.76563885  154.04646645]
New Q values:  [ 275.08817949 1905.69125353 1125.76350725  154.04646645]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1929.52417238    0.          198.38683706]
------
Step:18, Action:South
State  204
Old Q Values:  [   0.          772.36851598 1755.269951    441.58769553]
New Q values:  [   0.         4903.91365911 1755.269951    441.58769553]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[15318.55417572 -5807.06396197   855.18004824  1931.11507833]
------
Step:19, Action:West
State  276
Old Q Values:  [15318.55417572 -5807.06396197   855.18004824  1931.11507833]
New Q values:  [15318.55417572 -5807.06396197   855.18004824  1480.48394312]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2362.12637263   26.73544252 1661.52026829  123.6214372 ]
------
Step:20, Action:North
State  261
Old Q Values:  [2362.12637263   26.73544252 1661.52026829  123.6214372 ]
New Q values:  [1515.95792511   26.73544252 1661.52026829  123.6214372 ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1905.69125353 1125.76350725  154.04646645]
------
Step:21, Action:South
State  189
Old Q Values:  [ 275.08817949 1905.69125353 1125.76350725  154.04646645]
New Q values:  [ 275.08817949 1260.1325819  1125.76350725  154.04646645]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1515.95792511   26.73544252 1661.52026829  123.6214372 ]
------
Step:22, Action:East
State  261
Old Q Values:  [1515.95792511   26.73544252 1661.52026829  123.6214372 ]
New Q values:  [1515.95792511   26.73544252 1104.44615952  123.6214372 ]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          315.20206867 1468.12684069]
------
Step:23, Action:West
State  277
Old Q Values:  [   1.64433       0.          315.20206867 1468.12684069]
New Q values:  [   1.64433       0.          315.20206867 1041.43811381]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1515.95792511   26.73544252 1104.44615952  123.6214372 ]
------
Step:24, Action:North
State  257
Old Q Values:  [113039.75434519  18493.12311579  11203.9437825    1875.31501677]
New Q values:  [54748.54853493 18493.12311579 11203.9437825   1875.31501677]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562 31777.48932284     0.        ]
------
Step:25, Action:East
State  189
Old Q Values:  [ 275.08817949 1260.1325819  1125.76350725  154.04646645]
New Q values:  [ 275.08817949 1260.1325819  1028.56265462  154.04646645]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1929.52417238    0.          198.38683706]
------
Step:26, Action:South
State  204
Old Q Values:  [   0.         4903.91365911 1755.269951    441.58769553]
New Q values:  [   0.         6556.53171636 1755.269951    441.58769553]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[15318.55417572 -5807.06396197   855.18004824  1480.48394312]
------
Step:27, Action:West
State  276
Old Q Values:  [15318.55417572 -5807.06396197   855.18004824  1480.48394312]
New Q values:  [15318.55417572 -5807.06396197   855.18004824  1046.38095478]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1515.95792511   26.73544252 1104.44615952  123.6214372 ]
------
Step:28, Action:North
State  260
Old Q Values:  [ 1162.53040117 -5704.51612281  5589.23538852 -5679.36893145]
New Q values:  [ 1002.87326818 -5704.51612281  5589.23538852 -5679.36893145]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:29, Action:East
State  188
Old Q Values:  [-6523.78898263  2697.46681606   713.16111444     0.        ]
New Q values:  [-6523.78898263  2697.46681606  2251.62396068     0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         6556.53171636 1755.269951    441.58769553]
------
Step:30, Action:South
State  204
Old Q Values:  [   0.         6556.53171636 1755.269951    441.58769553]
New Q values:  [   0.         7217.57893926 1755.269951    441.58769553]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[15318.55417572 -5807.06396197   855.18004824  1046.38095478]
------
Step:31, Action:West
State  276
Old Q Values:  [15318.55417572 -5807.06396197   855.18004824  1046.38095478]
New Q values:  [15318.55417572 -5807.06396197   855.18004824  2094.72299847]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1002.87326818 -5704.51612281  5589.23538852 -5679.36893145]
------
Step:32, Action:East
State  260
Old Q Values:  [ 1002.87326818 -5704.51612281  5589.23538852 -5679.36893145]
New Q values:  [ 1002.87326818 -5704.51612281  6830.66040813 -5679.36893145]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[15318.55417572 -5807.06396197   855.18004824  2094.72299847]
------
Step:33, Action:North
State  276
Old Q Values:  [15318.55417572 -5807.06396197   855.18004824  2094.72299847]
New Q values:  [ 7511.10428708 -5807.06396197   855.18004824  2094.72299847]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.         4614.27538929 1720.21125214    0.        ]
------
Step:34, Action:East
State  204
Old Q Values:  [   0.         7217.57893926 1755.269951    441.58769553]
New Q values:  [   0.         7217.57893926 1953.13043538  441.58769553]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2583.47205993  4172.07484994 -8896.20691497   -49.17740127]
------
Step:35, Action:South
State  208
Old Q Values:  [21302.30913042  3114.67704982 -4584.50430574 -1713.91177491]
New Q values:  [21302.30913042 68541.31891412 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100035
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1234.72530123  520.46511977 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 2131.39383029  238.35800069    0.        ]
New Q values:  [ 221.30610858 3243.43236659  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xag.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1064.47086336  927.40283941 7951.58278158  262.76946019]
------
Step:2, Action:North
State  183
Old Q Values:  [1229.17278166 1432.17919821 7145.26684725 1554.80203889]
New Q values:  [ 861.48670303 1432.17919821 7145.26684725 1554.80203889]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1234.72530123  520.46511977 -120.29354603]
------
Step:3, Action:South
State  110
Old Q Values:  [-239.29051573  404.29096499  217.32156498 -180.6       ]
New Q values:  [-239.29051573 1709.12590018  217.32156498 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5160.03171396     0.        ]
------
Step:4, Action:East
State  180
Old Q Values:  [ -122.403451    8050.99445658 10856.53650728 -4966.32149798]
New Q values:  [ -122.403451    8050.99445658 10792.04060668 -4966.32149798]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
xga.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.08939106e+04 1.21887559e+04 4.00016218e+04]
New Q values:  [3.89777037e-01 1.08939106e+04 2.54432980e+04 4.00016218e+04]
Reward: 9  Episode Reward:  25
xxxxx
xg..x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21302.30913042 68541.31891412 -4584.50430574 -1713.91177491]
------
Step:6, Action:South
State  208
Old Q Values:  [21302.30913042 68541.31891412 -4584.50430574 -1713.91177491]
New Q values:  [21302.30913042 34711.97565984 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3440.8945028  -6442.16912869 -8192.20126966 24300.16031398]
------
Step:7, Action:West
State  288
Old Q Values:  [ 3440.8945028  -6442.16912869 -8192.20126966 24300.16031398]
New Q values:  [ 3440.8945028  -6442.16912869 -8192.20126966 28730.74154017]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[12807.97217693 -8521.23367799  2205.73600612 63370.92471527]
------
Step:8, Action:North
State  272
Old Q Values:  [12807.97217693 -8521.23367799  2205.73600612 63370.92471527]
New Q values:  [ 9999.99227481 -8521.23367799  2205.73600612 63370.92471527]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.62580113e+04  1.03161518e+03]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  1.62580113e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  1.17042884e+04  1.03161518e+03]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[17338.94623632 -3032.08138794   790.72804752  1050.85266124]
------
Step:10, Action:North
State  210
Old Q Values:  [17338.94623632 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [25496.89318852 -3032.08138794   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  40
xxxxx
x .ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    3116.80540322  -180.00807518 61853.04897998]
------
Step:11, Action:West
State  130
Old Q Values:  [26266.584521    3116.80540322  -180.00807518 61853.04897998]
New Q values:  [26266.584521    3116.80540322  -180.00807518 60782.57341749]
Reward: 9  Episode Reward:  49
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   26400.13275764 120119.84608498]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  3558.91744656   660.15631681]
New Q values:  [ -281.736      -1150.91067548  3558.91744656   334.95975895]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:13, Action:East
State  98
Old Q Values:  [    0.         43902.01212913 28308.61746576     0.        ]
New Q values:  [    0.         43902.01212913 47358.8008118      0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   26400.13275764 120119.84608498]
------
Step:14, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  3558.91744656   334.95975895]
New Q values:  [ -281.736      -1150.91067548  3558.91744656   337.34226859]
Reward: -1  Episode Reward:  46
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  679.86121671  540.00772417 -252.78192178]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558  679.86121671  540.00772417 -252.78192178]
New Q values:  [-252.35169558  497.31770998  540.00772417 -252.78192178]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 433.98067359    0.          753.24407764 -178.98      ]
------
Step:16, Action:North
State  185
Old Q Values:  [ 433.98067359    0.          753.24407764 -178.98      ]
New Q values:  [ 334.99458669    0.          753.24407764 -178.98      ]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  497.31770998  540.00772417 -252.78192178]
------
Step:17, Action:East
State  105
Old Q Values:  [-180.6         469.44311696   86.99637671    0.        ]
New Q values:  [-180.6         469.44311696  158.81480179    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:18, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1861.12915926   179.26024029]
New Q values:  [-9594.56523706 -8069.05606225  1861.12915926   211.9370312 ]
Reward: -1  Episode Reward:  42
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         469.44311696  158.81480179    0.        ]
------
Step:19, Action:South
State  107
Old Q Values:  [-252.35169558  497.31770998  540.00772417 -252.78192178]
New Q values:  [-252.35169558  424.30030728  540.00772417 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 334.99458669    0.          753.24407764 -178.98      ]
------
Step:20, Action:North
State  184
Old Q Values:  [ 141.61798197    0.         3820.33267721    0.        ]
New Q values:  [ 127.54442502    0.         3820.33267721    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:21, Action:East
State  98
Old Q Values:  [    0.         43902.01212913 47358.8008118      0.        ]
New Q values:  [    0.         43902.01212913 54978.87415022     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   26400.13275764 120119.84608498]
------
Step:22, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  3558.91744656   337.34226859]
New Q values:  [ -281.736      -1150.91067548  3558.91744656   205.83413966]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:23, Action:East
State  98
Old Q Values:  [    0.         43902.01212913 54978.87415022     0.        ]
New Q values:  [    0.         43902.01212913 58026.90348558     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   26400.13275764 120119.84608498]
------
Step:24, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  3558.91744656   205.83413966]
New Q values:  [ -281.736      -1150.91067548  3558.91744656   243.73597312]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  424.30030728  540.00772417 -252.78192178]
------
Step:25, Action:East
State  107
Old Q Values:  [-252.35169558  424.30030728  540.00772417 -252.78192178]
New Q values:  [-252.35169558  424.30030728 1283.07832364 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3558.91744656   243.73597312]
------
Step:26, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   688.72853811   833.71340777]
New Q values:  [ -253.44886264 -1902.20915811  1064.27055571   833.71340777]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.63126380e+03 -3.22965309e-01  7.85357057e+02]
------
Step:27, Action:West
State  136
Old Q Values:  [ -170.77177351  3097.50745856 -2383.80019164   443.00918022]
New Q values:  [ -170.77177351  3097.50745856 -2383.80019164   301.2199232 ]
Reward: -1  Episode Reward:  33
xxxxx
x agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:28, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1064.27055571   833.71340777]
New Q values:  [ -253.44886264 -1902.20915811  1064.27055571   717.8088602 ]
Reward: -1  Episode Reward:  32
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  424.30030728 1283.07832364 -252.78192178]
------
Step:29, Action:East
State  107
Old Q Values:  [-252.35169558  424.30030728 1283.07832364 -252.78192178]
New Q values:  [-252.35169558  424.30030728 1580.30656342 -252.78192178]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3558.91744656   243.73597312]
------
Step:30, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3558.91744656   243.73597312]
New Q values:  [ -281.736      -1150.91067548  2212.34611909   243.73597312]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.63126380e+03 -3.22965309e-01  7.85357057e+02]
------
Step:31, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.63126380e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  2.24887307e+03 -3.22965309e-01  7.85357057e+02]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1138.99590011 3989.89183125    0.         1847.21017375]
------
Step:32, Action:West
State  216
Old Q Values:  [ 2583.47205993  4172.07484994 -8896.20691497   -49.17740127]
New Q values:  [ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x agx
x.  x
xxxxx
Step:33, Action:South
State  203
Old Q Values:  [3.60604218e+00 3.88953274e+03 3.50642584e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 6.99551834e+03 3.50642584e+03 9.06816004e+03]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 18134.35081278]
------
Step:34, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 18134.35081278]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 83683.70488559]
Reward: 100009  Episode Reward:  100036
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.24887307e+03 -3.22965309e-01  7.85357057e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.24887307e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  8.55401718e+03 -3.22965309e-01  7.85357057e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25496.89318852 -3032.08138794   790.72804752  1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [25496.89318852 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [12764.36243077 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  8.55401718e+03 -3.22965309e-01  7.85357057e+02]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  8.55401718e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  7.83459957e+03 -3.22965309e-01  7.85357057e+02]
Reward: -10001  Episode Reward:  -9993
xxxxx
x.. x
x..gx
x.. x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21302.30913042 34711.97565984 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [21302.30913042 34711.97565984 -4584.50430574 -1713.91177491]
New Q values:  [21302.30913042 22509.41272599 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3440.8945028  -6442.16912869 -8192.20126966 28730.74154017]
------
Step:2, Action:West
State  288
Old Q Values:  [ 3440.8945028  -6442.16912869 -8192.20126966 28730.74154017]
New Q values:  [ 3440.8945028  -6442.16912869 -8192.20126966 30508.97403065]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9999.99227481 -8521.23367799  2205.73600612 63370.92471527]
------
Step:3, Action:West
State  272
Old Q Values:  [ 9999.99227481 -8521.23367799  2205.73600612 63370.92471527]
New Q values:  [ 9999.99227481 -8521.23367799  2205.73600612 25808.55726364]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1515.95792511   26.73544252 1104.44615952  123.6214372 ]
------
Step:4, Action:North
State  260
Old Q Values:  [ 1002.87326818 -5704.51612281  6830.66040813 -5679.36893145]
New Q values:  [ 3644.16148927 -5704.51612281  6830.66040813 -5679.36893145]
Reward: 9  Episode Reward:  36
xxxxx
xg..x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -122.403451    8050.99445658 10792.04060668 -4966.32149798]
------
Step:5, Action:East
State  181
Old Q Values:  [1064.47086336  927.40283941 7951.58278158  262.76946019]
New Q values:  [1064.47086336  927.40283941 9630.0591164   262.76946019]
Reward: 9  Episode Reward:  45
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:6, Action:East
State  193
Old Q Values:  [-5922.26708831 15918.59333286 10523.71479534  1460.9765133 ]
New Q values:  [-5922.26708831 15918.59333286 10961.70973593  1460.9765133 ]
Reward: -1  Episode Reward:  44
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21302.30913042 22509.41272599 -4584.50430574 -1713.91177491]
------
Step:7, Action:South
State  208
Old Q Values:  [21302.30913042 22509.41272599 -4584.50430574 -1713.91177491]
New Q values:  [21302.30913042 18155.85729959 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3440.8945028  -6442.16912869 -8192.20126966 30508.97403065]
------
Step:8, Action:West
State  288
Old Q Values:  [ 3440.8945028  -6442.16912869 -8192.20126966 30508.97403065]
New Q values:  [ 3440.8945028  -6442.16912869 -8192.20126966 37308.10107794]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 83683.70488559]
------
Step:9, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 83683.70488559]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 33927.66933177]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1515.95792511   26.73544252 1104.44615952  123.6214372 ]
------
Step:10, Action:North
State  261
Old Q Values:  [1515.95792511   26.73544252 1104.44615952  123.6214372 ]
New Q values:  [3494.80090496   26.73544252 1104.44615952  123.6214372 ]
Reward: -1  Episode Reward:  40
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1064.47086336  927.40283941 9630.0591164   262.76946019]
------
Step:11, Action:East
State  181
Old Q Values:  [1064.47086336  927.40283941 9630.0591164   262.76946019]
New Q values:  [1064.47086336  927.40283941 8627.00164642  262.76946019]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 15918.59333286 10961.70973593  1460.9765133 ]
------
Step:12, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.08939106e+04 2.54432980e+04 4.00016218e+04]
New Q values:  [3.89777037e-01 1.20995314e+04 2.54432980e+04 4.00016218e+04]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9999.99227481 -8521.23367799  2205.73600612 25808.55726364]
------
Step:13, Action:West
State  276
Old Q Values:  [ 7511.10428708 -5807.06396197   855.18004824  2094.72299847]
New Q values:  [ 7511.10428708 -5807.06396197   855.18004824  1885.72947088]
Reward: -1  Episode Reward:  37
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3494.80090496   26.73544252 1104.44615952  123.6214372 ]
------
Step:14, Action:North
State  260
Old Q Values:  [ 3644.16148927 -5704.51612281  6830.66040813 -5679.36893145]
New Q values:  [ 4694.67677771 -5704.51612281  6830.66040813 -5679.36893145]
Reward: -1  Episode Reward:  36
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -122.403451    8050.99445658 10792.04060668 -4966.32149798]
------
Step:15, Action:East
State  181
Old Q Values:  [1064.47086336  927.40283941 8627.00164642  262.76946019]
New Q values:  [1064.47086336  927.40283941 9894.22666233  262.76946019]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:16, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.20995314e+04 2.54432980e+04 4.00016218e+04]
New Q values:  [3.89777037e-01 1.20995314e+04 1.65674120e+04 4.00016218e+04]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21302.30913042 18155.85729959 -4584.50430574 -1713.91177491]
------
Step:17, Action:North
State  208
Old Q Values:  [21302.30913042 18155.85729959 -4584.50430574 -1713.91177491]
New Q values:  [34097.90336838 18155.85729959 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  43
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[40603.00258404  6365.91482369 -8652.84       85238.59905405]
------
Step:18, Action:North
State  128
Old Q Values:  [40603.00258404  6365.91482369 -8652.84       85238.59905405]
New Q values:  [41632.18074983  6365.91482369 -8652.84       85238.59905405]
Reward: -301  Episode Reward:  -258
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[41632.18074983  6365.91482369 -8652.84       85238.59905405]
------
Step:19, Action:West
State  130
Old Q Values:  [26266.584521    3116.80540322  -180.00807518 60782.57341749]
New Q values:  [ 26266.584521     3116.80540322   -180.00807518 120354.38319249]
Reward: 100009  Episode Reward:  99751
xxxxx
x a x
xg  x
x   x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9999.99227481 -8521.23367799  2205.73600612 25808.55726364]
------
Step:1, Action:North
State  272
Old Q Values:  [ 9999.99227481 -8521.23367799  2205.73600612 25808.55726364]
New Q values:  [ 7516.68343281 -8521.23367799  2205.73600612 25808.55726364]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.17042884e+04  1.03161518e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.20995314e+04 1.65674120e+04 4.00016218e+04]
New Q values:  [3.89777037e-01 1.20995314e+04 1.68617358e+04 4.00016218e+04]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[34097.90336838 18155.85729959 -4584.50430574 -1713.91177491]
------
Step:3, Action:North
State  208
Old Q Values:  [34097.90336838 18155.85729959 -4584.50430574 -1713.91177491]
New Q values:  [49750.8763051  18155.85729959 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     3116.80540322   -180.00807518 120354.38319249]
------
Step:4, Action:West
State  130
Old Q Values:  [ 26266.584521     3116.80540322   -180.00807518 120354.38319249]
New Q values:  [26266.584521    3116.80540322  -180.00807518 92171.99416593]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.46749470e+05]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1064.27055571   717.8088602 ]
New Q values:  [ -253.44886264 -1902.20915811  1064.27055571   766.61551311]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  424.30030728 1580.30656342 -252.78192178]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558  424.30030728 1580.30656342 -252.78192178]
New Q values:  [-252.35169558  424.30030728  950.80379208 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1064.27055571   766.61551311]
------
Step:7, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1064.27055571   766.61551311]
New Q values:  [ -253.44886264 -1902.20915811  2775.48809382   766.61551311]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.83459957e+03 -3.22965309e-01  7.85357057e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  7.83459957e+03 -3.22965309e-01  7.85357057e+02]
New Q values:  [ 8.43634063e+00  7.83459957e+03 -3.22965309e-01  9.77246658e+02]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x.g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2212.34611909   243.73597312]
------
Step:9, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1861.12915926   211.9370312 ]
New Q values:  [-9594.56523706 -8069.05606225  1673.10390127   211.9370312 ]
Reward: -1  Episode Reward:  41
xxxxx
x gax
x.  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3097.50745856 -2383.80019164   301.2199232 ]
------
Step:10, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.83459957e+03 -3.22965309e-01  9.77246658e+02]
New Q values:  [ 8.43634063e+00  1.80585027e+04 -3.22965309e-01  9.77246658e+02]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x.gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[49750.8763051  18155.85729959 -4584.50430574 -1713.91177491]
------
Step:11, Action:North
State  210
Old Q Values:  [12764.36243077 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [10522.69578838 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.80585027e+04 -3.22965309e-01  9.77246658e+02]
------
Step:12, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.80585027e+04 -3.22965309e-01  9.77246658e+02]
New Q values:  [ 8.43634063e+00  1.03796098e+04 -3.22965309e-01  9.77246658e+02]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x. ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10522.69578838 -3032.08138794   790.72804752  1050.85266124]
------
Step:13, Action:North
State  208
Old Q Values:  [49750.8763051  18155.85729959 -4584.50430574 -1713.91177491]
New Q values:  [23013.63346942 18155.85729959 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.03796098e+04 -3.22965309e-01  9.77246658e+02]
------
Step:14, Action:West
State  136
Old Q Values:  [ -170.77177351  3097.50745856 -2383.80019164   301.2199232 ]
New Q values:  [ -170.77177351  3097.50745856 -2383.80019164   244.50422039]
Reward: -1  Episode Reward:  36
xxxxx
x agx
x.  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:15, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1673.10390127   211.9370312 ]
New Q values:  [-9594.56523706 -8069.05606225  1673.10390127   225.00774757]
Reward: -1  Episode Reward:  35
xxxxx
xag x
x.  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         469.44311696  158.81480179    0.        ]
------
Step:16, Action:South
State  111
Old Q Values:  [-177.44732869 1234.72530123  520.46511977 -120.29354603]
New Q values:  [-177.44732869  877.32989506  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  44
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1260.1325819  1028.56265462  154.04646645]
------
Step:17, Action:South
State  189
Old Q Values:  [ 275.08817949 1260.1325819  1028.56265462  154.04646645]
New Q values:  [ 275.08817949 1551.89330425 1028.56265462  154.04646645]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3494.80090496   26.73544252 1104.44615952  123.6214372 ]
------
Step:18, Action:North
State  260
Old Q Values:  [ 4694.67677771 -5704.51612281  6830.66040813 -5679.36893145]
New Q values:  [ 2686.5107559  -5704.51612281  6830.66040813 -5679.36893145]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  2697.46681606  2251.62396068     0.        ]
------
Step:19, Action:South
State  188
Old Q Values:  [-6523.78898263  2697.46681606  2251.62396068     0.        ]
New Q values:  [-6523.78898263  3127.58484886  2251.62396068     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2686.5107559  -5704.51612281  6830.66040813 -5679.36893145]
------
Step:20, Action:East
State  260
Old Q Values:  [ 2686.5107559  -5704.51612281  6830.66040813 -5679.36893145]
New Q values:  [ 2686.5107559  -5704.51612281  4984.99544937 -5679.36893145]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7511.10428708 -5807.06396197   855.18004824  1885.72947088]
------
Step:21, Action:North
State  276
Old Q Values:  [ 7511.10428708 -5807.06396197   855.18004824  1885.72947088]
New Q values:  [ 5169.11539661 -5807.06396197   855.18004824  1885.72947088]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         7217.57893926 1953.13043538  441.58769553]
------
Step:22, Action:South
State  204
Old Q Values:  [   0.         7217.57893926 1953.13043538  441.58769553]
New Q values:  [   0.         4437.16619469 1953.13043538  441.58769553]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 5169.11539661 -5807.06396197   855.18004824  1885.72947088]
------
Step:23, Action:North
State  276
Old Q Values:  [ 5169.11539661 -5807.06396197   855.18004824  1885.72947088]
New Q values:  [ 3398.19601705 -5807.06396197   855.18004824  1885.72947088]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         4437.16619469 1953.13043538  441.58769553]
------
Step:24, Action:South
State  204
Old Q Values:  [   0.         4437.16619469 1953.13043538  441.58769553]
New Q values:  [   0.         2793.72528299 1953.13043538  441.58769553]
Reward: -1  Episode Reward:  36
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3398.19601705 -5807.06396197   855.18004824  1885.72947088]
------
Step:25, Action:North
State  276
Old Q Values:  [ 3398.19601705 -5807.06396197   855.18004824  1885.72947088]
New Q values:  [ 2196.79599172 -5807.06396197   855.18004824  1885.72947088]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         2793.72528299 1953.13043538  441.58769553]
------
Step:26, Action:South
State  206
Old Q Values:  [   0.         4614.27538929 1720.21125214    0.        ]
New Q values:  [   0.         2504.14895323 1720.21125214    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 2196.79599172 -5807.06396197   855.18004824  1885.72947088]
------
Step:27, Action:North
State  276
Old Q Values:  [ 2196.79599172 -5807.06396197   855.18004824  1885.72947088]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  1885.72947088]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xga x
x  .x
xxxxx
Step:28, Action:South
State  204
Old Q Values:  [   0.         2793.72528299 1953.13043538  441.58769553]
New Q values:  [   0.         1682.60895446 1953.13043538  441.58769553]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1716.23598158 -5807.06396197   855.18004824  1885.72947088]
------
Step:29, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  1885.72947088]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  2249.19042316]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2686.5107559  -5704.51612281  4984.99544937 -5679.36893145]
------
Step:30, Action:East
State  261
Old Q Values:  [3494.80090496   26.73544252 1104.44615952  123.6214372 ]
New Q values:  [3494.80090496   26.73544252 1115.93559076  123.6214372 ]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1716.23598158 -5807.06396197   855.18004824  2249.19042316]
------
Step:31, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  2249.19042316]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  2394.57480408]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2686.5107559  -5704.51612281  4984.99544937 -5679.36893145]
------
Step:32, Action:East
State  261
Old Q Values:  [3494.80090496   26.73544252 1115.93559076  123.6214372 ]
New Q values:  [3494.80090496   26.73544252 1164.14667753  123.6214372 ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1716.23598158 -5807.06396197   855.18004824  2394.57480408]
------
Step:33, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  2394.57480408]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  2005.67019312]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3494.80090496   26.73544252 1164.14667753  123.6214372 ]
------
Step:34, Action:North
State  261
Old Q Values:  [3494.80090496   26.73544252 1164.14667753  123.6214372 ]
New Q values:  [1862.88835326   26.73544252 1164.14667753  123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1551.89330425 1028.56265462  154.04646645]
------
Step:35, Action:South
State  189
Old Q Values:  [ 275.08817949 1551.89330425 1028.56265462  154.04646645]
New Q values:  [ 275.08817949 1179.02382768 1028.56265462  154.04646645]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1862.88835326   26.73544252 1164.14667753  123.6214372 ]
------
Step:36, Action:North
State  257
Old Q Values:  [54748.54853493 18493.12311579 11203.9437825   1875.31501677]
New Q values:  [31432.06621083 18493.12311579 11203.9437825   1875.31501677]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562 31777.48932284     0.        ]
------
Step:37, Action:East
State  191
Old Q Values:  [  3.06655861 941.79461398 631.32287843   0.        ]
New Q values:  [   3.06655861  941.79461398 1003.17383734    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.         2504.14895323 1720.21125214    0.        ]
------
Step:38, Action:East
State  206
Old Q Values:  [   0.         2504.14895323 1720.21125214    0.        ]
New Q values:  [   0.         2504.14895323 1884.45205023    0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1138.99590011 3989.89183125    0.         1847.21017375]
------
Step:39, Action:South
State  208
Old Q Values:  [23013.63346942 18155.85729959 -4584.50430574 -1713.91177491]
New Q values:  [23013.63346942 78460.17324322 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100031
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10522.69578838 -3032.08138794   790.72804752  1050.85266124]
------
Step:1, Action:North
State  208
Old Q Values:  [23013.63346942 78460.17324322 -4584.50430574 -1713.91177491]
New Q values:  [12324.73633515 78460.17324322 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.03796098e+04 -3.22965309e-01  9.77246658e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.03796098e+04 -3.22965309e-01  9.77246658e+02]
New Q values:  [ 8.43634063e+00  2.16892959e+04 -3.22965309e-01  9.77246658e+02]
Reward: -10001  Episode Reward:  -9992
xxxxx
x.. x
x..gx
x. .x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.16892959e+04 -3.22965309e-01  9.77246658e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  2.16892959e+04 -3.22965309e-01  9.77246658e+02]
New Q values:  [ 8.43634063e+00  2.16892959e+04 -3.22965309e-01  1.22894509e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2775.48809382   766.61551311]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2212.34611909   243.73597312]
New Q values:  [ -281.736      -1150.91067548  7391.12721848   243.73597312]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.16892959e+04 -3.22965309e-01  1.22894509e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.16892959e+04 -3.22965309e-01  1.22894509e+03]
New Q values:  [ 8.43634063e+00  9.92674082e+03 -3.22965309e-01  1.22894509e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
------
Step:4, Action:South
State  208
Old Q Values:  [12324.73633515 78460.17324322 -4584.50430574 -1713.91177491]
New Q values:  [12324.73633515 42581.89962067 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  16
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3440.8945028  -6442.16912869 -8192.20126966 37308.10107794]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3440.8945028  -6442.16912869 -8192.20126966 37308.10107794]
New Q values:  [ 3440.8945028  -6442.16912869 -8192.20126966 22671.20761027]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7516.68343281 -8521.23367799  2205.73600612 25808.55726364]
------
Step:6, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 33927.66933177]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 23006.08759596]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[31432.06621083 18493.12311579 11203.9437825   1875.31501677]
------
Step:7, Action:North
State  257
Old Q Values:  [31432.06621083 18493.12311579 11203.9437825   1875.31501677]
New Q values:  [22111.47328118 18493.12311579 11203.9437825   1875.31501677]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562 31777.48932284     0.        ]
------
Step:8, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562 31777.48932284     0.        ]
New Q values:  [16872.63888686 16101.90751562 16227.68225202     0.        ]
Reward: 9  Episode Reward:  52
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.17042884e+04  1.03161518e+03]
------
Step:9, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.20995314e+04 1.68617358e+04 4.00016218e+04]
New Q values:  [3.89777037e-01 1.20995314e+04 1.95186642e+04 4.00016218e+04]
Reward: -1  Episode Reward:  51
xxxxx
x.  x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12324.73633515 42581.89962067 -4584.50430574 -1713.91177491]
------
Step:10, Action:South
State  208
Old Q Values:  [12324.73633515 42581.89962067 -4584.50430574 -1713.91177491]
New Q values:  [12324.73633515 23833.52213135 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  50
xxxxx
x.  x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3440.8945028  -6442.16912869 -8192.20126966 22671.20761027]
------
Step:11, Action:West
State  288
Old Q Values:  [ 3440.8945028  -6442.16912869 -8192.20126966 22671.20761027]
New Q values:  [ 3440.8945028  -6442.16912869 -8192.20126966 15969.70932289]
Reward: -1  Episode Reward:  49
xxxxx
x. gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 23006.08759596]
------
Step:12, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 23006.08759596]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 15835.27702274]
Reward: -1  Episode Reward:  48
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[22111.47328118 18493.12311579 11203.9437825   1875.31501677]
------
Step:13, Action:North
State  257
Old Q Values:  [22111.47328118 18493.12311579 11203.9437825   1875.31501677]
New Q values:  [13905.78097853 18493.12311579 11203.9437825   1875.31501677]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562 16227.68225202     0.        ]
------
Step:14, Action:North
State  177
Old Q Values:  [109163.23336057  23344.73803738  11140.20003573      0.        ]
New Q values:  [120427.70265775  23344.73803738  11140.20003573      0.        ]
Reward: 100009  Episode Reward:  100056
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3440.8945028  -6442.16912869 -8192.20126966 15969.70932289]
------
Step:1, Action:North
State  288
Old Q Values:  [ 3440.8945028  -6442.16912869 -8192.20126966 15969.70932289]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966 15969.70932289]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10522.69578838 -3032.08138794   790.72804752  1050.85266124]
------
Step:2, Action:North
State  208
Old Q Values:  [12324.73633515 23833.52213135 -4584.50430574 -1713.91177491]
New Q values:  [32586.89278384 23833.52213135 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    3116.80540322  -180.00807518 92171.99416593]
------
Step:3, Action:West
State  136
Old Q Values:  [ -170.77177351  3097.50745856 -2383.80019164   244.50422039]
New Q values:  [ -170.77177351  3097.50745856 -2383.80019164   605.13285854]
Reward: 9  Episode Reward:  27
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1673.10390127   225.00774757]
------
Step:4, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1673.10390127   225.00774757]
New Q values:  [-9594.56523706 -8069.05606225  1597.89379808   225.00774757]
Reward: -1  Episode Reward:  26
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3097.50745856 -2383.80019164   605.13285854]
------
Step:5, Action:South
State  136
Old Q Values:  [ -170.77177351  3097.50745856 -2383.80019164   605.13285854]
New Q values:  [ -170.77177351 11014.47081858 -2383.80019164   605.13285854]
Reward: -1  Episode Reward:  25
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[32586.89278384 23833.52213135 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  208
Old Q Values:  [32586.89278384 23833.52213135 -4584.50430574 -1713.91177491]
New Q values:  [16012.17935837 23833.52213135 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  9.92674082e+03 -3.22965309e-01  1.22894509e+03]
------
Step:7, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  9.92674082e+03 -3.22965309e-01  1.22894509e+03]
New Q values:  [ 8.43634063e+00  7.12690506e+03 -3.22965309e-01  1.22894509e+03]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10522.69578838 -3032.08138794   790.72804752  1050.85266124]
------
Step:8, Action:North
State  210
Old Q Values:  [10522.69578838 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [ 6346.54983424 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.12690506e+03 -3.22965309e-01  1.22894509e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.12690506e+03 -3.22965309e-01  1.22894509e+03]
New Q values:  [ 8.43634063e+00  4.75412698e+03 -3.22965309e-01  1.22894509e+03]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6346.54983424 -3032.08138794   790.72804752  1050.85266124]
------
Step:10, Action:North
State  210
Old Q Values:  [ 6346.54983424 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [ 3964.25802633 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.75412698e+03 -3.22965309e-01  1.22894509e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.75412698e+03 -3.22965309e-01  1.22894509e+03]
New Q values:  [ 8.43634063e+00  3.09032820e+03 -3.22965309e-01  1.22894509e+03]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 3964.25802633 -3032.08138794   790.72804752  1050.85266124]
------
Step:12, Action:North
State  210
Old Q Values:  [ 3964.25802633 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [ 2512.20166996 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.09032820e+03 -3.22965309e-01  1.22894509e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.09032820e+03 -3.22965309e-01  1.22894509e+03]
New Q values:  [ 8.43634063e+00  1.98919178e+03 -3.22965309e-01  1.22894509e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2512.20166996 -3032.08138794   790.72804752  1050.85266124]
------
Step:14, Action:North
State  208
Old Q Values:  [16012.17935837 23833.52213135 -4584.50430574 -1713.91177491]
New Q values:  [ 7001.02927741 23833.52213135 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.98919178e+03 -3.22965309e-01  1.22894509e+03]
------
Step:15, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.98919178e+03 -3.22965309e-01  1.22894509e+03]
New Q values:  [ 8.43634063e+00  1.98919178e+03 -3.22965309e-01  2.70831620e+03]
Reward: -1  Episode Reward:  15
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  7391.12721848   243.73597312]
------
Step:16, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1597.89379808   225.00774757]
New Q values:  [-9594.56523706 -8069.05606225  3942.8987648    225.00774757]
Reward: -1  Episode Reward:  14
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 11014.47081858 -2383.80019164   605.13285854]
------
Step:17, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.98919178e+03 -3.22965309e-01  2.70831620e+03]
New Q values:  [ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  2.70831620e+03]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7001.02927741 23833.52213135 -4584.50430574 -1713.91177491]
------
Step:18, Action:South
State  208
Old Q Values:  [ 7001.02927741 23833.52213135 -4584.50430574 -1713.91177491]
New Q values:  [ 7001.02927741 14323.72164941 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966 15969.70932289]
------
Step:19, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966 15969.70932289]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966 14129.85090825]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7516.68343281 -8521.23367799  2205.73600612 25808.55726364]
------
Step:20, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 15835.27702274]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 11887.44774383]
Reward: 9  Episode Reward:  20
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[13905.78097853 18493.12311579 11203.9437825   1875.31501677]
------
Step:21, Action:South
State  257
Old Q Values:  [13905.78097853 18493.12311579 11203.9437825   1875.31501677]
New Q values:  [13905.78097853 12764.58618105 11203.9437825   1875.31501677]
Reward: -301  Episode Reward:  -281
xxxxx
x. gx
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[13905.78097853 12764.58618105 11203.9437825   1875.31501677]
------
Step:22, Action:North
State  257
Old Q Values:  [13905.78097853 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [41696.02318874 12764.58618105 11203.9437825   1875.31501677]
Reward: 9  Episode Reward:  -272
xxxxx
x.g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[120427.70265775  23344.73803738  11140.20003573      0.        ]
------
Step:23, Action:North
State  181
Old Q Values:  [1064.47086336  927.40283941 9894.22666233  262.76946019]
New Q values:  [ 743.9339486   927.40283941 9894.22666233  262.76946019]
Reward: 9  Episode Reward:  -263
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1042.48534419 -2165.66138672  -180.6       ]
------
Step:24, Action:South
State  111
Old Q Values:  [-177.44732869  877.32989506  520.46511977 -120.29354603]
New Q values:  [-177.44732869 3318.59995673  520.46511977 -120.29354603]
Reward: -1  Episode Reward:  -264
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 743.9339486   927.40283941 9894.22666233  262.76946019]
------
Step:25, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562 16227.68225202     0.        ]
New Q values:  [16872.63888686 16101.90751562 75602.47610322     0.        ]
Reward: 100009  Episode Reward:  99745
xxxxx
x   x
x a x
x  gx
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 743.9339486   927.40283941 9894.22666233  262.76946019]
------
Step:1, Action:East
State  189
Old Q Values:  [ 275.08817949 1179.02382768 1028.56265462  154.04646645]
New Q values:  [ 275.08817949 1179.02382768 3424.0938631   154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  10024.2293375   2702.62827481   568.38654082]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 15918.59333286 10961.70973593  1460.9765133 ]
New Q values:  [-5922.26708831  9939.07165629 10961.70973593  1460.9765133 ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 11887.44774383]
------
Step:3, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  2005.67019312]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  1366.53458323]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1862.88835326   26.73544252 1164.14667753  123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [1862.88835326   26.73544252 1164.14667753  123.6214372 ]
New Q values:  [3712.82334      26.73544252 1164.14667753  123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 743.9339486   927.40283941 9894.22666233  262.76946019]
------
Step:5, Action:South
State  183
Old Q Values:  [ 861.48670303 1432.17919821 7145.26684725 1554.80203889]
New Q values:  [ 861.48670303 1686.11868129 7145.26684725 1554.80203889]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3712.82334      26.73544252 1164.14667753  123.6214372 ]
------
Step:6, Action:North
State  261
Old Q Values:  [3712.82334      26.73544252 1164.14667753  123.6214372 ]
New Q values:  [3628.10939018   26.73544252 1164.14667753  123.6214372 ]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 7145.26684725 1554.80203889]
------
Step:7, Action:East
State  183
Old Q Values:  [ 861.48670303 1686.11868129 7145.26684725 1554.80203889]
New Q values:  [ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.17042884e+04  1.03161518e+03]
------
Step:8, Action:East
State  195
Old Q Values:  [   38.85388605  3096.86073896 30353.34400805  1169.39963074]
New Q values:  [   38.85388605  3096.86073896 12900.39810421  1169.39963074]
Reward: 9  Episode Reward:  32
xxxxx
x.. x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 2512.20166996 -3032.08138794   790.72804752  1050.85266124]
------
Step:9, Action:North
State  210
Old Q Values:  [ 2512.20166996 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [28655.87891776 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  31
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    3116.80540322  -180.00807518 92171.99416593]
------
Step:10, Action:West
State  130
Old Q Values:  [26266.584521    3116.80540322  -180.00807518 92171.99416593]
New Q values:  [26266.584521    3116.80540322  -180.00807518 72910.15149187]
Reward: 9  Episode Reward:  40
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   26400.13275764 120119.84608498]
------
Step:11, Action:West
State  126
Old Q Values:  [   0.          331.64678262 6806.92715127  768.78001264]
New Q values:  [   0.          331.64678262 6806.92715127  825.64977511]
Reward: 9  Episode Reward:  49
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1709.12590018  217.32156498 -180.6       ]
------
Step:12, Action:East
State  108
Old Q Values:  [-8463.16477134  2610.77248427   225.07163544     0.        ]
New Q values:  [-8463.16477134  2610.77248427  1377.26533419     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 4292.78893337  850.80874877]
------
Step:13, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 4292.78893337  850.80874877]
New Q values:  [   0.         1166.51141701 5020.85681892  850.80874877]
Reward: -1  Episode Reward:  47
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 11014.47081858 -2383.80019164   605.13285854]
------
Step:14, Action:South
State  136
Old Q Values:  [ -170.77177351 11014.47081858 -2383.80019164   605.13285854]
New Q values:  [ -170.77177351  5656.81078241 -2383.80019164   605.13285854]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
------
Step:15, Action:South
State  208
Old Q Values:  [ 7001.02927741 14323.72164941 -4584.50430574 -1713.91177491]
New Q values:  [ 7001.02927741 69973.84393224 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100055
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7516.68343281 -8521.23367799  2205.73600612 25808.55726364]
------
Step:1, Action:North
State  272
Old Q Values:  [ 7516.68343281 -8521.23367799  2205.73600612 25808.55726364]
New Q values:  [ 6523.35989601 -8521.23367799  2205.73600612 25808.55726364]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.17042884e+04  1.03161518e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  1.17042884e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  1.32838790e+04  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28655.87891776 -3032.08138794   790.72804752  1050.85266124]
------
Step:3, Action:North
State  208
Old Q Values:  [ 7001.02927741 69973.84393224 -4584.50430574 -1713.91177491]
New Q values:  [24678.85715853 69973.84393224 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    3116.80540322  -180.00807518 72910.15149187]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  2.70831620e+03]
New Q values:  [ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  3.71246803e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   26400.13275764 120119.84608498]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  7391.12721848   243.73597312]
New Q values:  [ -281.736      -1150.91067548  7391.12721848   388.13552687]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  424.30030728  950.80379208 -252.78192178]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558  424.30030728  950.80379208 -252.78192178]
New Q values:  [-252.35169558  424.30030728 2597.05968238 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x.g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  7391.12721848   388.13552687]
------
Step:7, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3942.8987648    225.00774757]
New Q values:  [-9594.56523706 -8069.05606225  3273.60274064   225.00774757]
Reward: -1  Episode Reward:  43
xxxxx
x gax
x.  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5656.81078241 -2383.80019164   605.13285854]
------
Step:8, Action:South
State  136
Old Q Values:  [ -170.77177351  5656.81078241 -2383.80019164   605.13285854]
New Q values:  [ -170.77177351 23254.27749264 -2383.80019164   605.13285854]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
x. ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24678.85715853 69973.84393224 -4584.50430574 -1713.91177491]
------
Step:9, Action:South
State  208
Old Q Values:  [24678.85715853 69973.84393224 -4584.50430574 -1713.91177491]
New Q values:  [24678.85715853 32233.89284537 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966 14129.85090825]
------
Step:10, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966 14129.85090825]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966  9217.57468645]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x. gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 11887.44774383]
------
Step:11, Action:West
State  272
Old Q Values:  [ 6523.35989601 -8521.23367799  2205.73600612 25808.55726364]
New Q values:  [ 6523.35989601 -8521.23367799  2205.73600612 22831.62986208]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41696.02318874 12764.58618105 11203.9437825   1875.31501677]
------
Step:12, Action:North
State  257
Old Q Values:  [41696.02318874 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [99364.55210646 12764.58618105 11203.9437825   1875.31501677]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
xa  x
x g x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3628.10939018   26.73544252 1164.14667753  123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [3628.10939018   26.73544252 1164.14667753  123.6214372 ]
New Q values:  [3367.28173461   26.73544252 1164.14667753  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  5160.03171396     0.        ]
New Q values:  [    0.         -5536.05678243  6054.57639733     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.32838790e+04  1.03161518e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  1.32838790e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  1.39157153e+04  1.03161518e+03]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28655.87891776 -3032.08138794   790.72804752  1050.85266124]
------
Step:4, Action:North
State  208
Old Q Values:  [24678.85715853 32233.89284537 -4584.50430574 -1713.91177491]
New Q values:  [31749.98831097 32233.89284537 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    3116.80540322  -180.00807518 72910.15149187]
------
Step:5, Action:West
State  130
Old Q Values:  [26266.584521    3116.80540322  -180.00807518 72910.15149187]
New Q values:  [26266.584521    3116.80540322  -180.00807518 65205.41442224]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   26400.13275764 120119.84608498]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  7391.12721848   388.13552687]
New Q values:  [ -281.736      -1150.91067548  7391.12721848   673.3919808 ]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1709.12590018  217.32156498 -180.6       ]
------
Step:7, Action:South
State  110
Old Q Values:  [-239.29051573 1709.12590018  217.32156498 -180.6       ]
New Q values:  [ -239.29051573 -4378.67418527   217.32156498  -180.6       ]
Reward: -10001  Episode Reward:  -9947
xxxxx
x   x
xg  x
x . x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  7391.12721848   673.3919808 ]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  7391.12721848   673.3919808 ]
New Q values:  [ -281.736      -1150.91067548 14099.2549793    673.3919808 ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  3.71246803e+04]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  3.71246803e+04]
New Q values:  [ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  1.90790486e+04]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 14099.2549793    673.3919808 ]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 14099.2549793    673.3919808 ]
New Q values:  [ -281.736      -1150.91067548 11362.81657662   673.3919808 ]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  1.90790486e+04]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  1.90790486e+04]
New Q values:  [ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  1.10398644e+04]
Reward: -1  Episode Reward:  6
xxxxx
x.a x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 11362.81657662   673.3919808 ]
------
Step:5, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3273.60274064   225.00774757]
New Q values:  [-9594.56523706 -8069.05606225  8285.12434405   225.00774757]
Reward: -1  Episode Reward:  5
xxxxx
x.gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 23254.27749264 -2383.80019164   605.13285854]
------
Step:6, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.94513335e+03 -3.22965309e-01  1.10398644e+04]
New Q values:  [ 8.43634063e+00  1.28536212e+04 -3.22965309e-01  1.10398644e+04]
Reward: 9  Episode Reward:  14
xxxxx
x.  x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31749.98831097 32233.89284537 -4584.50430574 -1713.91177491]
------
Step:7, Action:South
State  208
Old Q Values:  [31749.98831097 32233.89284537 -4584.50430574 -1713.91177491]
New Q values:  [31749.98831097 15664.22954408 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  23
xxxxx
x.  x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966  9217.57468645]
------
Step:8, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966  9217.57468645]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966 10541.9188332 ]
Reward: 9  Episode Reward:  32
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6523.35989601 -8521.23367799  2205.73600612 22831.62986208]
------
Step:9, Action:West
State  272
Old Q Values:  [ 6523.35989601 -8521.23367799  2205.73600612 22831.62986208]
New Q values:  [ 6523.35989601 -8521.23367799  2205.73600612 38941.41757677]
Reward: -1  Episode Reward:  31
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[99364.55210646 12764.58618105 11203.9437825   1875.31501677]
------
Step:10, Action:North
State  257
Old Q Values:  [99364.55210646 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [75879.53163991 12764.58618105 11203.9437825   1875.31501677]
Reward: 9  Episode Reward:  40
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[120427.70265775  23344.73803738  11140.20003573      0.        ]
------
Step:11, Action:North
State  181
Old Q Values:  [ 743.9339486   927.40283941 9894.22666233  262.76946019]
New Q values:  [1298.55356646  927.40283941 9894.22666233  262.76946019]
Reward: 9  Episode Reward:  49
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3318.59995673  520.46511977 -120.29354603]
------
Step:12, Action:South
State  109
Old Q Values:  [ -241.10880094  1042.48534419 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  3384.66213638 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1298.55356646  927.40283941 9894.22666233  262.76946019]
------
Step:13, Action:East
State  177
Old Q Values:  [120427.70265775  23344.73803738  11140.20003573      0.        ]
New Q values:  [120427.70265775  23344.73803738  67749.99293507      0.        ]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6523.35989601 -8521.23367799  2205.73600612 38941.41757677]
------
Step:1, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  1366.53458323]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  2047.5124681 ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2686.5107559  -5704.51612281  4984.99544937 -5679.36893145]
------
Step:2, Action:East
State  260
Old Q Values:  [ 2686.5107559  -5704.51612281  4984.99544937 -5679.36893145]
New Q values:  [ 2686.5107559  -5704.51612281 13675.82345278 -5679.36893145]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6523.35989601 -8521.23367799  2205.73600612 38941.41757677]
------
Step:3, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  2047.5124681 ]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  1828.58950762]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3367.28173461   26.73544252 1164.14667753  123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [3367.28173461   26.73544252 1164.14667753  123.6214372 ]
New Q values:  [4320.58069254   26.73544252 1164.14667753  123.6214372 ]
Reward: 9  Episode Reward:  16
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1298.55356646  927.40283941 9894.22666233  262.76946019]
------
Step:5, Action:East
State  181
Old Q Values:  [1298.55356646  927.40283941 9894.22666233  262.76946019]
New Q values:  [ 1298.55356646   927.40283941 10407.1166687    262.76946019]
Reward: 9  Episode Reward:  25
xxxxx
x g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:6, Action:East
State  193
Old Q Values:  [-5922.26708831  9939.07165629 10961.70973593  1460.9765133 ]
New Q values:  [-5922.26708831  9939.07165629 13915.08038766  1460.9765133 ]
Reward: 9  Episode Reward:  34
xxxxx
x .gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31749.98831097 15664.22954408 -4584.50430574 -1713.91177491]
------
Step:7, Action:South
State  208
Old Q Values:  [31749.98831097 15664.22954408 -4584.50430574 -1713.91177491]
New Q values:  [31749.98831097  9433.66746759 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  43
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966 10541.9188332 ]
------
Step:8, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966 10541.9188332 ]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966  7782.40185643]
Reward: -1  Episode Reward:  42
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 11887.44774383]
------
Step:9, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 11887.44774383]
New Q values:  [3915.56039739 -168.92307549 4979.82966255 6050.5533053 ]
Reward: -1  Episode Reward:  41
xxxxx
x .gx
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4320.58069254   26.73544252 1164.14667753  123.6214372 ]
------
Step:10, Action:North
State  261
Old Q Values:  [4320.58069254   26.73544252 1164.14667753  123.6214372 ]
New Q values:  [4849.76727763   26.73544252 1164.14667753  123.6214372 ]
Reward: -1  Episode Reward:  40
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1298.55356646   927.40283941 10407.1166687    262.76946019]
------
Step:11, Action:East
State  180
Old Q Values:  [ -122.403451    8050.99445658 10792.04060668 -4966.32149798]
New Q values:  [ -122.403451    8050.99445658 16316.70279614 -4966.32149798]
Reward: -1  Episode Reward:  39
xxxxx
xg..x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.20995314e+04 1.95186642e+04 4.00016218e+04]
------
Step:12, Action:West
State  192
Old Q Values:  [3.89777037e-01 1.20995314e+04 1.95186642e+04 4.00016218e+04]
New Q values:  [3.89777037e-01 1.20995314e+04 1.95186642e+04 1.48950596e+04]
Reward: -10001  Episode Reward:  -9962
xxxxx
x ..x
xg  x
x   x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.28536212e+04 -3.22965309e-01  1.10398644e+04]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.28536212e+04 -3.22965309e-01  1.10398644e+04]
New Q values:  [ 8.43634063e+00  1.37436122e+04 -3.22965309e-01  1.10398644e+04]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28655.87891776 -3032.08138794   790.72804752  1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [28655.87891776 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [15584.83521301 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.37436122e+04 -3.22965309e-01  1.10398644e+04]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.37436122e+04 -3.22965309e-01  1.10398644e+04]
New Q values:  [ 8.43634063e+00  1.50218414e+04 -3.22965309e-01  1.10398644e+04]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31749.98831097  9433.66746759 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  208
Old Q Values:  [31749.98831097  9433.66746759 -4584.50430574 -1713.91177491]
New Q values:  [17205.94773074  9433.66746759 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.50218414e+04 -3.22965309e-01  1.10398644e+04]
------
Step:5, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.50218414e+04 -3.22965309e-01  1.10398644e+04]
New Q values:  [ 8.43634063e+00  1.11699209e+04 -3.22965309e-01  1.10398644e+04]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17205.94773074  9433.66746759 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  210
Old Q Values:  [15584.83521301 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [ 9584.31034351 -3032.08138794   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  4
xxxxx
x..ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.11699209e+04 -3.22965309e-01  1.10398644e+04]
------
Step:7, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.11699209e+04 -3.22965309e-01  1.10398644e+04]
New Q values:  [ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  1.10398644e+04]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 9584.31034351 -3032.08138794   790.72804752  1050.85266124]
------
Step:8, Action:North
State  208
Old Q Values:  [17205.94773074  9433.66746759 -4584.50430574 -1713.91177491]
New Q values:  [10193.73841815  9433.66746759 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  2
xxxxx
x..ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  1.10398644e+04]
------
Step:9, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  1.10398644e+04]
New Q values:  [ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  7.83019074e+03]
Reward: 9  Episode Reward:  11
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 11362.81657662   673.3919808 ]
------
Step:10, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  8285.12434405   225.00774757]
New Q values:  [-9594.56523706 -8069.05606225 10289.73298541   225.00774757]
Reward: -1  Episode Reward:  10
xxxxx
x.gax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 23254.27749264 -2383.80019164   605.13285854]
------
Step:11, Action:South
State  136
Old Q Values:  [ -170.77177351 23254.27749264 -2383.80019164   605.13285854]
New Q values:  [ -170.77177351 12359.2325225  -2383.80019164   605.13285854]
Reward: -1  Episode Reward:  9
xxxxx
x. gx
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10193.73841815  9433.66746759 -4584.50430574 -1713.91177491]
------
Step:12, Action:South
State  208
Old Q Values:  [10193.73841815  9433.66746759 -4584.50430574 -1713.91177491]
New Q values:  [10193.73841815  6113.58754397 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966  7782.40185643]
------
Step:13, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966  7782.40185643]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966  4927.52673416]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 6050.5533053 ]
------
Step:14, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549 4979.82966255 6050.5533053 ]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 25189.48081409]
Reward: 9  Episode Reward:  26
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[75879.53163991 12764.58618105 11203.9437825   1875.31501677]
------
Step:15, Action:North
State  257
Old Q Values:  [75879.53163991 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [66485.52345329 12764.58618105 11203.9437825   1875.31501677]
Reward: 9  Episode Reward:  35
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[120427.70265775  23344.73803738  67749.99293507      0.        ]
------
Step:16, Action:North
State  181
Old Q Values:  [ 1298.55356646   927.40283941 10407.1166687    262.76946019]
New Q values:  [ 1520.4014136    927.40283941 10407.1166687    262.76946019]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3318.59995673  520.46511977 -120.29354603]
------
Step:17, Action:South
State  111
Old Q Values:  [-177.44732869 3318.59995673  520.46511977 -120.29354603]
New Q values:  [-177.44732869 3237.47796122  520.46511977 -120.29354603]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
------
Step:18, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562 75602.47610322     0.        ]
New Q values:  [16872.63888686 16101.90751562 94421.10502859     0.        ]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
------
Step:1, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  6054.57639733     0.        ]
New Q values:  [    0.         -5536.05678243  6601.94514623     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.39157153e+04  1.03161518e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  1.39157153e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  8.44697922e+03  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 9584.31034351 -3032.08138794   790.72804752  1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [ 9584.31034351 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [23400.74846408 -3032.08138794   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x.g.x
xxxxx
Step:4, Action:South
State  130
Old Q Values:  [26266.584521    3116.80540322  -180.00807518 65205.41442224]
New Q values:  [26266.584521    4304.24368673  -180.00807518 65205.41442224]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10193.73841815  6113.58754397 -4584.50430574 -1713.91177491]
------
Step:5, Action:North
State  208
Old Q Values:  [10193.73841815  6113.58754397 -4584.50430574 -1713.91177491]
New Q values:  [23638.51969393  6113.58754397 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  25
xxxxx
x..ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    4304.24368673  -180.00807518 65205.41442224]
------
Step:6, Action:West
State  136
Old Q Values:  [ -170.77177351 12359.2325225  -2383.80019164   605.13285854]
New Q values:  [ -170.77177351 12359.2325225  -2383.80019164  3334.37303904]
Reward: 9  Episode Reward:  34
xxxxx
xga x
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225 10289.73298541   225.00774757]
------
Step:7, Action:East
State  112
Old Q Values:  [     0.           3629.92591876  38419.06454384 110949.76306292]
New Q values:  [     0.           3629.92591876  19074.79557429 110949.76306292]
Reward: -1  Episode Reward:  33
xxxxx
x.gax
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 12359.2325225  -2383.80019164  3334.37303904]
------
Step:8, Action:South
State  136
Old Q Values:  [ -170.77177351 12359.2325225  -2383.80019164  3334.37303904]
New Q values:  [ -170.77177351  6194.71546398 -2383.80019164  3334.37303904]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
------
Step:9, Action:South
State  208
Old Q Values:  [23638.51969393  6113.58754397 -4584.50430574 -1713.91177491]
New Q values:  [23638.51969393  3929.09303784 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  41
xxxxx
x.g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966  4927.52673416]
------
Step:10, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966  4927.52673416]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966 13652.8359667 ]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6523.35989601 -8521.23367799  2205.73600612 38941.41757677]
------
Step:11, Action:West
State  272
Old Q Values:  [ 6523.35989601 -8521.23367799  2205.73600612 38941.41757677]
New Q values:  [ 6523.35989601 -8521.23367799  2205.73600612 35527.62406669]
Reward: 9  Episode Reward:  49
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[66485.52345329 12764.58618105 11203.9437825   1875.31501677]
------
Step:12, Action:North
State  257
Old Q Values:  [66485.52345329 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [62721.92017864 12764.58618105 11203.9437825   1875.31501677]
Reward: -1  Episode Reward:  48
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[120427.70265775  23344.73803738  67749.99293507      0.        ]
------
Step:13, Action:North
State  177
Old Q Values:  [120427.70265775  23344.73803738  67749.99293507      0.        ]
New Q values:  [124933.49037662  23344.73803738  67749.99293507      0.        ]
Reward: 100009  Episode Reward:  100057
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  7.83019074e+03]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  7.83019074e+03]
New Q values:  [ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  6.54632127e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 11362.81657662   673.3919808 ]
------
Step:2, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2775.48809382   766.61551311]
New Q values:  [ -253.44886264 -1902.20915811  3312.39367177   766.61551311]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  6.54632127e+03]
------
Step:3, Action:West
State  136
Old Q Values:  [ -170.77177351  6194.71546398 -2383.80019164  3334.37303904]
New Q values:  [ -170.77177351  6194.71546398 -2383.80019164  1457.76546673]
Reward: -1  Episode Reward:  7
xxxxx
x.agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:4, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225 10289.73298541   225.00774757]
New Q values:  [-9594.56523706 -8069.05606225 10289.73298541  1110.80173994]
Reward: 9  Episode Reward:  16
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  3384.66213638 -2165.66138672  -180.6       ]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 3237.47796122  520.46511977 -120.29354603]
New Q values:  [-177.44732869 2327.61934342  520.46511977 -120.29354603]
Reward: 9  Episode Reward:  25
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1179.02382768 3424.0938631   154.04646645]
------
Step:6, Action:South
State  189
Old Q Values:  [ 275.08817949 1179.02382768 3424.0938631   154.04646645]
New Q values:  [ 275.08817949 1931.93971436 3424.0938631   154.04646645]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4849.76727763   26.73544252 1164.14667753  123.6214372 ]
------
Step:7, Action:North
State  261
Old Q Values:  [4849.76727763   26.73544252 1164.14667753  123.6214372 ]
New Q values:  [2966.53506998   26.73544252 1164.14667753  123.6214372 ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1931.93971436 3424.0938631   154.04646645]
------
Step:8, Action:East
State  189
Old Q Values:  [ 275.08817949 1931.93971436 3424.0938631   154.04646645]
New Q values:  [ 275.08817949 1931.93971436 2032.61219303  154.04646645]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  200.3419716 ]
------
Step:9, Action:South
State  205
Old Q Values:  [   0.         1929.52417238    0.          198.38683706]
New Q values:  [   0.         1089.6411031     0.          198.38683706]
Reward: 9  Episode Reward:  51
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.          315.20206867 1041.43811381]
------
Step:10, Action:West
State  277
Old Q Values:  [   1.64433       0.          315.20206867 1041.43811381]
New Q values:  [   1.64433       0.          315.20206867 1305.93576652]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2966.53506998   26.73544252 1164.14667753  123.6214372 ]
------
Step:11, Action:North
State  257
Old Q Values:  [62721.92017864 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [53414.49958003 12764.58618105 11203.9437825   1875.31501677]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[16872.63888686 16101.90751562 94421.10502859     0.        ]
------
Step:12, Action:East
State  189
Old Q Values:  [ 275.08817949 1931.93971436 2032.61219303  154.04646645]
New Q values:  [ 275.08817949 1931.93971436 1139.33720814  154.04646645]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1089.6411031     0.          198.38683706]
------
Step:13, Action:South
State  195
Old Q Values:  [   38.85388605  3096.86073896 12900.39810421  1169.39963074]
New Q values:  [   38.85388605  8794.98853981 12900.39810421  1169.39963074]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 25189.48081409]
------
Step:14, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  1828.58950762]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  1620.79632404]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2966.53506998   26.73544252 1164.14667753  123.6214372 ]
------
Step:15, Action:North
State  260
Old Q Values:  [ 2686.5107559  -5704.51612281 13675.82345278 -5679.36893145]
New Q values:  [ 1612.46541007 -5704.51612281 13675.82345278 -5679.36893145]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:16, Action:East
State  191
Old Q Values:  [   3.06655861  941.79461398 1003.17383734    0.        ]
New Q values:  [   3.06655861  941.79461398 1151.91422091    0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.         2504.14895323 1884.45205023    0.        ]
------
Step:17, Action:East
State  195
Old Q Values:  [   38.85388605  8794.98853981 12900.39810421  1169.39963074]
New Q values:  [   38.85388605  8794.98853981 12179.78378091  1169.39963074]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[23400.74846408 -3032.08138794   790.72804752  1050.85266124]
------
Step:18, Action:North
State  218
Old Q Values:  [1138.99590011 3989.89183125    0.         1847.21017375]
New Q values:  [2657.79679428 3989.89183125    0.         1847.21017375]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  6.54632127e+03]
------
Step:19, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.34266145e+03 -3.22965309e-01  6.54632127e+03]
New Q values:  [ 8.43634063e+00  4.13343213e+03 -3.22965309e-01  6.54632127e+03]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2657.79679428 3989.89183125    0.         1847.21017375]
------
Step:20, Action:South
State  208
Old Q Values:  [23638.51969393  3929.09303784 -4584.50430574 -1713.91177491]
New Q values:  [23638.51969393 65672.88800514 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2327.61934342  520.46511977 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3243.43236659  238.35800069    0.        ]
New Q values:  [ 221.30610858 4424.90794725  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1520.4014136    927.40283941 10407.1166687    262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [ 1520.4014136    927.40283941 10407.1166687    262.76946019]
New Q values:  [1520.4014136   927.40283941 4612.27267125  262.76946019]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x g.x
x.. x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 11362.81657662   673.3919808 ]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 11362.81657662   673.3919808 ]
New Q values:  [ -281.736      -1150.91067548  6514.42301144   673.3919808 ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.13343213e+03 -3.22965309e-01  6.54632127e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  4.13343213e+03 -3.22965309e-01  6.54632127e+03]
New Q values:  [ 8.43634063e+00  4.13343213e+03 -3.22965309e-01  4.57225541e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6514.42301144   673.3919808 ]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225 10289.73298541  1110.80173994]
New Q values:  [-9594.56523706 -8069.05606225  5973.70783336  1110.80173994]
Reward: -1  Episode Reward:  7
xxxxx
x.gax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6194.71546398 -2383.80019164  1457.76546673]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.13343213e+03 -3.22965309e-01  4.57225541e+03]
New Q values:  [ 8.43634063e+00  2.91039531e+03 -3.22965309e-01  4.57225541e+03]
Reward: 9  Episode Reward:  16
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
------
Step:5, Action:South
State  208
Old Q Values:  [23638.51969393 65672.88800514 -4584.50430574 -1713.91177491]
New Q values:  [23638.51969393 30370.40599207 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966 13652.8359667 ]
------
Step:6, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966 13652.8359667 ]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966 13023.37863091]
Reward: 9  Episode Reward:  34
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 25189.48081409]
------
Step:7, Action:West
State  272
Old Q Values:  [ 6523.35989601 -8521.23367799  2205.73600612 35527.62406669]
New Q values:  [ 6523.35989601 -8521.23367799  2205.73600612 30240.79950069]
Reward: 9  Episode Reward:  43
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[53414.49958003 12764.58618105 11203.9437825   1875.31501677]
------
Step:8, Action:North
State  257
Old Q Values:  [53414.49958003 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [58851.246945   12764.58618105 11203.9437825   1875.31501677]
Reward: 9  Episode Reward:  52
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[124933.49037662  23344.73803738  67749.99293507      0.        ]
------
Step:9, Action:North
State  177
Old Q Values:  [124933.49037662  23344.73803738  67749.99293507      0.        ]
New Q values:  [120985.1011118   23344.73803738  67749.99293507      0.        ]
Reward: 100009  Episode Reward:  100061
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966 13023.37863091]
------
Step:1, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966 13023.37863091]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966 14286.99130257]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6523.35989601 -8521.23367799  2205.73600612 30240.79950069]
------
Step:2, Action:West
State  272
Old Q Values:  [ 6523.35989601 -8521.23367799  2205.73600612 30240.79950069]
New Q values:  [ 6523.35989601 -8521.23367799  2205.73600612 16204.46683611]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1612.46541007 -5704.51612281 13675.82345278 -5679.36893145]
------
Step:3, Action:East
State  261
Old Q Values:  [2966.53506998   26.73544252 1164.14667753  123.6214372 ]
New Q values:  [2966.53506998   26.73544252 5326.39872184  123.6214372 ]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
x...x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6523.35989601 -8521.23367799  2205.73600612 16204.46683611]
------
Step:4, Action:West
State  272
Old Q Values:  [ 6523.35989601 -8521.23367799  2205.73600612 16204.46683611]
New Q values:  [ 6523.35989601 -8521.23367799  2205.73600612  8079.106351  ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2966.53506998   26.73544252 5326.39872184  123.6214372 ]
------
Step:5, Action:East
State  261
Old Q Values:  [2966.53506998   26.73544252 5326.39872184  123.6214372 ]
New Q values:  [ 2966.53506998    26.73544252 -1446.30860596   123.6214372 ]
Reward: -10001  Episode Reward:  -9985
xxxxx
x ..x
x...x
x g x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  8.44697922e+03  1.03161518e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  8.44697922e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  1.04044162e+04  1.03161518e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x g.x
xxxxx
Step:2, Action:South
State  210
Old Q Values:  [23400.74846408 -3032.08138794   790.72804752  1050.85266124]
New Q values:  [23400.74846408 -2921.33516441   790.72804752  1050.85266124]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
x.  x
x .gx
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23638.51969393 30370.40599207 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [23638.51969393 30370.40599207 -4584.50430574 -1713.91177491]
New Q values:  [23638.51969393 16439.6597876  -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966 14286.99130257]
------
Step:2, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966 14286.99130257]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966  8143.92842633]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6523.35989601 -8521.23367799  2205.73600612  8079.106351  ]
------
Step:3, Action:West
State  272
Old Q Values:  [ 6523.35989601 -8521.23367799  2205.73600612  8079.106351  ]
New Q values:  [ 6523.35989601 -8521.23367799  2205.73600612  4127.00306139]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2966.53506998    26.73544252 -1446.30860596   123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [ 2966.53506998    26.73544252 -1446.30860596   123.6214372 ]
New Q values:  [ 3096.65200653    26.73544252 -1446.30860596   123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
------
Step:5, Action:East
State  181
Old Q Values:  [1520.4014136   927.40283941 4612.27267125  262.76946019]
New Q values:  [1520.4014136   927.40283941 1705.90832957  262.76946019]
Reward: -9991  Episode Reward:  -9965
xxxxx
x...x
x g x
x   x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.20995314e+04 1.95186642e+04 1.48950596e+04]
------
Step:1, Action:East
State  200
Old Q Values:  [   62.8218634  10024.2293375   2702.62827481   568.38654082]
New Q values:  [   62.8218634  10024.2293375   2338.0737649    568.38654082]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
------
Step:2, Action:South
State  208
Old Q Values:  [23638.51969393 16439.6597876  -4584.50430574 -1713.91177491]
New Q values:  [23638.51969393  9024.44244294 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966  8143.92842633]
------
Step:3, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966  8143.92842633]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966  5219.97933933]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6523.35989601 -8521.23367799  2205.73600612  4127.00306139]
------
Step:4, Action:North
State  272
Old Q Values:  [ 6523.35989601 -8521.23367799  2205.73600612  4127.00306139]
New Q values:  [ 8464.34321948 -8521.23367799  2205.73600612  4127.00306139]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.20995314e+04 1.95186642e+04 1.48950596e+04]
------
Step:5, Action:East
State  193
Old Q Values:  [-5922.26708831  9939.07165629 13915.08038766  1460.9765133 ]
New Q values:  [-5922.26708831  9939.07165629 12656.98806325  1460.9765133 ]
Reward: -1  Episode Reward:  25
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23638.51969393  9024.44244294 -4584.50430574 -1713.91177491]
------
Step:6, Action:South
State  208
Old Q Values:  [23638.51969393  9024.44244294 -4584.50430574 -1713.91177491]
New Q values:  [23638.51969393  5175.17077897 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  24
xxxxx
x. .x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966  5219.97933933]
------
Step:7, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966  5219.97933933]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966  4626.69470158]
Reward: -1  Episode Reward:  23
xxxxx
x. .x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 8464.34321948 -8521.23367799  2205.73600612  4127.00306139]
------
Step:8, Action:West
State  272
Old Q Values:  [ 8464.34321948 -8521.23367799  2205.73600612  4127.00306139]
New Q values:  [ 8464.34321948 -8521.23367799  2205.73600612 11809.77306455]
Reward: 9  Episode Reward:  32
xxxxx
x. .x
xg  x
xa  x
xxxxx
Step:9, Action:South
State  256
Old Q Values:  [33845.23946664 10195.91087381  7407.2956525    644.94785455]
New Q values:  [33845.23946664 14051.33618952  7407.2956525    644.94785455]
Reward: -301  Episode Reward:  -269
xxxxx
xg .x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[33845.23946664 14051.33618952  7407.2956525    644.94785455]
------
Step:10, Action:North
State  257
Old Q Values:  [58851.246945   12764.58618105 11203.9437825   1875.31501677]
New Q values:  [59841.42911154 12764.58618105 11203.9437825   1875.31501677]
Reward: 9  Episode Reward:  -260
xxxxx
x.g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[120985.1011118   23344.73803738  67749.99293507      0.        ]
------
Step:11, Action:North
State  177
Old Q Values:  [120985.1011118   23344.73803738  67749.99293507      0.        ]
New Q values:  [59405.74540588 23344.73803738 67749.99293507     0.        ]
Reward: 9  Episode Reward:  -251
xxxxx
xa gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:NE
[    0.         36687.68320385     0.             0.        ]
------
Step:12, Action:South
State  103
Old Q Values:  [ 221.30610858 4424.90794725  238.35800069    0.        ]
New Q values:  [ 221.30610858 2281.13567777  238.35800069    0.        ]
Reward: -1  Episode Reward:  -252
xxxxx
x  .x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1520.4014136   927.40283941 1705.90832957  262.76946019]
------
Step:13, Action:East
State  177
Old Q Values:  [59405.74540588 23344.73803738 67749.99293507     0.        ]
New Q values:  [59405.74540588 23344.73803738 30896.493593       0.        ]
Reward: -1  Episode Reward:  -253
xxxxx
x  gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9939.07165629 12656.98806325  1460.9765133 ]
------
Step:14, Action:East
State  197
Old Q Values:  [-5833.78831344  1495.56024905 -4510.80210702   403.06255908]
New Q values:  [-5833.78831344  1495.56024905  -713.36493463   403.06255908]
Reward: -10001  Episode Reward:  -10254
xxxxx
x  .x
x  gx
x   x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.91039531e+03 -3.22965309e-01  4.57225541e+03]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  2.91039531e+03 -3.22965309e-01  4.57225541e+03]
New Q values:  [ 8.43634063e+00  2.91039531e+03 -3.22965309e-01  2.82802027e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3312.39367177   766.61551311]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  6514.42301144   673.3919808 ]
New Q values:  [ -281.736      -1150.91067548  3478.28779647   673.3919808 ]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.91039531e+03 -3.22965309e-01  2.82802027e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.91039531e+03 -3.22965309e-01  2.82802027e+03]
New Q values:  [ 8.43634063e+00  2.26111403e+03 -3.22965309e-01  2.82802027e+03]
Reward: -9991  Episode Reward:  -9983
xxxxx
x.  x
x..gx
x.. x
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3478.28779647   673.3919808 ]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3478.28779647   673.3919808 ]
New Q values:  [ -281.736      -1150.91067548  2245.12119839   673.3919808 ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.26111403e+03 -3.22965309e-01  2.82802027e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  2.26111403e+03 -3.22965309e-01  2.82802027e+03]
New Q values:  [ 8.43634063e+00  2.26111403e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2245.12119839   673.3919808 ]
------
Step:3, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3312.39367177   766.61551311]
New Q values:  [ -253.44886264 -1902.20915811  2002.69167792   766.61551311]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.26111403e+03 -3.22965309e-01  1.80414447e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.26111403e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  2.00140152e+03 -3.22965309e-01  1.80414447e+03]
Reward: -9991  Episode Reward:  -9984
xxxxx
x.  x
x..gx
x. .x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.00140152e+03 -3.22965309e-01  1.80414447e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.00140152e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  7.82618515e+03 -3.22965309e-01  1.80414447e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[23400.74846408 -2921.33516441   790.72804752  1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [23400.74846408 -2921.33516441   790.72804752  1050.85266124]
New Q values:  [11707.55492985 -2921.33516441   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.82618515e+03 -3.22965309e-01  1.80414447e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.82618515e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  6.64214054e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11707.55492985 -2921.33516441   790.72804752  1050.85266124]
------
Step:4, Action:North
State  210
Old Q Values:  [11707.55492985 -2921.33516441   790.72804752  1050.85266124]
New Q values:  [ 6675.06413332 -2921.33516441   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  6.64214054e+03 -3.22965309e-01  1.80414447e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  6.64214054e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  9.74781212e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23638.51969393  5175.17077897 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  210
Old Q Values:  [ 6675.06413332 -2921.33516441   790.72804752  1050.85266124]
New Q values:  [ 5593.76929033 -2921.33516441   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  4
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  9.74781212e+03 -3.22965309e-01  1.80414447e+03]
------
Step:7, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  9.74781212e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  1.09900808e+04 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23638.51969393  5175.17077897 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  208
Old Q Values:  [23638.51969393  5175.17077897 -4584.50430574 -1713.91177491]
New Q values:  [11313.22251677  5175.17077897 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  2
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6194.71546398 -2383.80019164  1457.76546673]
------
Step:9, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.09900808e+04 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  7.78939906e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  1
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11313.22251677  5175.17077897 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  208
Old Q Values:  [11313.22251677  5175.17077897 -4584.50430574 -1713.91177491]
New Q values:  [ 6383.1036459   5175.17077897 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  0
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6194.71546398 -2383.80019164  1457.76546673]
------
Step:11, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.78939906e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  5.03009072e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  -1
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6383.1036459   5175.17077897 -4584.50430574 -1713.91177491]
------
Step:12, Action:North
State  208
Old Q Values:  [ 6383.1036459   5175.17077897 -4584.50430574 -1713.91177491]
New Q values:  [ 4061.66867346  5175.17077897 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -2
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  5.03009072e+03 -3.22965309e-01  1.80414447e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  5.03009072e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  3.68956707e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  -3
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5593.76929033 -2921.33516441   790.72804752  1050.85266124]
------
Step:14, Action:North
State  208
Old Q Values:  [ 4061.66867346  5175.17077897 -4584.50430574 -1713.91177491]
New Q values:  [ 2730.93759155  5175.17077897 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -4
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.68956707e+03 -3.22965309e-01  1.80414447e+03]
------
Step:15, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.68956707e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  3.15335762e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  -5
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5593.76929033 -2921.33516441   790.72804752  1050.85266124]
------
Step:16, Action:North
State  208
Old Q Values:  [ 2730.93759155  5175.17077897 -4584.50430574 -1713.91177491]
New Q values:  [ 2037.78232162  5175.17077897 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -6
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.15335762e+03 -3.22965309e-01  1.80414447e+03]
------
Step:17, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.15335762e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  2.81329428e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  -7
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2037.78232162  5175.17077897 -4584.50430574 -1713.91177491]
------
Step:18, Action:South
State  210
Old Q Values:  [ 5593.76929033 -2921.33516441   790.72804752  1050.85266124]
New Q values:  [5593.76929033  218.87434471  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  -8
xxxxx
x.. x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966  4626.69470158]
------
Step:19, Action:West
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966  4626.69470158]
New Q values:  [ 4538.56653763 -6442.16912869 -8192.20126966  -600.9902    ]
Reward: -9991  Episode Reward:  -9999
xxxxx
x.. x
x.. x
x.g x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.81329428e+03 -3.22965309e-01  1.80414447e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.81329428e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  2.80884850e+03 -3.22965309e-01  1.80414447e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5593.76929033  218.87434471  790.72804752 1050.85266124]
------
Step:2, Action:North
State  208
Old Q Values:  [ 2037.78232162  5175.17077897 -4584.50430574 -1713.91177491]
New Q values:  [ 1657.16747842  5175.17077897 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.80884850e+03 -3.22965309e-01  1.80414447e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.80884850e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  2.37456185e+03 -3.22965309e-01  1.80414447e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
------
Step:4, Action:South
State  208
Old Q Values:  [ 1657.16747842  5175.17077897 -4584.50430574 -1713.91177491]
New Q values:  [ 1657.16747842  3437.03827288 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  16
xxxxx
x.. x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4538.56653763 -6442.16912869 -8192.20126966  -600.9902    ]
------
Step:5, Action:North
State  288
Old Q Values:  [ 4538.56653763 -6442.16912869 -8192.20126966  -600.9902    ]
New Q values:  [ 3492.95740215 -6442.16912869 -8192.20126966  -600.9902    ]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5593.76929033  218.87434471  790.72804752 1050.85266124]
------
Step:6, Action:North
State  210
Old Q Values:  [5593.76929033  218.87434471  790.72804752 1050.85266124]
New Q values:  [21798.5320428    218.87434471   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  14
xxxxx
x..ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    4304.24368673  -180.00807518 65205.41442224]
------
Step:7, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  2.37456185e+03 -3.22965309e-01  1.80414447e+03]
New Q values:  [ 8.43634063e+00  2.37456185e+03 -3.22965309e-01  1.40059415e+03]
Reward: 9  Episode Reward:  23
xxxxx
x.a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2245.12119839   673.3919808 ]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2245.12119839   673.3919808 ]
New Q values:  [ -281.736      -1150.91067548  1609.81703576   673.3919808 ]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.37456185e+03 -3.22965309e-01  1.40059415e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.37456185e+03 -3.22965309e-01  1.40059415e+03]
New Q values:  [ 8.43634063e+00  7.48878435e+03 -3.22965309e-01  1.40059415e+03]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21798.5320428    218.87434471   790.72804752  1050.85266124]
------
Step:10, Action:North
State  208
Old Q Values:  [ 1657.16747842  3437.03827288 -4584.50430574 -1713.91177491]
New Q values:  [ 2908.90229778  3437.03827288 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.48878435e+03 -3.22965309e-01  1.40059415e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.48878435e+03 -3.22965309e-01  1.40059415e+03]
New Q values:  [ 8.43634063e+00  9.53447335e+03 -3.22965309e-01  1.40059415e+03]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21798.5320428    218.87434471   790.72804752  1050.85266124]
------
Step:12, Action:North
State  208
Old Q Values:  [ 2908.90229778  3437.03827288 -4584.50430574 -1713.91177491]
New Q values:  [ 4023.30292553  3437.03827288 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  9.53447335e+03 -3.22965309e-01  1.40059415e+03]
------
Step:13, Action:South
State  136
Old Q Values:  [ -170.77177351  6194.71546398 -2383.80019164  1457.76546673]
New Q values:  [ -170.77177351  3728.90864057 -2383.80019164  1457.76546673]
Reward: -1  Episode Reward:  17
xxxxx
xg  x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
------
Step:14, Action:South
State  208
Old Q Values:  [ 4023.30292553  3437.03827288 -4584.50430574 -1713.91177491]
New Q values:  [ 4023.30292553  2422.1025298  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3492.95740215 -6442.16912869 -8192.20126966  -600.9902    ]
------
Step:15, Action:North
State  288
Old Q Values:  [ 3492.95740215 -6442.16912869 -8192.20126966  -600.9902    ]
New Q values:  [ 2603.57383852 -6442.16912869 -8192.20126966  -600.9902    ]
Reward: -1  Episode Reward:  15
xxxxx
x. gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4023.30292553  2422.1025298  -4584.50430574 -1713.91177491]
------
Step:16, Action:South
State  208
Old Q Values:  [ 4023.30292553  2422.1025298  -4584.50430574 -1713.91177491]
New Q values:  [ 4023.30292553  1749.31316348 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
x. gx
x . x
x..ax
xxxxx
Step:17, Action:North
State  288
Old Q Values:  [ 2603.57383852 -6442.16912869 -8192.20126966  -600.9902    ]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  -600.9902    ]
Reward: -10001  Episode Reward:  -9987
xxxxx
x.  x
x .gx
x.. x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21798.5320428    218.87434471   790.72804752  1050.85266124]
------
Step:1, Action:North
State  210
Old Q Values:  [21798.5320428    218.87434471   790.72804752  1050.85266124]
New Q values:  [11585.15482354   218.87434471   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  9.53447335e+03 -3.22965309e-01  1.40059415e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  9.53447335e+03 -3.22965309e-01  1.40059415e+03]
New Q values:  [ 8.43634063e+00  7.28873579e+03 -3.22965309e-01  1.40059415e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11585.15482354   218.87434471   790.72804752  1050.85266124]
------
Step:3, Action:North
State  208
Old Q Values:  [ 4023.30292553  1749.31316348 -4584.50430574 -1713.91177491]
New Q values:  [ 3795.3419069   1749.31316348 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.28873579e+03 -3.22965309e-01  1.40059415e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.28873579e+03 -3.22965309e-01  1.40059415e+03]
New Q values:  [ 8.43634063e+00  4.05349689e+03 -3.22965309e-01  1.40059415e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
xg.ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3795.3419069   1749.31316348 -4584.50430574 -1713.91177491]
------
Step:5, Action:North
State  208
Old Q Values:  [ 3795.3419069   1749.31316348 -4584.50430574 -1713.91177491]
New Q values:  [ 2636.20935493  1749.31316348 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  5
xxxxx
xg.ax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  3728.90864057 -2383.80019164  1457.76546673]
------
Step:6, Action:South
State  136
Old Q Values:  [ -170.77177351  3728.90864057 -2383.80019164  1457.76546673]
New Q values:  [ -170.77177351  2281.82626271 -2383.80019164  1457.76546673]
Reward: -1  Episode Reward:  4
xxxxx
x.g x
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2636.20935493  1749.31316348 -4584.50430574 -1713.91177491]
------
Step:7, Action:North
State  208
Old Q Values:  [ 2636.20935493  1749.31316348 -4584.50430574 -1713.91177491]
New Q values:  [ 2269.93280827  1749.31316348 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  3
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.05349689e+03 -3.22965309e-01  1.40059415e+03]
------
Step:8, Action:South
State  136
Old Q Values:  [ -170.77177351  2281.82626271 -2383.80019164  1457.76546673]
New Q values:  [ -170.77177351  1593.11034756 -2383.80019164  1457.76546673]
Reward: -1  Episode Reward:  2
xxxxx
x.g x
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2269.93280827  1749.31316348 -4584.50430574 -1713.91177491]
------
Step:9, Action:North
State  208
Old Q Values:  [ 2269.93280827  1749.31316348 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  1749.31316348 -4584.50430574 -1713.91177491]
Reward: -10001  Episode Reward:  -9999
xxxxx
x..gx
x.. x
x. .x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1609.81703576   673.3919808 ]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1609.81703576   673.3919808 ]
New Q values:  [ -281.736      -1150.91067548  1865.3758806    673.3919808 ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.05349689e+03 -3.22965309e-01  1.40059415e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.05349689e+03 -3.22965309e-01  1.40059415e+03]
New Q values:  [ 8.43634063e+00  5.10234520e+03 -3.22965309e-01  1.40059415e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11585.15482354   218.87434471   790.72804752  1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [11585.15482354   218.87434471   790.72804752  1050.85266124]
New Q values:  [6164.16549005  218.87434471  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  5.10234520e+03 -3.22965309e-01  1.40059415e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  5.10234520e+03 -3.22965309e-01  1.40059415e+03]
New Q values:  [ 8.43634063e+00  3.88958773e+03 -3.22965309e-01  1.40059415e+03]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6164.16549005  218.87434471  790.72804752 1050.85266124]
------
Step:5, Action:North
State  216
Old Q Values:  [ 2583.47205993  4172.07484994 -8896.20691497   637.30368728]
New Q values:  [ 2199.66514233  4172.07484994 -8896.20691497   637.30368728]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.88958773e+03 -3.22965309e-01  1.40059415e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.88958773e+03 -3.22965309e-01  1.40059415e+03]
New Q values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.40059415e+03]
Reward: -10001  Episode Reward:  -9986
xxxxx
x.  x
x .gx
x...x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.40059415e+03]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.40059415e+03]
New Q values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.16644516e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2002.69167792   766.61551311]
------
Step:2, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2002.69167792   766.61551311]
New Q values:  [ -253.44886264 -1902.20915811  1150.41021969   766.61551311]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.16644516e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.16644516e+03]
New Q values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.02559083e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1865.3758806    673.3919808 ]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1865.3758806    673.3919808 ]
New Q values:  [ -281.736      -1150.91067548  1053.2276009    673.3919808 ]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.02559083e+03]
------
Step:5, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  1.02559083e+03]
New Q values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  7.25604612e+02]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1053.2276009    673.3919808 ]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1053.2276009    673.3919808 ]
New Q values:  [ -281.736      -1150.91067548   638.37242391   673.3919808 ]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  7.25604612e+02]
------
Step:7, Action:West
State  136
Old Q Values:  [ -170.77177351  1593.11034756 -2383.80019164  1457.76546673]
New Q values:  [ -170.77177351  1593.11034756 -2383.80019164  2374.6185367 ]
Reward: -1  Episode Reward:  3
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  5973.70783336  1110.80173994]
------
Step:8, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  5973.70783336  1110.80173994]
New Q values:  [-9594.56523706 -8069.05606225  3101.26869435  1110.80173994]
Reward: -1  Episode Reward:  2
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1593.11034756 -2383.80019164  2374.6185367 ]
------
Step:9, Action:South
State  136
Old Q Values:  [ -170.77177351  1593.11034756 -2383.80019164  2374.6185367 ]
New Q values:  [ -170.77177351  1888.26659401 -2383.80019164  2374.6185367 ]
Reward: -1  Episode Reward:  1
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2199.66514233  4172.07484994 -8896.20691497   637.30368728]
------
Step:10, Action:South
State  208
Old Q Values:  [-4614.69377242  1749.31316348 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242   524.82820539 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  10
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  -600.9902    ]
------
Step:11, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  -600.9902    ]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  7321.84816423]
Reward: 9  Episode Reward:  19
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 25189.48081409]
------
Step:12, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 25189.48081409]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 28033.6210591 ]
Reward: 9  Episode Reward:  28
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[59841.42911154 12764.58618105 11203.9437825   1875.31501677]
------
Step:13, Action:North
State  257
Old Q Values:  [59841.42911154 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [41763.69526638 12764.58618105 11203.9437825   1875.31501677]
Reward: 9  Episode Reward:  37
xxxxx
x. gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[59405.74540588 23344.73803738 30896.493593       0.        ]
------
Step:14, Action:North
State  181
Old Q Values:  [1520.4014136   927.40283941 1705.90832957  262.76946019]
New Q values:  [1311.84636847  927.40283941 1705.90832957  262.76946019]
Reward: 9  Episode Reward:  46
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2327.61934342  520.46511977 -120.29354603]
------
Step:15, Action:South
State  99
Old Q Values:  [    0.         55856.69771173 38863.4580325      0.        ]
New Q values:  [    0.         40163.80270646 38863.4580325      0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[59405.74540588 23344.73803738 30896.493593       0.        ]
------
Step:16, Action:North
State  180
Old Q Values:  [ -122.403451    8050.99445658 16316.70279614 -4966.32149798]
New Q values:  [ 1.56350891e+01  8.05099446e+03  1.63167028e+04 -4.96632150e+03]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -4378.67418527   217.32156498  -180.6       ]
------
Step:17, Action:East
State  99
Old Q Values:  [    0.         40163.80270646 38863.4580325      0.        ]
New Q values:  [    0.         40163.80270646 51580.7370385      0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   26400.13275764 120119.84608498]
------
Step:18, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   638.37242391   673.3919808 ]
New Q values:  [ -281.736      -1150.91067548   638.37242391   333.95326182]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -4378.67418527   217.32156498  -180.6       ]
------
Step:19, Action:East
State  108
Old Q Values:  [-8463.16477134  2610.77248427  1377.26533419     0.        ]
New Q values:  [-8463.16477134  2610.77248427  1480.68674198     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3101.26869435  1110.80173994]
------
Step:20, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3101.26869435  1110.80173994]
New Q values:  [-9594.56523706 -8069.05606225  1952.29303875  1110.80173994]
Reward: -1  Episode Reward:  40
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1888.26659401 -2383.80019164  2374.6185367 ]
------
Step:21, Action:South
State  136
Old Q Values:  [ -170.77177351  1888.26659401 -2383.80019164  2374.6185367 ]
New Q values:  [ -170.77177351   912.15509922 -2383.80019164  2374.6185367 ]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242   524.82820539 -4584.50430574 -1713.91177491]
------
Step:22, Action:South
State  208
Old Q Values:  [-4614.69377242   524.82820539 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  2405.88573142 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  7321.84816423]
------
Step:23, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  7321.84816423]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  6471.07118506]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 8464.34321948 -8521.23367799  2205.73600612 11809.77306455]
------
Step:24, Action:North
State  272
Old Q Values:  [ 8464.34321948 -8521.23367799  2205.73600612 11809.77306455]
New Q values:  [66512.46215589 -8521.23367799  2205.73600612 11809.77306455]
Reward: 100009  Episode Reward:  100046
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.04044162e+04  1.03161518e+03]
------
Step:1, Action:East
State  195
Old Q Values:  [   38.85388605  8794.98853981 12179.78378091  1169.39963074]
New Q values:  [  38.85388605 8794.98853981 6726.56315938 1169.39963074]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6164.16549005  218.87434471  790.72804752 1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [6164.16549005  218.87434471  790.72804752 1050.85266124]
New Q values:  [2688.74757957  218.87434471  790.72804752 1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  7.25604612e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  7.25604612e+02]
New Q values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  6.40764911e+02]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1150.41021969   766.61551311]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1150.41021969   766.61551311]
New Q values:  [ -253.44886264 -1902.20915811   651.79356106   766.61551311]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  6.40764911e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  6.40764911e+02]
New Q values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  4.47217691e+02]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   638.37242391   333.95326182]
------
Step:6, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   651.79356106   766.61551311]
New Q values:  [ -253.44886264 -1902.20915811   394.28273185   766.61551311]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  4.47217691e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  4.47217691e+02]
New Q values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  3.69798804e+02]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   638.37242391   333.95326182]
------
Step:8, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   26400.13275764 120119.84608498]
New Q values:  [  -180.6          3557.6642036   10670.39274418 120119.84608498]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  3.69798804e+02]
------
Step:9, Action:West
State  136
Old Q Values:  [ -170.77177351   912.15509922 -2383.80019164  2374.6185367 ]
New Q values:  [ -170.77177351   912.15509922 -2383.80019164  1534.9353263 ]
Reward: -1  Episode Reward:  21
xxxxx
xga x
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1952.29303875  1110.80173994]
------
Step:10, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   10670.39274418 120119.84608498]
New Q values:  [  -180.6          3557.6642036    4378.49673879 120119.84608498]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  3.69798804e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  3.69798804e+02]
New Q values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  3.61832733e+04]
Reward: -1  Episode Reward:  19
xxxxx
x.a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 120119.84608498]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   638.37242391   333.95326182]
New Q values:  [ -281.736      -1150.91067548   638.37242391   210.47853695]
Reward: 9  Episode Reward:  28
xxxxx
xa  x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:13, Action:East
State  107
Old Q Values:  [-252.35169558  424.30030728 2597.05968238 -252.78192178]
New Q values:  [-252.35169558  424.30030728 1229.73560012 -252.78192178]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   638.37242391   210.47853695]
------
Step:14, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1952.29303875  1110.80173994]
New Q values:  [-9594.56523706 -8069.05606225  1240.79781339  1110.80173994]
Reward: -1  Episode Reward:  26
xxxxx
x gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   912.15509922 -2383.80019164  1534.9353263 ]
------
Step:15, Action:South
State  138
Old Q Values:  [ 8.43634063e+00 -3.19314245e+03 -3.22965309e-01  3.61832733e+04]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.61832733e+04]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2199.66514233  4172.07484994 -8896.20691497   637.30368728]
------
Step:16, Action:South
State  208
Old Q Values:  [-4614.69377242  2405.88573142 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  2909.07564809 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  6471.07118506]
------
Step:17, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  6471.07118506]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966 22541.56712079]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[66512.46215589 -8521.23367799  2205.73600612 11809.77306455]
------
Step:18, Action:West
State  272
Old Q Values:  [66512.46215589 -8521.23367799  2205.73600612 11809.77306455]
New Q values:  [66512.46215589 -8521.23367799  2205.73600612 17258.41780573]
Reward: 9  Episode Reward:  42
xxxxx
x g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41763.69526638 12764.58618105 11203.9437825   1875.31501677]
------
Step:19, Action:North
State  256
Old Q Values:  [33845.23946664 14051.33618952  7407.2956525    644.94785455]
New Q values:  [104674.5704315   14051.33618952   7407.2956525     644.94785455]
Reward: 100009  Episode Reward:  100051
xxxxx
xg  x
xa  x
x   x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1240.79781339  1110.80173994]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1240.79781339  1110.80173994]
New Q values:  [-9594.56523706 -8069.05606225   962.19972325  1110.80173994]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351   912.15509922 -2383.80019164  1534.9353263 ]
------
Step:2, Action:South
State  136
Old Q Values:  [ -170.77177351   912.15509922 -2383.80019164  1534.9353263 ]
New Q values:  [ -170.77177351  1621.88449467 -2383.80019164  1534.9353263 ]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2199.66514233  4172.07484994 -8896.20691497   637.30368728]
------
Step:3, Action:South
State  208
Old Q Values:  [-4614.69377242  2909.07564809 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  7931.50039547 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966 22541.56712079]
------
Step:4, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966 22541.56712079]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966 28975.76549508]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[66512.46215589 -8521.23367799  2205.73600612 17258.41780573]
------
Step:5, Action:North
State  272
Old Q Values:  [66512.46215589 -8521.23367799  2205.73600612 17258.41780573]
New Q values:  [29731.70973046 -8521.23367799  2205.73600612 17258.41780573]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x.a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.04044162e+04  1.03161518e+03]
------
Step:6, Action:East
State  200
Old Q Values:  [   62.8218634  10024.2293375   2338.0737649    568.38654082]
New Q values:  [   62.8218634  10024.2293375   2186.25196094   568.38654082]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2199.66514233  4172.07484994 -8896.20691497   637.30368728]
------
Step:7, Action:South
State  208
Old Q Values:  [-4614.69377242  7931.50039547 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242 11864.72980671 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966 28975.76549508]
------
Step:8, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966 28975.76549508]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966 20509.21911717]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[29731.70973046 -8521.23367799  2205.73600612 17258.41780573]
------
Step:9, Action:North
State  272
Old Q Values:  [29731.70973046 -8521.23367799  2205.73600612 17258.41780573]
New Q values:  [11747.68315326 -8521.23367799  2205.73600612 17258.41780573]
Reward: -10001  Episode Reward:  -9959
xxxxx
x   x
x.g x
x.  x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9939.07165629 12656.98806325  1460.9765133 ]
------
Step:1, Action:South
State  195
Old Q Values:  [  38.85388605 8794.98853981 6726.56315938 1169.39963074]
New Q values:  [   38.85388605 11933.48173365  6726.56315938  1169.39963074]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 28033.6210591 ]
------
Step:2, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 28033.6210591 ]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 23747.95700355]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.  x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41763.69526638 12764.58618105 11203.9437825   1875.31501677]
------
Step:3, Action:North
State  261
Old Q Values:  [ 3096.65200653    26.73544252 -1446.30860596   123.6214372 ]
New Q values:  [ 3154.69878115    26.73544252 -1446.30860596   123.6214372 ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
------
Step:4, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  6601.94514623     0.        ]
New Q values:  [    0.         -5536.05678243  5761.50292659     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  1.04044162e+04  1.03161518e+03]
------
Step:5, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  1.04044162e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.02593231e+03  4.96779076e+03  1.03161518e+03]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2688.74757957  218.87434471  790.72804752 1050.85266124]
------
Step:6, Action:North
State  210
Old Q Values:  [2688.74757957  218.87434471  790.72804752 1050.85266124]
New Q values:  [20642.5233585    218.87434471   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  34
xxxxx
x..ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    4304.24368673  -180.00807518 65205.41442224]
------
Step:7, Action:West
State  130
Old Q Values:  [26266.584521    4304.24368673  -180.00807518 65205.41442224]
New Q values:  [26266.584521    4304.24368673  -180.00807518 62123.51959439]
Reward: 9  Episode Reward:  43
xxxxx
x.a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 120119.84608498]
------
Step:8, Action:West
State  126
Old Q Values:  [   0.          331.64678262 6806.92715127  825.64977511]
New Q values:  [   0.          331.64678262 6806.92715127  400.85637954]
Reward: 9  Episode Reward:  52
xxxxx
xa  x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -4378.67418527   217.32156498  -180.6       ]
------
Step:9, Action:East
State  111
Old Q Values:  [-177.44732869 2327.61934342  520.46511977 -120.29354603]
New Q values:  [-177.44732869 2327.61934342 2249.66419329 -120.29354603]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 6806.92715127  400.85637954]
------
Step:10, Action:East
State  115
Old Q Values:  [-1.80600000e+02  1.11748598e+03  6.47656656e+00  1.46749470e+05]
New Q values:  [  -180.6          1117.48597573  18639.04650494 146749.46962978]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    4304.24368673  -180.00807518 62123.51959439]
------
Step:11, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.61832733e+04]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.46833909e+04]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SE
[  0.           1.67014986 384.73306724 702.27198793]
------
Step:12, Action:West
State  126
Old Q Values:  [   0.          331.64678262 6806.92715127  400.85637954]
New Q values:  [   0.          331.64678262 6806.92715127  858.02835484]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2327.61934342 2249.66419329 -120.29354603]
------
Step:13, Action:South
State  110
Old Q Values:  [ -239.29051573 -4378.67418527   217.32156498  -180.6       ]
New Q values:  [ -239.29051573 -6813.79421945   217.32156498  -180.6       ]
Reward: -10001  Episode Reward:  -9953
xxxxx
x   x
xg  x
x  .x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966 20509.21911717]
------
Step:1, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966 20509.21911717]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966 13386.61298859]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[11747.68315326 -8521.23367799  2205.73600612 17258.41780573]
------
Step:2, Action:North
State  272
Old Q Values:  [11747.68315326 -8521.23367799  2205.73600612 17258.41780573]
New Q values:  [ 6194.8104907  -8521.23367799  2205.73600612 17258.41780573]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.02593231e+03  4.96779076e+03  1.03161518e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.20995314e+04 1.95186642e+04 1.48950596e+04]
New Q values:  [3.89777037e-01 1.20995314e+04 1.13722846e+04 1.48950596e+04]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242 11864.72980671 -4584.50430574 -1713.91177491]
------
Step:4, Action:South
State  208
Old Q Values:  [-4614.69377242 11864.72980671 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  8761.27581926 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x.  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966 13386.61298859]
------
Step:5, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966 13386.61298859]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966 10531.57053716]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6194.8104907  -8521.23367799  2205.73600612 17258.41780573]
------
Step:6, Action:West
State  272
Old Q Values:  [ 6194.8104907  -8521.23367799  2205.73600612 17258.41780573]
New Q values:  [ 6194.8104907  -8521.23367799  2205.73600612 11005.51415813]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1612.46541007 -5704.51612281 13675.82345278 -5679.36893145]
------
Step:7, Action:East
State  257
Old Q Values:  [41763.69526638 12764.58618105 11203.9437825   1875.31501677]
New Q values:  [41763.69526638 12764.58618105  7782.63176044  1875.31501677]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6194.8104907  -8521.23367799  2205.73600612 11005.51415813]
------
Step:8, Action:West
State  272
Old Q Values:  [ 6194.8104907  -8521.23367799  2205.73600612 11005.51415813]
New Q values:  [ 6194.8104907  -8521.23367799  2205.73600612 16930.71424316]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41763.69526638 12764.58618105  7782.63176044  1875.31501677]
------
Step:9, Action:North
State  261
Old Q Values:  [ 3154.69878115    26.73544252 -1446.30860596   123.6214372 ]
New Q values:  [ 1779.05201133    26.73544252 -1446.30860596   123.6214372 ]
Reward: 9  Episode Reward:  31
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1311.84636847  927.40283941 1705.90832957  262.76946019]
------
Step:10, Action:North
State  181
Old Q Values:  [1311.84636847  927.40283941 1705.90832957  262.76946019]
New Q values:  [1214.47925072  927.40283941 1705.90832957  262.76946019]
Reward: 9  Episode Reward:  40
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2281.13567777  238.35800069    0.        ]
------
Step:11, Action:South
State  101
Old Q Values:  [   0.         3731.33077154    0.            0.        ]
New Q values:  [   0.         2003.70480749    0.            0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1214.47925072  927.40283941 1705.90832957  262.76946019]
------
Step:12, Action:East
State  181
Old Q Values:  [1214.47925072  927.40283941 1705.90832957  262.76946019]
New Q values:  [1214.47925072  927.40283941 4478.8597508   262.76946019]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9939.07165629 12656.98806325  1460.9765133 ]
------
Step:13, Action:South
State  195
Old Q Values:  [   38.85388605 11933.48173365  6726.56315938  1169.39963074]
New Q values:  [   38.85388605 11897.17979453  6726.56315938  1169.39963074]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 23747.95700355]
------
Step:14, Action:West
State  272
Old Q Values:  [ 6194.8104907  -8521.23367799  2205.73600612 16930.71424316]
New Q values:  [ 6194.8104907  -8521.23367799  2205.73600612  7305.40130066]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1779.05201133    26.73544252 -1446.30860596   123.6214372 ]
------
Step:15, Action:North
State  261
Old Q Values:  [ 1779.05201133    26.73544252 -1446.30860596   123.6214372 ]
New Q values:  [ 2621.65878307    26.73544252 -1446.30860596   123.6214372 ]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
------
Step:16, Action:East
State  181
Old Q Values:  [1214.47925072  927.40283941 4478.8597508   262.76946019]
New Q values:  [1214.47925072  927.40283941 5588.04031929  262.76946019]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9939.07165629 12656.98806325  1460.9765133 ]
------
Step:17, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.20995314e+04 1.13722846e+04 1.48950596e+04]
New Q values:  [3.89777037e-01 7.03083296e+03 1.13722846e+04 1.48950596e+04]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6194.8104907  -8521.23367799  2205.73600612  7305.40130066]
------
Step:18, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  1620.79632404]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  1434.21616454]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2621.65878307    26.73544252 -1446.30860596   123.6214372 ]
------
Step:19, Action:North
State  261
Old Q Values:  [ 2621.65878307    26.73544252 -1446.30860596   123.6214372 ]
New Q values:  [ 2724.47560902    26.73544252 -1446.30860596   123.6214372 ]
Reward: -1  Episode Reward:  31
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1214.47925072  927.40283941 5588.04031929  262.76946019]
------
Step:20, Action:East
State  181
Old Q Values:  [1214.47925072  927.40283941 5588.04031929  262.76946019]
New Q values:  [1214.47925072  927.40283941 6031.71254669  262.76946019]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  9939.07165629 12656.98806325  1460.9765133 ]
------
Step:21, Action:South
State  193
Old Q Values:  [-5922.26708831  9939.07165629 12656.98806325  1460.9765133 ]
New Q values:  [-5922.26708831 11099.41576358 12656.98806325  1460.9765133 ]
Reward: -1  Episode Reward:  29
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 23747.95700355]
------
Step:22, Action:West
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  1434.21616454]
New Q values:  [ 1716.23598158 -5807.06396197   855.18004824  1390.42914852]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2724.47560902    26.73544252 -1446.30860596   123.6214372 ]
------
Step:23, Action:North
State  261
Old Q Values:  [ 2724.47560902    26.73544252 -1446.30860596   123.6214372 ]
New Q values:  [ 2898.70400761    26.73544252 -1446.30860596   123.6214372 ]
Reward: -1  Episode Reward:  27
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1214.47925072  927.40283941 6031.71254669  262.76946019]
------
Step:24, Action:East
State  181
Old Q Values:  [1214.47925072  927.40283941 6031.71254669  262.76946019]
New Q values:  [1214.47925072  927.40283941 8856.11102244  262.76946019]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:25, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.03083296e+03 1.13722846e+04 1.48950596e+04]
New Q values:  [3.89777037e-01 7.03083296e+03 7.17669660e+03 1.48950596e+04]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242  8761.27581926 -4584.50430574 -1713.91177491]
------
Step:26, Action:South
State  208
Old Q Values:  [-4614.69377242  8761.27581926 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  6663.38148885 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966 10531.57053716]
------
Step:27, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966 10531.57053716]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  6403.64860506]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6194.8104907  -8521.23367799  2205.73600612  7305.40130066]
------
Step:28, Action:West
State  272
Old Q Values:  [ 6194.8104907  -8521.23367799  2205.73600612  7305.40130066]
New Q values:  [ 6194.8104907  -8521.23367799  2205.73600612  3791.17172255]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2898.70400761    26.73544252 -1446.30860596   123.6214372 ]
------
Step:29, Action:North
State  260
Old Q Values:  [ 1612.46541007 -5704.51612281 13675.82345278 -5679.36893145]
New Q values:  [ 2372.83704201 -5704.51612281 13675.82345278 -5679.36893145]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5761.50292659     0.        ]
------
Step:30, Action:East
State  180
Old Q Values:  [ 1.56350891e+01  8.05099446e+03  1.63167028e+04 -4.96632150e+03]
New Q values:  [   15.6350891   8050.99445658 10994.59899149 -4966.32149798]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 7.03083296e+03 7.17669660e+03 1.48950596e+04]
------
Step:31, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.03083296e+03 7.17669660e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 7.03083296e+03 4.86909308e+03 1.48950596e+04]
Reward: -1  Episode Reward:  19
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242  6663.38148885 -4584.50430574 -1713.91177491]
------
Step:32, Action:South
State  208
Old Q Values:  [-4614.69377242  6663.38148885 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  4585.84717706 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  6403.64860506]
------
Step:33, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  6403.64860506]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  4419.30258924]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6194.8104907  -8521.23367799  2205.73600612  3791.17172255]
------
Step:34, Action:West
State  272
Old Q Values:  [ 6194.8104907  -8521.23367799  2205.73600612  3791.17172255]
New Q values:  [ 6194.8104907  -8521.23367799  2205.73600612  5618.61572485]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2372.83704201 -5704.51612281 13675.82345278 -5679.36893145]
------
Step:35, Action:East
State  260
Old Q Values:  [ 2372.83704201 -5704.51612281 13675.82345278 -5679.36893145]
New Q values:  [ 2372.83704201 -5704.51612281  7328.17252832 -5679.36893145]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6194.8104907  -8521.23367799  2205.73600612  5618.61572485]
------
Step:36, Action:North
State  272
Old Q Values:  [ 6194.8104907  -8521.23367799  2205.73600612  5618.61572485]
New Q values:  [ 6945.84206932 -8521.23367799  2205.73600612  5618.61572485]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 7.03083296e+03 4.86909308e+03 1.48950596e+04]
------
Step:37, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -7.02593231e+03  4.96779076e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.27220304e+02  4.96779076e+03  1.03161518e+03]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6945.84206932 -8521.23367799  2205.73600612  5618.61572485]
------
Step:38, Action:North
State  272
Old Q Values:  [ 6945.84206932 -8521.23367799  2205.73600612  5618.61572485]
New Q values:  [ 4268.07405713 -8521.23367799  2205.73600612  5618.61572485]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01 -7.27220304e+02  4.96779076e+03  1.03161518e+03]
------
Step:39, Action:East
State  194
Old Q Values:  [-6.00000000e-01 -7.27220304e+02  4.96779076e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01 -7.27220304e+02  8.17927331e+03  1.03161518e+03]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[20642.5233585    218.87434471   790.72804752  1050.85266124]
------
Step:40, Action:North
State  210
Old Q Values:  [20642.5233585    218.87434471   790.72804752  1050.85266124]
New Q values:  [26899.46522172   218.87434471   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  20
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    4304.24368673  -180.00807518 62123.51959439]
------
Step:41, Action:West
State  130
Old Q Values:  [26266.584521    4304.24368673  -180.00807518 62123.51959439]
New Q values:  [ 26266.584521     4304.24368673   -180.00807518 128879.64872669]
Reward: 100009  Episode Reward:  100029
xxxxx
x a x
x   x
x  gx
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2898.70400761    26.73544252 -1446.30860596   123.6214372 ]
------
Step:1, Action:North
State  260
Old Q Values:  [ 2372.83704201 -5704.51612281  7328.17252832 -5679.36893145]
New Q values:  [ 2682.98569478 -5704.51612281  7328.17252832 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  5761.50292659     0.        ]
------
Step:2, Action:East
State  180
Old Q Values:  [   15.6350891   8050.99445658 10994.59899149 -4966.32149798]
New Q values:  [   15.6350891   8050.99445658  8871.75746964 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 7.03083296e+03 4.86909308e+03 1.48950596e+04]
------
Step:3, Action:South
State  194
Old Q Values:  [-6.00000000e-01 -7.27220304e+02  8.17927331e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  2.23382673e+02  8.17927331e+03  1.03161518e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 1716.23598158 -5807.06396197   855.18004824  1390.42914852]
------
Step:4, Action:North
State  276
Old Q Values:  [ 1716.23598158 -5807.06396197   855.18004824  1390.42914852]
New Q values:  [ 5154.41226567 -5807.06396197   855.18004824  1390.42914852]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 7.03083296e+03 4.86909308e+03 1.48950596e+04]
------
Step:5, Action:South
State  194
Old Q Values:  [-6.00000000e-01  2.23382673e+02  8.17927331e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  8.17927331e+03  1.03161518e+03]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 5154.41226567 -5807.06396197   855.18004824  1390.42914852]
------
Step:6, Action:North
State  276
Old Q Values:  [ 5154.41226567 -5807.06396197   855.18004824  1390.42914852]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  1390.42914852]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  8.17927331e+03  1.03161518e+03]
------
Step:7, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.03083296e+03 4.86909308e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 7.03083296e+03 3.32879139e+03 1.48950596e+04]
Reward: 9  Episode Reward:  23
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242  4585.84717706 -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  208
Old Q Values:  [-4614.69377242  4585.84717706 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  3165.52964759 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  32
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  4419.30258924]
------
Step:9, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  4419.30258924]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  3452.70575315]
Reward: -1  Episode Reward:  31
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4268.07405713 -8521.23367799  2205.73600612  5618.61572485]
------
Step:10, Action:West
State  272
Old Q Values:  [ 4268.07405713 -8521.23367799  2205.73600612  5618.61572485]
New Q values:  [ 4268.07405713 -8521.23367799  2205.73600612 14775.95486986]
Reward: -1  Episode Reward:  30
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41763.69526638 12764.58618105  7782.63176044  1875.31501677]
------
Step:11, Action:North
State  261
Old Q Values:  [ 2898.70400761    26.73544252 -1446.30860596   123.6214372 ]
New Q values:  [ 3815.71490978    26.73544252 -1446.30860596   123.6214372 ]
Reward: -1  Episode Reward:  29
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1214.47925072  927.40283941 8856.11102244  262.76946019]
------
Step:12, Action:North
State  181
Old Q Values:  [1214.47925072  927.40283941 8856.11102244  262.76946019]
New Q values:  [1092.30314253  927.40283941 8856.11102244  262.76946019]
Reward: 9  Episode Reward:  38
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         2003.70480749    0.            0.        ]
------
Step:13, Action:South
State  101
Old Q Values:  [   0.         2003.70480749    0.            0.        ]
New Q values:  [   0.         3457.71522973    0.            0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1092.30314253  927.40283941 8856.11102244  262.76946019]
------
Step:14, Action:East
State  181
Old Q Values:  [1092.30314253  927.40283941 8856.11102244  262.76946019]
New Q values:  [1092.30314253  927.40283941 9985.87041274  262.76946019]
Reward: -1  Episode Reward:  36
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:15, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.03083296e+03 3.32879139e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 7.03083296e+03 2.28057545e+03 1.48950596e+04]
Reward: -1  Episode Reward:  35
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242  3165.52964759 -4584.50430574 -1713.91177491]
------
Step:16, Action:South
State  208
Old Q Values:  [-4614.69377242  3165.52964759 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  2301.42358498 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  3452.70575315]
------
Step:17, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  3452.70575315]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  5813.26876222]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4268.07405713 -8521.23367799  2205.73600612 14775.95486986]
------
Step:18, Action:West
State  272
Old Q Values:  [ 4268.07405713 -8521.23367799  2205.73600612 14775.95486986]
New Q values:  [ 4268.07405713 -8521.23367799  2205.73600612  8108.23370644]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2682.98569478 -5704.51612281  7328.17252832 -5679.36893145]
------
Step:19, Action:East
State  261
Old Q Values:  [ 3815.71490978    26.73544252 -1446.30860596   123.6214372 ]
New Q values:  [3815.71490978   26.73544252 1853.34666955  123.6214372 ]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4268.07405713 -8521.23367799  2205.73600612  8108.23370644]
------
Step:20, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 23747.95700355]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3815.71490978   26.73544252 1853.34666955  123.6214372 ]
------
Step:21, Action:North
State  261
Old Q Values:  [3815.71490978   26.73544252 1853.34666955  123.6214372 ]
New Q values:  [4521.44708773   26.73544252 1853.34666955  123.6214372 ]
Reward: -1  Episode Reward:  29
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1092.30314253  927.40283941 9985.87041274  262.76946019]
------
Step:22, Action:East
State  181
Old Q Values:  [1092.30314253  927.40283941 9985.87041274  262.76946019]
New Q values:  [ 1092.30314253   927.40283941 10437.77416887   262.76946019]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:23, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.03083296e+03 2.28057545e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 7.03083296e+03 1.60205726e+03 1.48950596e+04]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242  2301.42358498 -4584.50430574 -1713.91177491]
------
Step:24, Action:South
State  208
Old Q Values:  [-4614.69377242  2301.42358498 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  2663.95006266 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  5813.26876222]
------
Step:25, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  5813.26876222]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  4757.17761682]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4268.07405713 -8521.23367799  2205.73600612  8108.23370644]
------
Step:26, Action:West
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  1390.42914852]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  1912.00578573]
Reward: -1  Episode Reward:  24
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4521.44708773   26.73544252 1853.34666955  123.6214372 ]
------
Step:27, Action:North
State  261
Old Q Values:  [4521.44708773   26.73544252 1853.34666955  123.6214372 ]
New Q values:  [4939.31108575   26.73544252 1853.34666955  123.6214372 ]
Reward: -1  Episode Reward:  23
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1092.30314253   927.40283941 10437.77416887   262.76946019]
------
Step:28, Action:East
State  181
Old Q Values:  [ 1092.30314253   927.40283941 10437.77416887   262.76946019]
New Q values:  [1092.30314253  927.40283941 7971.60608652  262.76946019]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11099.41576358 12656.98806325  1460.9765133 ]
------
Step:29, Action:South
State  193
Old Q Values:  [-5922.26708831 11099.41576358 12656.98806325  1460.9765133 ]
New Q values:  [-5922.26708831  7632.15548774 12656.98806325  1460.9765133 ]
Reward: -1  Episode Reward:  21
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
------
Step:30, Action:West
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  1912.00578573]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  2245.99564002]
Reward: -1  Episode Reward:  20
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4939.31108575   26.73544252 1853.34666955  123.6214372 ]
------
Step:31, Action:North
State  261
Old Q Values:  [4939.31108575   26.73544252 1853.34666955  123.6214372 ]
New Q values:  [4366.60626026   26.73544252 1853.34666955  123.6214372 ]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1092.30314253  927.40283941 7971.60608652  262.76946019]
------
Step:32, Action:North
State  180
Old Q Values:  [   15.6350891   8050.99445658  8871.75746964 -4966.32149798]
New Q values:  [  391.06337573  8050.99445658  8871.75746964 -4966.32149798]
Reward: -1  Episode Reward:  18
xxxxx
xa..x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        1284.69780031  522.17020433 -180.6       ]
------
Step:33, Action:East
State  100
Old Q Values:  [   0.         8676.39736464  318.34177431    0.        ]
New Q values:  [   0.         8676.39736464  443.65288404    0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xga.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[   0.         1036.38724771    0.            0.        ]
------
Step:34, Action:South
State  118
Old Q Values:  [ 617.06804554 5827.23341958    0.          503.49427758]
New Q values:  [ 617.06804554 8774.3193716     0.          503.49427758]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
------
Step:35, Action:East
State  196
Old Q Values:  [-2469.90645144  6635.39049193 21480.08667922  3512.17261526]
New Q values:  [-2469.90645144  6635.39049193  9390.61969049  3512.17261526]
Reward: -1  Episode Reward:  25
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242  2663.95006266 -4584.50430574 -1713.91177491]
------
Step:36, Action:South
State  208
Old Q Values:  [-4614.69377242  2663.95006266 -4584.50430574 -1713.91177491]
New Q values:  [-4614.69377242  2492.13331011 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  24
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  4757.17761682]
------
Step:37, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  4757.17761682]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  3256.75511682]
Reward: -1  Episode Reward:  23
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4514.94690029 -5807.06396197   855.18004824  2245.99564002]
------
Step:38, Action:West
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  2245.99564002]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  3096.2500145 ]
Reward: -1  Episode Reward:  22
xxxxx
x  .x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2682.98569478 -5704.51612281  7328.17252832 -5679.36893145]
------
Step:39, Action:East
State  261
Old Q Values:  [4366.60626026   26.73544252 1853.34666955  123.6214372 ]
New Q values:  [4366.60626026   26.73544252 2095.22273791  123.6214372 ]
Reward: -1  Episode Reward:  21
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4514.94690029 -5807.06396197   855.18004824  3096.2500145 ]
------
Step:40, Action:West
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  3096.2500145 ]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  2547.88188388]
Reward: -1  Episode Reward:  20
xxxxx
x  .x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4366.60626026   26.73544252 2095.22273791  123.6214372 ]
------
Step:41, Action:North
State  261
Old Q Values:  [4366.60626026   26.73544252 2095.22273791  123.6214372 ]
New Q values:  [4137.52433006   26.73544252 2095.22273791  123.6214372 ]
Reward: -1  Episode Reward:  19
xxxxx
x  .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1092.30314253  927.40283941 7971.60608652  262.76946019]
------
Step:42, Action:North
State  181
Old Q Values:  [1092.30314253  927.40283941 7971.60608652  262.76946019]
New Q values:  [1473.63582593  927.40283941 7971.60608652  262.76946019]
Reward: -1  Episode Reward:  18
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         3457.71522973    0.            0.        ]
------
Step:43, Action:South
State  97
Old Q Values:  [    0.         36687.68320385     0.             0.        ]
New Q values:  [    0.         32496.19690331     0.             0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x  gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[59405.74540588 23344.73803738 30896.493593       0.        ]
------
Step:44, Action:North
State  177
Old Q Values:  [59405.74540588 23344.73803738 30896.493593       0.        ]
New Q values:  [33510.55723334 23344.73803738 30896.493593       0.        ]
Reward: -1  Episode Reward:  16
xxxxx
xa gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:NE
[    0.         32496.19690331     0.             0.        ]
------
Step:45, Action:South
State  101
Old Q Values:  [   0.         3457.71522973    0.            0.        ]
New Q values:  [   0.         3773.96791785    0.            0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1473.63582593  927.40283941 7971.60608652  262.76946019]
------
Step:46, Action:East
State  177
Old Q Values:  [33510.55723334 23344.73803738 30896.493593       0.        ]
New Q values:  [33510.55723334 23344.73803738 16155.09385617     0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x  gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7632.15548774 12656.98806325  1460.9765133 ]
------
Step:47, Action:East
State  197
Old Q Values:  [-5833.78831344  1495.56024905  -713.36493463   403.06255908]
New Q values:  [-5833.78831344  1495.56024905 -5538.30598082   403.06255908]
Reward: -10001  Episode Reward:  -9987
xxxxx
x  .x
x  gx
x   x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
------
Step:1, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  5761.50292659     0.        ]
New Q values:  [    0.         -5536.05678243  4763.78316466     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  8.17927331e+03  1.03161518e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 7.03083296e+03 1.60205726e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 7.03083296e+03 1.39386290e+03 1.48950596e+04]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4614.69377242  2492.13331011 -4584.50430574 -1713.91177491]
------
Step:3, Action:South
State  210
Old Q Values:  [26899.46522172   218.87434471   790.72804752  1050.85266124]
New Q values:  [26899.46522172  1069.97627293   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  3256.75511682]
------
Step:4, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  3256.75511682]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  3734.57215866]
Reward: -1  Episode Reward:  26
xxxxx
x...x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4268.07405713 -8521.23367799  2205.73600612  8108.23370644]
------
Step:5, Action:West
State  272
Old Q Values:  [ 4268.07405713 -8521.23367799  2205.73600612  8108.23370644]
New Q values:  [ 4268.07405713 -8521.23367799  2205.73600612  5447.14524107]
Reward: 9  Episode Reward:  35
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2682.98569478 -5704.51612281  7328.17252832 -5679.36893145]
------
Step:6, Action:East
State  260
Old Q Values:  [ 2682.98569478 -5704.51612281  7328.17252832 -5679.36893145]
New Q values:  [ 2682.98569478 -5704.51612281  4564.81258365 -5679.36893145]
Reward: -1  Episode Reward:  34
xxxxx
x...x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4268.07405713 -8521.23367799  2205.73600612  5447.14524107]
------
Step:7, Action:West
State  272
Old Q Values:  [ 4268.07405713 -8521.23367799  2205.73600612  5447.14524107]
New Q values:  [ 4268.07405713 -8521.23367799  2205.73600612  3547.70187152]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2682.98569478 -5704.51612281  4564.81258365 -5679.36893145]
------
Step:8, Action:East
State  260
Old Q Values:  [ 2682.98569478 -5704.51612281  4564.81258365 -5679.36893145]
New Q values:  [ 2682.98569478 -5704.51612281  3105.7472506  -5679.36893145]
Reward: -1  Episode Reward:  32
xxxxx
x...x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4268.07405713 -8521.23367799  2205.73600612  3547.70187152]
------
Step:9, Action:North
State  272
Old Q Values:  [ 4268.07405713 -8521.23367799  2205.73600612  3547.70187152]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612  3547.70187152]
Reward: -10001  Episode Reward:  -9969
xxxxx
x...x
x g x
x   x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4137.52433006   26.73544252 2095.22273791  123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [4137.52433006   26.73544252 2095.22273791  123.6214372 ]
New Q values:  [4051.89155798   26.73544252 2095.22273791  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1473.63582593  927.40283941 7971.60608652  262.76946019]
------
Step:2, Action:East
State  189
Old Q Values:  [ 275.08817949 1931.93971436 1139.33720814  154.04646645]
New Q values:  [ 275.08817949 1931.93971436 4258.23130223  154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7632.15548774 12656.98806325  1460.9765133 ]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 7.03083296e+03 1.39386290e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 3.88204374e+03 1.39386290e+03 1.48950596e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2205.73600612  3547.70187152]
------
Step:4, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612  3547.70187152]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612  2634.048216  ]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4051.89155798   26.73544252 2095.22273791  123.6214372 ]
------
Step:5, Action:North
State  261
Old Q Values:  [4051.89155798   26.73544252 2095.22273791  123.6214372 ]
New Q values:  [3530.79460173   26.73544252 2095.22273791  123.6214372 ]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
------
Step:6, Action:East
State  183
Old Q Values:  [ 861.48670303 1686.11868129 6368.79326178 1554.80203889]
New Q values:  [ 861.48670303 1686.11868129 5000.69929874 1554.80203889]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  8.17927331e+03  1.03161518e+03]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  8.17927331e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.13469489e+04  1.03161518e+03]
Reward: 9  Episode Reward:  33
xxxxx
x.. x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26899.46522172  1069.97627293   790.72804752  1050.85266124]
------
Step:8, Action:North
State  208
Old Q Values:  [-4614.69377242  2492.13331011 -4584.50430574 -1713.91177491]
New Q values:  [36817.41710904  2492.13331011 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  32
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     4304.24368673   -180.00807518 128879.64872669]
------
Step:9, Action:West
State  136
Old Q Values:  [ -170.77177351  1621.88449467 -2383.80019164  1534.9353263 ]
New Q values:  [ -170.77177351  1621.88449467 -2383.80019164  2125.6311762 ]
Reward: 9  Episode Reward:  41
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 5020.85681892  850.80874877]
------
Step:10, Action:East
State  112
Old Q Values:  [     0.           3629.92591876  19074.79557429 110949.76306292]
New Q values:  [     0.           3629.92591876  33200.89794593 110949.76306292]
Reward: -1  Episode Reward:  40
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[41632.18074983  6365.91482369 -8652.84       85238.59905405]
------
Step:11, Action:North
State  128
Old Q Values:  [41632.18074983  6365.91482369 -8652.84       85238.59905405]
New Q values:  [36043.85201615  6365.91482369 -8652.84       85238.59905405]
Reward: -10301  Episode Reward:  -10261
xxxxx
x. gx
x   x
x  .x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1621.88449467 -2383.80019164  2125.6311762 ]
------
Step:1, Action:South
State  136
Old Q Values:  [ -170.77177351  1621.88449467 -2383.80019164  2125.6311762 ]
New Q values:  [ -170.77177351  1905.77625285 -2383.80019164  2125.6311762 ]
Reward: 9  Episode Reward:  9
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2199.66514233  4172.07484994 -8896.20691497   637.30368728]
------
Step:2, Action:South
State  208
Old Q Values:  [36817.41710904  2492.13331011 -4584.50430574 -1713.91177491]
New Q values:  [36817.41710904  2122.62497164 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  3734.57215866]
------
Step:3, Action:West
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  3734.57215866]
New Q values:  [-3752.17958693 -6442.16912869 -8192.20126966  2289.44332826]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2205.73600612  2634.048216  ]
------
Step:4, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612  2634.048216  ]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612 32461.39041585]
Reward: 9  Episode Reward:  36
xxxxx
xg  x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[104674.5704315   14051.33618952   7407.2956525     644.94785455]
------
Step:5, Action:North
State  256
Old Q Values:  [104674.5704315   14051.33618952   7407.2956525     644.94785455]
New Q values:  [67006.30281745 14051.33618952  7407.2956525    644.94785455]
Reward: -9991  Episode Reward:  -9955
xxxxx
x.  x
xg. x
x   x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3752.17958693 -6442.16912869 -8192.20126966  2289.44332826]
------
Step:1, Action:North
State  288
Old Q Values:  [-3752.17958693 -6442.16912869 -8192.20126966  2289.44332826]
New Q values:  [ 6574.36773174 -6442.16912869 -8192.20126966  2289.44332826]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26899.46522172  1069.97627293   790.72804752  1050.85266124]
------
Step:2, Action:North
State  208
Old Q Values:  [36817.41710904  2122.62497164 -4584.50430574 -1713.91177491]
New Q values:  [19137.38412417  2122.62497164 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.46833909e+04]
------
Step:3, Action:West
State  136
Old Q Values:  [ -170.77177351  1905.77625285 -2383.80019164  2125.6311762 ]
New Q values:  [ -170.77177351  1905.77625285 -2383.80019164   980.26872159]
Reward: 9  Episode Reward:  27
xxxxx
x.agx
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:4, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   394.28273185   766.61551311]
New Q values:  [ -253.44886264 -1902.20915811   394.28273185   680.96688528]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  424.30030728 1229.73560012 -252.78192178]
------
Step:5, Action:East
State  105
Old Q Values:  [-180.6         469.44311696  158.81480179    0.        ]
New Q values:  [-180.6         469.44311696  187.54217183    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x agx
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:6, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   962.19972325  1110.80173994]
New Q values:  [-9594.56523706 -8069.05606225   962.19972325   584.55363106]
Reward: -1  Episode Reward:  34
xxxxx
xag x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         469.44311696  187.54217183    0.        ]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558  424.30030728 1229.73560012 -252.78192178]
New Q values:  [-252.35169558  401.09334621 1229.73560012 -252.78192178]
Reward: 9  Episode Reward:  43
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 334.99458669    0.          753.24407764 -178.98      ]
------
Step:8, Action:North
State  180
Old Q Values:  [  391.06337573  8050.99445658  8871.75746964 -4966.32149798]
New Q values:  [  227.32258252  8050.99445658  8871.75746964 -4966.32149798]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:9, Action:East
State  110
Old Q Values:  [ -239.29051573 -6813.79421945   217.32156498  -180.6       ]
New Q values:  [ -239.29051573 -6813.79421945   277.84035317  -180.6       ]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   638.37242391   210.47853695]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   638.37242391   210.47853695]
New Q values:  [ -281.736      -1150.91067548  4659.76625012   210.47853695]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.46833909e+04]
------
Step:11, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.46833909e+04]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  6.07704644e+03]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x . x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   394.28273185   680.96688528]
------
Step:12, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   394.28273185   680.96688528]
New Q values:  [ -253.44886264 -1902.20915811   394.28273185   640.70743415]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621 1229.73560012 -252.78192178]
------
Step:13, Action:East
State  107
Old Q Values:  [-252.35169558  401.09334621 1229.73560012 -252.78192178]
New Q values:  [-252.35169558  401.09334621  683.50647029 -252.78192178]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x . x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   394.28273185   640.70743415]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   394.28273185   640.70743415]
New Q values:  [ -253.44886264 -1902.20915811   394.28273185   460.73491475]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621  683.50647029 -252.78192178]
------
Step:15, Action:East
State  105
Old Q Values:  [-180.6         469.44311696  187.54217183    0.        ]
New Q values:  [-180.6         469.44311696  199.03311984    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x agx
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:16, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   962.19972325   584.55363106]
New Q values:  [-9594.56523706 -8069.05606225   962.19972325   374.05438751]
Reward: -1  Episode Reward:  34
xxxxx
xag x
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         469.44311696  199.03311984    0.        ]
------
Step:17, Action:South
State  105
Old Q Values:  [-180.6         469.44311696  199.03311984    0.        ]
New Q values:  [-180.6        2578.65907274  199.03311984    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1473.63582593  927.40283941 7971.60608652  262.76946019]
------
Step:18, Action:East
State  185
Old Q Values:  [ 334.99458669    0.          753.24407764 -178.98      ]
New Q values:  [ 334.99458669    0.         3313.96643231 -178.98      ]
Reward: 9  Episode Reward:  42
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  10024.2293375   2186.25196094   568.38654082]
------
Step:19, Action:South
State  200
Old Q Values:  [   62.8218634  10024.2293375   2186.25196094   568.38654082]
New Q values:  [   62.8218634  13747.50885976  2186.25196094   568.38654082]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2205.73600612 32461.39041585]
------
Step:20, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612 32461.39041585]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612 85519.06474625]
Reward: 100009  Episode Reward:  100050
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26899.46522172  1069.97627293   790.72804752  1050.85266124]
------
Step:1, Action:North
State  210
Old Q Values:  [26899.46522172  1069.97627293   790.72804752  1050.85266124]
New Q values:  [12588.30002058  1069.97627293   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  6.07704644e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  6.07704644e+03]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.83414845e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4659.76625012   210.47853695]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   962.19972325   374.05438751]
New Q values:  [-9594.56523706 -8069.05606225   956.01276515   374.05438751]
Reward: -1  Episode Reward:  17
xxxxx
x.gax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  1905.77625285 -2383.80019164   980.26872159]
------
Step:4, Action:South
State  136
Old Q Values:  [ -170.77177351  1905.77625285 -2383.80019164   980.26872159]
New Q values:  [ -170.77177351  6502.92573839 -2383.80019164   980.26872159]
Reward: -1  Episode Reward:  16
xxxxx
x. gx
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417  2122.62497164 -4584.50430574 -1713.91177491]
------
Step:5, Action:South
State  208
Old Q Values:  [19137.38412417  2122.62497164 -4584.50430574 -1713.91177491]
New Q values:  [19137.38412417  2826.76030818 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  25
xxxxx
x.g x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.36773174 -6442.16912869 -8192.20126966  2289.44332826]
------
Step:6, Action:North
State  288
Old Q Values:  [ 6574.36773174 -6442.16912869 -8192.20126966  2289.44332826]
New Q values:  [ 8370.36232995 -6442.16912869 -8192.20126966  2289.44332826]
Reward: -1  Episode Reward:  24
xxxxx
x. gx
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417  2826.76030818 -4584.50430574 -1713.91177491]
------
Step:7, Action:South
State  208
Old Q Values:  [19137.38412417  2826.76030818 -4584.50430574 -1713.91177491]
New Q values:  [19137.38412417  3641.21282226 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x..gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8370.36232995 -6442.16912869 -8192.20126966  2289.44332826]
------
Step:8, Action:West
State  288
Old Q Values:  [ 8370.36232995 -6442.16912869 -8192.20126966  2289.44332826]
New Q values:  [ 8370.36232995 -6442.16912869 -8192.20126966 26576.89675518]
Reward: 9  Episode Reward:  32
xxxxx
x.  x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2205.73600612 85519.06474625]
------
Step:9, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612 85519.06474625]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612 46736.13447842]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41763.69526638 12764.58618105  7782.63176044  1875.31501677]
------
Step:10, Action:North
State  256
Old Q Values:  [67006.30281745 14051.33618952  7407.2956525    644.94785455]
New Q values:  [53166.89272928 14051.33618952  7407.2956525    644.94785455]
Reward: 9  Episode Reward:  40
xxxxx
x.  x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:11, Action:East
State  176
Old Q Values:  [103770.24881615   1621.55095326  56149.96710536      0.        ]
New Q values:  [103770.24881615   1621.55095326  26933.90471518      0.        ]
Reward: 9  Episode Reward:  49
xxxxx
x.  x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.88204374e+03 1.39386290e+03 1.48950596e+04]
------
Step:12, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.88204374e+03 1.39386290e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 1.55730578e+04 1.39386290e+03 1.48950596e+04]
Reward: -1  Episode Reward:  48
xxxxx
xg  x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2205.73600612 46736.13447842]
------
Step:13, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612 46736.13447842]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612 34643.92161015]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[53166.89272928 14051.33618952  7407.2956525    644.94785455]
------
Step:14, Action:South
State  256
Old Q Values:  [53166.89272928 14051.33618952  7407.2956525    644.94785455]
New Q values:  [53166.89272928 21390.00229459  7407.2956525    644.94785455]
Reward: -301  Episode Reward:  -254
xxxxx
xg  x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[53166.89272928 21390.00229459  7407.2956525    644.94785455]
------
Step:15, Action:North
State  256
Old Q Values:  [53166.89272928 21390.00229459  7407.2956525    644.94785455]
New Q values:  [46397.23173656 21390.00229459  7407.2956525    644.94785455]
Reward: -10001  Episode Reward:  -10255
xxxxx
x.  x
xg  x
x   x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2327.61934342 2249.66419329 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 2281.13567777  238.35800069    0.        ]
New Q values:  [ 221.30610858 3309.33609706  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xag.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1473.63582593  927.40283941 7971.60608652  262.76946019]
------
Step:2, Action:North
State  183
Old Q Values:  [ 861.48670303 1686.11868129 5000.69929874 1554.80203889]
New Q values:  [1042.28048424 1686.11868129 5000.69929874 1554.80203889]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2327.61934342 2249.66419329 -120.29354603]
------
Step:3, Action:South
State  110
Old Q Values:  [ -239.29051573 -6813.79421945   277.84035317  -180.6       ]
New Q values:  [ -239.29051573 -1296.98273838   277.84035317  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xa..x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:4, Action:East
State  180
Old Q Values:  [  227.32258252  8050.99445658  8871.75746964 -4966.32149798]
New Q values:  [  227.32258252  8050.99445658  6371.288895   -4966.32149798]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
xga.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193  9390.61969049  3512.17261526]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.55730578e+04 1.39386290e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 1.55730578e+04 6.30416040e+03 1.48950596e+04]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417  3641.21282226 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  218
Old Q Values:  [2657.79679428 3989.89183125    0.         1847.21017375]
New Q values:  [2218.76325298 3989.89183125    0.         1847.21017375]
Reward: 9  Episode Reward:  34
xxxxx
x .ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.83414845e+03]
------
Step:7, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.83414845e+03]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.67727985e+03]
Reward: 9  Episode Reward:  43
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   394.28273185   460.73491475]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   394.28273185   460.73491475]
New Q values:  [ -253.44886264 -1902.20915811   394.28273185   388.74590699]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621  683.50647029 -252.78192178]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558  401.09334621  683.50647029 -252.78192178]
New Q values:  [-252.35169558  401.09334621  391.08740767 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   394.28273185   388.74590699]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4659.76625012   210.47853695]
New Q values:  [ -281.736      -1150.91067548  2366.49045648   210.47853695]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.67727985e+03]
------
Step:11, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.67727985e+03]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  2.71239009e+03]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 6806.92715127  858.02835484]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2366.49045648   210.47853695]
New Q values:  [ -281.736      -1150.91067548  1759.71320878   210.47853695]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  2.71239009e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  2.71239009e+03]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.12643418e+03]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 6806.92715127  858.02835484]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1759.71320878   210.47853695]
New Q values:  [ -281.736      -1150.91067548  1641.2155376    210.47853695]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.12643418e+03]
------
Step:15, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.12643418e+03]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.29205182e+03]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 6806.92715127  858.02835484]
------
Step:16, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1641.2155376    210.47853695]
New Q values:  [ -281.736      -1150.91067548  1643.50176029   210.47853695]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.29205182e+03]
------
Step:17, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  3.29205182e+03]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.43450555e+03]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   394.28273185   388.74590699]
------
Step:18, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   394.28273185   388.74590699]
New Q values:  [ -253.44886264 -1902.20915811   587.46475671   388.74590699]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.43450555e+03]
------
Step:19, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.43450555e+03]
New Q values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.06625275e+03]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1643.50176029   210.47853695]
------
Step:20, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225   956.01276515   374.05438751]
New Q values:  [-9594.56523706 -8069.05606225  2332.68282758   374.05438751]
Reward: -1  Episode Reward:  30
xxxxx
x gax
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6502.92573839 -2383.80019164   980.26872159]
------
Step:21, Action:South
State  138
Old Q Values:  [ 8.43634063e+00 -2.62345266e+01 -3.22965309e-01  1.06625275e+03]
New Q values:  [ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  1.06625275e+03]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2199.66514233  4172.07484994 -8896.20691497   637.30368728]
------
Step:22, Action:South
State  216
Old Q Values:  [ 2199.66514233  4172.07484994 -8896.20691497   637.30368728]
New Q values:  [ 2199.66514233  9647.29896653 -8896.20691497   637.30368728]
Reward: 9  Episode Reward:  38
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8370.36232995 -6442.16912869 -8192.20126966 26576.89675518]
------
Step:23, Action:West
State  288
Old Q Values:  [ 8370.36232995 -6442.16912869 -8192.20126966 26576.89675518]
New Q values:  [ 8370.36232995 -6442.16912869 -8192.20126966 13823.14788438]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
------
Step:24, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612 34643.92161015]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612 86392.07722397]
Reward: 100009  Episode Reward:  100046
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3530.79460173   26.73544252 2095.22273791  123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [3530.79460173   26.73544252 2095.22273791  123.6214372 ]
New Q values:  [3809.19966665   26.73544252 2095.22273791  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1473.63582593  927.40283941 7971.60608652  262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [1473.63582593  927.40283941 7971.60608652  262.76946019]
New Q values:  [1473.63582593  927.40283941 1865.95978694  262.76946019]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.. x
x g.x
x ..x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7632.15548774 12656.98806325  1460.9765133 ]
------
Step:1, Action:South
State  195
Old Q Values:  [   38.85388605 11897.17979453  6726.56315938  1169.39963074]
New Q values:  [  38.85388605 7957.26110012 6726.56315938 1169.39963074]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
------
Step:2, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612 86392.07722397]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612 35704.99078958]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.  x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3809.19966665   26.73544252 2095.22273791  123.6214372 ]
------
Step:3, Action:North
State  261
Old Q Values:  [3809.19966665   26.73544252 2095.22273791  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 2095.22273791  123.6214372 ]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1473.63582593  927.40283941 1865.95978694  262.76946019]
------
Step:4, Action:North
State  181
Old Q Values:  [1473.63582593  927.40283941 1865.95978694  262.76946019]
New Q values:  [1587.65515949  927.40283941 1865.95978694  262.76946019]
Reward: 9  Episode Reward:  36
xxxxx
xa..x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 3309.33609706  238.35800069    0.        ]
------
Step:5, Action:South
State  103
Old Q Values:  [ 221.30610858 3309.33609706  238.35800069    0.        ]
New Q values:  [ 221.30610858 2823.34422845  238.35800069    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1686.11868129 5000.69929874 1554.80203889]
------
Step:6, Action:East
State  181
Old Q Values:  [1587.65515949  927.40283941 1865.95978694  262.76946019]
New Q values:  [1587.65515949  927.40283941 4542.88033375  262.76946019]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  7632.15548774 12656.98806325  1460.9765133 ]
------
Step:7, Action:South
State  195
Old Q Values:  [  38.85388605 7957.26110012 6726.56315938 1169.39963074]
New Q values:  [  38.85388605 6375.29362235 6726.56315938 1169.39963074]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
------
Step:8, Action:West
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  2547.88188388]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  1647.11957492]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 2095.22273791  123.6214372 ]
------
Step:9, Action:North
State  260
Old Q Values:  [ 2682.98569478 -5704.51612281  3105.7472506  -5679.36893145]
New Q values:  [ 2501.72922731 -5704.51612281  3105.7472506  -5679.36893145]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:10, Action:East
State  183
Old Q Values:  [1042.28048424 1686.11868129 5000.69929874 1554.80203889]
New Q values:  [1042.28048424 1686.11868129 5403.76438706 1554.80203889]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.13469489e+04  1.03161518e+03]
------
Step:11, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.55730578e+04 6.30416040e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 1.55730578e+04 8.26227940e+03 1.48950596e+04]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417  3641.21282226 -4584.50430574 -1713.91177491]
------
Step:12, Action:North
State  216
Old Q Values:  [ 2199.66514233  9647.29896653 -8896.20691497   637.30368728]
New Q values:  [ 2836.14377845  9647.29896653 -8896.20691497   637.30368728]
Reward: 9  Episode Reward:  38
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6502.92573839 -2383.80019164   980.26872159]
------
Step:13, Action:South
State  128
Old Q Values:  [36043.85201615  6365.91482369 -8652.84       85238.59905405]
New Q values:  [36043.85201615  8286.98116673 -8652.84       85238.59905405]
Reward: -1  Episode Reward:  37
xxxxx
xg. x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417  3641.21282226 -4584.50430574 -1713.91177491]
------
Step:14, Action:North
State  216
Old Q Values:  [ 2836.14377845  9647.29896653 -8896.20691497   637.30368728]
New Q values:  [ 3084.7352329   9647.29896653 -8896.20691497   637.30368728]
Reward: -1  Episode Reward:  36
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6502.92573839 -2383.80019164   980.26872159]
------
Step:15, Action:South
State  130
Old Q Values:  [ 26266.584521     4304.24368673   -180.00807518 128879.64872669]
New Q values:  [ 26266.584521     7462.31271194   -180.00807518 128879.64872669]
Reward: -1  Episode Reward:  35
xxxxx
x . x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417  3641.21282226 -4584.50430574 -1713.91177491]
------
Step:16, Action:North
State  216
Old Q Values:  [ 3084.7352329   9647.29896653 -8896.20691497   637.30368728]
New Q values:  [ 3184.17181468  9647.29896653 -8896.20691497   637.30368728]
Reward: -1  Episode Reward:  34
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6502.92573839 -2383.80019164   980.26872159]
------
Step:17, Action:South
State  130
Old Q Values:  [ 26266.584521     7462.31271194   -180.00807518 128879.64872669]
New Q values:  [ 26266.584521     8725.54032203   -180.00807518 128879.64872669]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417  3641.21282226 -4584.50430574 -1713.91177491]
------
Step:18, Action:North
State  210
Old Q Values:  [12588.30002058  1069.97627293   790.72804752  1050.85266124]
New Q values:  [43698.61462624  1069.97627293   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  32
xxxxx
x .ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     8725.54032203   -180.00807518 128879.64872669]
------
Step:19, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  1.06625275e+03]
New Q values:  [ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  2.47397924e+03]
Reward: 9  Episode Reward:  41
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 6806.92715127  858.02835484]
------
Step:20, Action:East
State  127
Old Q Values:  [  0.           1.67014986 384.73306724 702.27198793]
New Q values:  [  0.           1.67014986 895.48700012 702.27198793]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  2.47397924e+03]
------
Step:21, Action:West
State  130
Old Q Values:  [ 26266.584521     8725.54032203   -180.00807518 128879.64872669]
New Q values:  [26266.584521    8725.54032203  -180.00807518 95576.10037961]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 146749.46962978]
------
Step:22, Action:West
State  126
Old Q Values:  [   0.          331.64678262 6806.92715127  858.02835484]
New Q values:  [   0.          331.64678262 6806.92715127 1040.89714496]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2327.61934342 2249.66419329 -120.29354603]
------
Step:23, Action:South
State  111
Old Q Values:  [-177.44732869 2327.61934342 2249.66419329 -120.29354603]
New Q values:  [-177.44732869 2207.91712804 2249.66419329 -120.29354603]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1931.93971436 4258.23130223  154.04646645]
------
Step:24, Action:South
State  189
Old Q Values:  [ 275.08817949 1931.93971436 4258.23130223  154.04646645]
New Q values:  [ 275.08817949 1400.74270712 4258.23130223  154.04646645]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 2095.22273791  123.6214372 ]
------
Step:25, Action:East
State  261
Old Q Values:  [2088.86780274   26.73544252 2095.22273791  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 2191.97316525  123.6214372 ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4514.94690029 -5807.06396197   855.18004824  1647.11957492]
------
Step:26, Action:West
State  277
Old Q Values:  [   1.64433       0.          315.20206867 1305.93576652]
New Q values:  [   1.64433       0.          315.20206867 1179.36625618]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 2191.97316525  123.6214372 ]
------
Step:27, Action:East
State  261
Old Q Values:  [2088.86780274   26.73544252 2191.97316525  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 2230.67333619  123.6214372 ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4514.94690029 -5807.06396197   855.18004824  1647.11957492]
------
Step:28, Action:West
State  277
Old Q Values:  [   1.64433       0.          315.20206867 1179.36625618]
New Q values:  [   1.64433       0.          315.20206867 1140.34850333]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 2230.67333619  123.6214372 ]
------
Step:29, Action:East
State  261
Old Q Values:  [2088.86780274   26.73544252 2230.67333619  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 2246.15340456  123.6214372 ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4514.94690029 -5807.06396197   855.18004824  1647.11957492]
------
Step:30, Action:West
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  1647.11957492]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  1332.09385134]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 2246.15340456  123.6214372 ]
------
Step:31, Action:North
State  260
Old Q Values:  [ 2501.72922731 -5704.51612281  3105.7472506  -5679.36893145]
New Q values:  [ 1538.55279863 -5704.51612281  3105.7472506  -5679.36893145]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
------
Step:32, Action:East
State  188
Old Q Values:  [-6523.78898263  3127.58484886  2251.62396068     0.        ]
New Q values:  [-6523.78898263  3127.58484886  1485.98871489     0.        ]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         1682.60895446 1953.13043538  441.58769553]
------
Step:33, Action:East
State  204
Old Q Values:  [   0.         1682.60895446 1953.13043538  441.58769553]
New Q values:  [   0.         1682.60895446 3674.84186411  441.58769553]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3184.17181468  9647.29896653 -8896.20691497   637.30368728]
------
Step:34, Action:South
State  208
Old Q Values:  [19137.38412417  3641.21282226 -4584.50430574 -1713.91177491]
New Q values:  [19137.38412417 65608.82949422 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100036
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1587.65515949  927.40283941 4542.88033375  262.76946019]
------
Step:1, Action:East
State  189
Old Q Values:  [ 275.08817949 1400.74270712 4258.23130223  154.04646645]
New Q values:  [ 275.08817949 1400.74270712 5832.94517882  154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[   62.8218634  13747.50885976  2186.25196094   568.38654082]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831  7632.15548774 12656.98806325  1460.9765133 ]
New Q values:  [-5922.26708831  6251.2513774  12656.98806325  1460.9765133 ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
------
Step:3, Action:West
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  1332.09385134]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  1212.0835619 ]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 2246.15340456  123.6214372 ]
------
Step:4, Action:East
State  261
Old Q Values:  [2088.86780274   26.73544252 2246.15340456  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 4090.85054413  123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
------
Step:5, Action:West
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  1212.0835619 ]
New Q values:  [ 4514.94690029 -5807.06396197   855.18004824  1711.488588  ]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 4090.85054413  123.6214372 ]
------
Step:6, Action:East
State  260
Old Q Values:  [ 1538.55279863 -5704.51612281  3105.7472506  -5679.36893145]
New Q values:  [ 1538.55279863 -5704.51612281  2596.18297033 -5679.36893145]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4514.94690029 -5807.06396197   855.18004824  1711.488588  ]
------
Step:7, Action:North
State  276
Old Q Values:  [ 4514.94690029 -5807.06396197   855.18004824  1711.488588  ]
New Q values:  [ 6477.29611245 -5807.06396197   855.18004824  1711.488588  ]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.55730578e+04 8.26227940e+03 1.48950596e+04]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.55730578e+04 8.26227940e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 8.17181197e+03 8.26227940e+03 1.48950596e+04]
Reward: -1  Episode Reward:  22
xxxxx
xg. x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 6477.29611245 -5807.06396197   855.18004824  1711.488588  ]
------
Step:9, Action:North
State  276
Old Q Values:  [ 6477.29611245 -5807.06396197   855.18004824  1711.488588  ]
New Q values:  [ 7058.83631802 -5807.06396197   855.18004824  1711.488588  ]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 8.17181197e+03 8.26227940e+03 1.48950596e+04]
------
Step:10, Action:West
State  192
Old Q Values:  [3.89777037e-01 8.17181197e+03 8.26227940e+03 1.48950596e+04]
New Q values:  [3.89777037e-01 8.17181197e+03 8.26227940e+03 7.32028793e+03]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1587.65515949  927.40283941 4542.88033375  262.76946019]
------
Step:11, Action:North
State  180
Old Q Values:  [  227.32258252  8050.99445658  6371.288895   -4966.32149798]
New Q values:  [  179.68113896  8050.99445658  6371.288895   -4966.32149798]
Reward: 9  Episode Reward:  29
xxxxx
xa. x
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -1296.98273838   277.84035317  -180.6       ]
------
Step:12, Action:East
State  108
Old Q Values:  [-8463.16477134  2610.77248427  1480.68674198     0.        ]
New Q values:  [-8463.16477134  2610.77248427  2103.93174247     0.        ]
Reward: 9  Episode Reward:  38
xxxxx
xga x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 5020.85681892  850.80874877]
------
Step:13, Action:East
State  126
Old Q Values:  [   0.          331.64678262 6806.92715127 1040.89714496]
New Q values:  [   0.          331.64678262 3464.36463373 1040.89714496]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  2.47397924e+03]
------
Step:14, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  2.47397924e+03]
New Q values:  [ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  2.02830109e+03]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x  .x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 3464.36463373 1040.89714496]
------
Step:15, Action:East
State  126
Old Q Values:  [   0.          331.64678262 3464.36463373 1040.89714496]
New Q values:  [   0.          331.64678262 1993.63617981 1040.89714496]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  2.02830109e+03]
------
Step:16, Action:West
State  136
Old Q Values:  [ -170.77177351  6502.92573839 -2383.80019164   980.26872159]
New Q values:  [ -170.77177351  6502.92573839 -2383.80019164  1897.76453431]
Reward: -1  Episode Reward:  34
xxxxx
xga x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 5020.85681892  850.80874877]
------
Step:17, Action:East
State  126
Old Q Values:  [   0.          331.64678262 1993.63617981 1040.89714496]
New Q values:  [   0.          331.64678262 1405.34479825 1040.89714496]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  2.02830109e+03]
------
Step:18, Action:West
State  136
Old Q Values:  [ -170.77177351  6502.92573839 -2383.80019164  1897.76453431]
New Q values:  [ -170.77177351  6502.92573839 -2383.80019164  2264.7628594 ]
Reward: -1  Episode Reward:  32
xxxxx
xga x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 5020.85681892  850.80874877]
------
Step:19, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 5020.85681892  850.80874877]
New Q values:  [   0.         1166.51141701 3958.62044909  850.80874877]
Reward: -1  Episode Reward:  31
xxxxx
x gax
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6502.92573839 -2383.80019164  2264.7628594 ]
------
Step:20, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.24052864e+03 -3.22965309e-01  2.02830109e+03]
New Q values:  [ 8.43634063e+00  3.39580115e+03 -3.22965309e-01  2.02830109e+03]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 3184.17181468  9647.29896653 -8896.20691497   637.30368728]
------
Step:21, Action:South
State  208
Old Q Values:  [19137.38412417 65608.82949422 -4584.50430574 -1713.91177491]
New Q values:  [19137.38412417 90395.876163   -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100049
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 4090.85054413  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [2088.86780274   26.73544252 4090.85054413  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 4834.72939996  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
------
Step:2, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612 35704.99078958]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612 15731.81513582]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 4834.72939996  123.6214372 ]
------
Step:3, Action:East
State  261
Old Q Values:  [2088.86780274   26.73544252 4834.72939996  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 6652.83630073  123.6214372 ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2205.73600612 15731.81513582]
------
Step:4, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 10643.29727435]
New Q values:  [3915.56039739 -168.92307549 4979.82966255 6252.56979996]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 6652.83630073  123.6214372 ]
------
Step:5, Action:East
State  261
Old Q Values:  [2088.86780274   26.73544252 6652.83630073  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 7380.07906104  123.6214372 ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2205.73600612 15731.81513582]
------
Step:6, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549 4979.82966255 6252.56979996]
New Q values:  [3915.56039739 -168.92307549 4979.82966255 4714.4516383 ]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 7380.07906104  123.6214372 ]
------
Step:7, Action:East
State  261
Old Q Values:  [2088.86780274   26.73544252 7380.07906104  123.6214372 ]
New Q values:  [2088.86780274   26.73544252 4445.38052318  123.6214372 ]
Reward: -1  Episode Reward:  3
xxxxx
x...x
x.. x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 4714.4516383 ]
------
Step:8, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612 15731.81513582]
New Q values:  [  175.14749589 -8521.23367799  2205.73600612  7625.74021128]
Reward: -1  Episode Reward:  2
xxxxx
x...x
x.. x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2088.86780274   26.73544252 4445.38052318  123.6214372 ]
------
Step:9, Action:North
State  261
Old Q Values:  [2088.86780274   26.73544252 4445.38052318  123.6214372 ]
New Q values:  [2203.81122122   26.73544252 4445.38052318  123.6214372 ]
Reward: 9  Episode Reward:  11
xxxxx
x...x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1587.65515949  927.40283941 4542.88033375  262.76946019]
------
Step:10, Action:North
State  181
Old Q Values:  [1587.65515949  927.40283941 4542.88033375  262.76946019]
New Q values:  [1487.46533233  927.40283941 4542.88033375  262.76946019]
Reward: 9  Episode Reward:  20
xxxxx
xa..x
x .gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2823.34422845  238.35800069    0.        ]
------
Step:11, Action:South
State  109
Old Q Values:  [ -241.10880094  3384.66213638 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  2716.12895468 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  19
xxxxx
x .gx
xa. x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1487.46533233  927.40283941 4542.88033375  262.76946019]
------
Step:12, Action:East
State  181
Old Q Values:  [1487.46533233  927.40283941 4542.88033375  262.76946019]
New Q values:  [1487.46533233  927.40283941 4639.73804065  262.76946019]
Reward: 9  Episode Reward:  28
xxxxx
x g.x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193  9390.61969049  3512.17261526]
------
Step:13, Action:East
State  193
Old Q Values:  [-5922.26708831  6251.2513774  12656.98806325  1460.9765133 ]
New Q values:  [-5922.26708831  6251.2513774  32180.9580742   1460.9765133 ]
Reward: -1  Episode Reward:  27
xxxxx
x .gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417 90395.876163   -4584.50430574 -1713.91177491]
------
Step:14, Action:South
State  208
Old Q Values:  [19137.38412417 90395.876163   -4584.50430574 -1713.91177491]
New Q values:  [19137.38412417 40310.69483051 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  36
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8370.36232995 -6442.16912869 -8192.20126966 13823.14788438]
------
Step:15, Action:West
State  288
Old Q Values:  [ 8370.36232995 -6442.16912869 -8192.20126966 13823.14788438]
New Q values:  [ 8370.36232995 -6442.16912869 -8192.20126966  7022.60805252]
Reward: -1  Episode Reward:  35
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 4714.4516383 ]
------
Step:16, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197   855.18004824  1711.488588  ]
New Q values:  [ 7058.83631802 -5807.06396197  2852.58071828  1711.488588  ]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8370.36232995 -6442.16912869 -8192.20126966  7022.60805252]
------
Step:17, Action:North
State  288
Old Q Values:  [ 8370.36232995 -6442.16912869 -8192.20126966  7022.60805252]
New Q values:  [15440.75338113 -6442.16912869 -8192.20126966  7022.60805252]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[19137.38412417 40310.69483051 -4584.50430574 -1713.91177491]
------
Step:18, Action:South
State  208
Old Q Values:  [19137.38412417 40310.69483051 -4584.50430574 -1713.91177491]
New Q values:  [19137.38412417 20755.90394655 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15440.75338113 -6442.16912869 -8192.20126966  7022.60805252]
------
Step:19, Action:North
State  288
Old Q Values:  [15440.75338113 -6442.16912869 -8192.20126966  7022.60805252]
New Q values:  [19285.28574033 -6442.16912869 -8192.20126966  7022.60805252]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[43698.61462624  1069.97627293   790.72804752  1050.85266124]
------
Step:20, Action:North
State  208
Old Q Values:  [19137.38412417 20755.90394655 -4584.50430574 -1713.91177491]
New Q values:  [36333.18376355 20755.90394655 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  40
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    8725.54032203  -180.00807518 95576.10037961]
------
Step:21, Action:West
State  130
Old Q Values:  [26266.584521    8725.54032203  -180.00807518 95576.10037961]
New Q values:  [ 26266.584521     8725.54032203   -180.00807518 134271.79397734]
Reward: 100009  Episode Reward:  100049
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2203.81122122   26.73544252 4445.38052318  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [2203.81122122   26.73544252 4445.38052318  123.6214372 ]
New Q values:  [2203.81122122   26.73544252 3277.50110804  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 4714.4516383 ]
------
Step:2, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2205.73600612  7625.74021128]
New Q values:  [  175.14749589 -8521.23367799  6667.28012454  7625.74021128]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19285.28574033 -6442.16912869 -8192.20126966  7022.60805252]
------
Step:3, Action:North
State  288
Old Q Values:  [19285.28574033 -6442.16912869 -8192.20126966  7022.60805252]
New Q values:  [12619.4694252  -6442.16912869 -8192.20126966  7022.60805252]
Reward: -9991  Episode Reward:  -9983
xxxxx
x...x
x..gx
x   x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2332.68282758   374.05438751]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1643.50176029   210.47853695]
New Q values:  [ -281.736      -1150.91067548  1681.54104843   210.47853695]
Reward: 9  Episode Reward:  9
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.39580115e+03 -3.22965309e-01  2.02830109e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.39580115e+03 -3.22965309e-01  2.02830109e+03]
New Q values:  [ 8.43634063e+00  4.25791015e+03 -3.22965309e-01  2.02830109e+03]
Reward: 9  Episode Reward:  18
xxxxx
x   x
x.gax
x...x
xxxxx
Step:3, Action:North
State  210
Old Q Values:  [43698.61462624  1069.97627293   790.72804752  1050.85266124]
New Q values:  [18756.21889521  1069.97627293   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  17
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.25791015e+03 -3.22965309e-01  2.02830109e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.25791015e+03 -3.22965309e-01  2.02830109e+03]
New Q values:  [ 8.43634063e+00  7.32942973e+03 -3.22965309e-01  2.02830109e+03]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[18756.21889521  1069.97627293   790.72804752  1050.85266124]
------
Step:5, Action:North
State  216
Old Q Values:  [ 3184.17181468  9647.29896653 -8896.20691497   637.30368728]
New Q values:  [ 3471.89764432  9647.29896653 -8896.20691497   637.30368728]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.32942973e+03 -3.22965309e-01  2.02830109e+03]
------
Step:6, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  7.32942973e+03 -3.22965309e-01  2.02830109e+03]
New Q values:  [ 8.43634063e+00  7.32942973e+03 -3.22965309e-01  9.86959862e+02]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   587.46475671   388.74590699]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1681.54104843   210.47853695]
New Q values:  [ -281.736      -1150.91067548  2870.84533782   210.47853695]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  7.32942973e+03 -3.22965309e-01  9.86959862e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  7.32942973e+03 -3.22965309e-01  9.86959862e+02]
New Q values:  [ 8.43634063e+00  8.55803756e+03 -3.22965309e-01  9.86959862e+02]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[18756.21889521  1069.97627293   790.72804752  1050.85266124]
------
Step:9, Action:North
State  210
Old Q Values:  [18756.21889521  1069.97627293   790.72804752  1050.85266124]
New Q values:  [10069.29882603  1069.97627293   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  11
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  8.55803756e+03 -3.22965309e-01  9.86959862e+02]
------
Step:10, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  8.55803756e+03 -3.22965309e-01  9.86959862e+02]
New Q values:  [ 8.43634063e+00  6.44340467e+03 -3.22965309e-01  9.86959862e+02]
Reward: -1  Episode Reward:  10
xxxxx
x   x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10069.29882603  1069.97627293   790.72804752  1050.85266124]
------
Step:11, Action:North
State  210
Old Q Values:  [10069.29882603  1069.97627293   790.72804752  1050.85266124]
New Q values:  [5960.14093194 1069.97627293  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  9
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  6.44340467e+03 -3.22965309e-01  9.86959862e+02]
------
Step:12, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  6.44340467e+03 -3.22965309e-01  9.86959862e+02]
New Q values:  [ 8.43634063e+00  4.36480415e+03 -3.22965309e-01  9.86959862e+02]
Reward: -1  Episode Reward:  8
xxxxx
x   x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5960.14093194 1069.97627293  790.72804752 1050.85266124]
------
Step:13, Action:North
State  210
Old Q Values:  [5960.14093194 1069.97627293  790.72804752 1050.85266124]
New Q values:  [3692.89761726 1069.97627293  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  7
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  4.36480415e+03 -3.22965309e-01  9.86959862e+02]
------
Step:14, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  4.36480415e+03 -3.22965309e-01  9.86959862e+02]
New Q values:  [ 8.43634063e+00  2.85319094e+03 -3.22965309e-01  9.86959862e+02]
Reward: -1  Episode Reward:  6
xxxxx
x   x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3692.89761726 1069.97627293  790.72804752 1050.85266124]
------
Step:15, Action:North
State  210
Old Q Values:  [3692.89761726 1069.97627293  790.72804752 1050.85266124]
New Q values:  [2332.51633025 1069.97627293  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  5
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.85319094e+03 -3.22965309e-01  9.86959862e+02]
------
Step:16, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.85319094e+03 -3.22965309e-01  9.86959862e+02]
New Q values:  [ 8.43634063e+00  1.84043128e+03 -3.22965309e-01  9.86959862e+02]
Reward: -1  Episode Reward:  4
xxxxx
x   x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 1069.97627293  790.72804752 1050.85266124]
------
Step:17, Action:North
State  216
Old Q Values:  [ 3471.89764432  9647.29896653 -8896.20691497   637.30368728]
New Q values:  [ 1940.28844079  9647.29896653 -8896.20691497   637.30368728]
Reward: -1  Episode Reward:  3
xxxxx
x  ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.84043128e+03 -3.22965309e-01  9.86959862e+02]
------
Step:18, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.84043128e+03 -3.22965309e-01  9.86959862e+02]
New Q values:  [ 8.43634063e+00  1.84043128e+03 -3.22965309e-01  5.70423372e+02]
Reward: -1  Episode Reward:  2
xxxxx
x a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   587.46475671   388.74590699]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2870.84533782   210.47853695]
New Q values:  [ -281.736      -1150.91067548  1699.86751819   210.47853695]
Reward: -1  Episode Reward:  1
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.84043128e+03 -3.22965309e-01  5.70423372e+02]
------
Step:20, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.84043128e+03 -3.22965309e-01  5.70423372e+02]
New Q values:  [ 8.43634063e+00  3.62976220e+03 -3.22965309e-01  5.70423372e+02]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1940.28844079  9647.29896653 -8896.20691497   637.30368728]
------
Step:21, Action:South
State  208
Old Q Values:  [36333.18376355 20755.90394655 -4584.50430574 -1713.91177491]
New Q values:  [36333.18376355 12093.60240618 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12619.4694252  -6442.16912869 -8192.20126966  7022.60805252]
------
Step:22, Action:North
State  288
Old Q Values:  [12619.4694252  -6442.16912869 -8192.20126966  7022.60805252]
New Q values:  [15947.14289914 -6442.16912869 -8192.20126966  7022.60805252]
Reward: -1  Episode Reward:  8
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36333.18376355 12093.60240618 -4584.50430574 -1713.91177491]
------
Step:23, Action:South
State  208
Old Q Values:  [36333.18376355 12093.60240618 -4584.50430574 -1713.91177491]
New Q values:  [36333.18376355  9620.98383221 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  7
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15947.14289914 -6442.16912869 -8192.20126966  7022.60805252]
------
Step:24, Action:North
State  288
Old Q Values:  [15947.14289914 -6442.16912869 -8192.20126966  7022.60805252]
New Q values:  [17278.21228872 -6442.16912869 -8192.20126966  7022.60805252]
Reward: -1  Episode Reward:  6
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36333.18376355  9620.98383221 -4584.50430574 -1713.91177491]
------
Step:25, Action:North
State  208
Old Q Values:  [36333.18376355  9620.98383221 -4584.50430574 -1713.91177491]
New Q values:  [16483.55122694  9620.98383221 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  5
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  6502.92573839 -2383.80019164  2264.7628594 ]
------
Step:26, Action:South
State  136
Old Q Values:  [ -170.77177351  6502.92573839 -2383.80019164  2264.7628594 ]
New Q values:  [ -170.77177351  7545.63566344 -2383.80019164  2264.7628594 ]
Reward: -1  Episode Reward:  4
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16483.55122694  9620.98383221 -4584.50430574 -1713.91177491]
------
Step:27, Action:South
State  208
Old Q Values:  [16483.55122694  9620.98383221 -4584.50430574 -1713.91177491]
New Q values:  [16483.55122694  9031.2572195  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  3
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17278.21228872 -6442.16912869 -8192.20126966  7022.60805252]
------
Step:28, Action:West
State  288
Old Q Values:  [17278.21228872 -6442.16912869 -8192.20126966  7022.60805252]
New Q values:  [17278.21228872 -6442.16912869 -8192.20126966  4308.39211977]
Reward: 9  Episode Reward:  12
xxxxx
x   x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 4979.82966255 4714.4516383 ]
------
Step:29, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549 4979.82966255 4714.4516383 ]
New Q values:  [ 3915.56039739  -168.92307549  4979.82966255 14420.28923523]
Reward: 9  Episode Reward:  21
xxxxx
x   x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41763.69526638 12764.58618105  7782.63176044  1875.31501677]
------
Step:30, Action:North
State  257
Old Q Values:  [41763.69526638 12764.58618105  7782.63176044  1875.31501677]
New Q values:  [26764.04527655 12764.58618105  7782.63176044  1875.31501677]
Reward: 9  Episode Reward:  30
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 23344.73803738 16155.09385617     0.        ]
------
Step:31, Action:North
State  181
Old Q Values:  [1487.46533233  927.40283941 4639.73804065  262.76946019]
New Q values:  [1269.28539092  927.40283941 4639.73804065  262.76946019]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2207.91712804 2249.66419329 -120.29354603]
------
Step:32, Action:East
State  111
Old Q Values:  [-177.44732869 2207.91712804 2249.66419329 -120.29354603]
New Q values:  [-177.44732869 2207.91712804 1075.50510433 -120.29354603]
Reward: -1  Episode Reward:  28
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   587.46475671   388.74590699]
------
Step:33, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1699.86751819   210.47853695]
New Q values:  [ -281.736      -1150.91067548  1768.27566749   210.47853695]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.62976220e+03 -3.22965309e-01  5.70423372e+02]
------
Step:34, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.62976220e+03 -3.22965309e-01  5.70423372e+02]
New Q values:  [ 8.43634063e+00  2.15105978e+03 -3.22965309e-01  5.70423372e+02]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 1069.97627293  790.72804752 1050.85266124]
------
Step:35, Action:North
State  208
Old Q Values:  [16483.55122694  9031.2572195  -4584.50430574 -1713.91177491]
New Q values:  [ 7238.13842458  9031.2572195  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.15105978e+03 -3.22965309e-01  5.70423372e+02]
------
Step:36, Action:West
State  130
Old Q Values:  [ 26266.584521     8725.54032203   -180.00807518 134271.79397734]
New Q values:  [26266.584521    8725.54032203  -180.00807518 89744.07141643]
Reward: -1  Episode Reward:  24
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 120119.84608498]
------
Step:37, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2332.68282758   374.05438751]
New Q values:  [-9594.56523706 -8069.05606225  2332.68282758   963.86044141]
Reward: -1  Episode Reward:  23
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2716.12895468 -2165.66138672  -180.6       ]
------
Step:38, Action:South
State  109
Old Q Values:  [ -241.10880094  2716.12895468 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  2477.77299406 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1269.28539092  927.40283941 4639.73804065  262.76946019]
------
Step:39, Action:East
State  177
Old Q Values:  [33510.55723334 23344.73803738 16155.09385617     0.        ]
New Q values:  [33510.55723334 23344.73803738 76121.72496473     0.        ]
Reward: 100009  Episode Reward:  100031
xxxxx
x   x
x agx
x   x
xxxxx
Episode # 1100
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  7545.63566344 -2383.80019164  2264.7628594 ]
------
Step:1, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.15105978e+03 -3.22965309e-01  5.70423372e+02]
New Q values:  [ 8.43634063e+00  3.76001360e+03 -3.22965309e-01  5.70423372e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1940.28844079  9647.29896653 -8896.20691497   637.30368728]
------
Step:2, Action:South
State  210
Old Q Values:  [2332.51633025 1069.97627293  790.72804752 1050.85266124]
New Q values:  [2332.51633025 5616.85419579  790.72804752 1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17278.21228872 -6442.16912869 -8192.20126966  4308.39211977]
------
Step:3, Action:North
State  288
Old Q Values:  [17278.21228872 -6442.16912869 -8192.20126966  4308.39211977]
New Q values:  [ 9620.06208134 -6442.16912869 -8192.20126966  4308.39211977]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7238.13842458  9031.2572195  -4584.50430574 -1713.91177491]
------
Step:4, Action:South
State  210
Old Q Values:  [2332.51633025 5616.85419579  790.72804752 1050.85266124]
New Q values:  [2332.51633025 5132.16030272  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9620.06208134 -6442.16912869 -8192.20126966  4308.39211977]
------
Step:5, Action:North
State  288
Old Q Values:  [ 9620.06208134 -6442.16912869 -8192.20126966  4308.39211977]
New Q values:  [ 6556.80199839 -6442.16912869 -8192.20126966  4308.39211977]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7238.13842458  9031.2572195  -4584.50430574 -1713.91177491]
------
Step:6, Action:South
State  208
Old Q Values:  [ 7238.13842458  9031.2572195  -4584.50430574 -1713.91177491]
New Q values:  [ 7238.13842458  5578.94348732 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6556.80199839 -6442.16912869 -8192.20126966  4308.39211977]
------
Step:7, Action:North
State  288
Old Q Values:  [ 6556.80199839 -6442.16912869 -8192.20126966  4308.39211977]
New Q values:  [ 4161.76889017 -6442.16912869 -8192.20126966  4308.39211977]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 5132.16030272  790.72804752 1050.85266124]
------
Step:8, Action:South
State  208
Old Q Values:  [ 7238.13842458  5578.94348732 -4584.50430574 -1713.91177491]
New Q values:  [ 7238.13842458  3523.49503086 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4161.76889017 -6442.16912869 -8192.20126966  4308.39211977]
------
Step:9, Action:West
State  288
Old Q Values:  [ 4161.76889017 -6442.16912869 -8192.20126966  4308.39211977]
New Q values:  [ 4161.76889017 -6442.16912869 -8192.20126966  4016.47891129]
Reward: 9  Episode Reward:  21
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  6667.28012454  7625.74021128]
------
Step:10, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  6667.28012454  7625.74021128]
New Q values:  [  175.14749589 -8521.23367799  6667.28012454 11084.90966748]
Reward: 9  Episode Reward:  30
xxxxx
x.  x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26764.04527655 12764.58618105  7782.63176044  1875.31501677]
------
Step:11, Action:North
State  257
Old Q Values:  [26764.04527655 12764.58618105  7782.63176044  1875.31501677]
New Q values:  [33547.53560004 12764.58618105  7782.63176044  1875.31501677]
Reward: 9  Episode Reward:  39
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 23344.73803738 76121.72496473     0.        ]
------
Step:12, Action:North
State  180
Old Q Values:  [  179.68113896  8050.99445658  6371.288895   -4966.32149798]
New Q values:  [  160.62456153  8050.99445658  6371.288895   -4966.32149798]
Reward: 9  Episode Reward:  48
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -1296.98273838   277.84035317  -180.6       ]
------
Step:13, Action:East
State  110
Old Q Values:  [ -239.29051573 -1296.98273838   277.84035317  -180.6       ]
New Q values:  [ -239.29051573 -1296.98273838   641.01884151  -180.6       ]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1768.27566749   210.47853695]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1768.27566749   210.47853695]
New Q values:  [ -281.736      -1150.91067548  1834.71434751   210.47853695]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.76001360e+03 -3.22965309e-01  5.70423372e+02]
------
Step:15, Action:South
State  130
Old Q Values:  [26266.584521    8725.54032203  -180.00807518 89744.07141643]
New Q values:  [26266.584521    5661.05765619  -180.00807518 89744.07141643]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7238.13842458  3523.49503086 -4584.50430574 -1713.91177491]
------
Step:16, Action:North
State  208
Old Q Values:  [ 7238.13842458  3523.49503086 -4584.50430574 -1713.91177491]
New Q values:  [ 4022.65945034  3523.49503086 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.76001360e+03 -3.22965309e-01  5.70423372e+02]
------
Step:17, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.76001360e+03 -3.22965309e-01  5.70423372e+02]
New Q values:  [ 8.43634063e+00  3.04305353e+03 -3.22965309e-01  5.70423372e+02]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 5132.16030272  790.72804752 1050.85266124]
------
Step:18, Action:South
State  210
Old Q Values:  [2332.51633025 5132.16030272  790.72804752 1050.85266124]
New Q values:  [2332.51633025 3300.79478814  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4161.76889017 -6442.16912869 -8192.20126966  4016.47891129]
------
Step:19, Action:North
State  288
Old Q Values:  [ 4161.76889017 -6442.16912869 -8192.20126966  4016.47891129]
New Q values:  [ 2654.34599251 -6442.16912869 -8192.20126966  4016.47891129]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 3300.79478814  790.72804752 1050.85266124]
------
Step:20, Action:South
State  210
Old Q Values:  [2332.51633025 3300.79478814  790.72804752 1050.85266124]
New Q values:  [2332.51633025 2524.66158864  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2654.34599251 -6442.16912869 -8192.20126966  4016.47891129]
------
Step:21, Action:North
State  288
Old Q Values:  [ 2654.34599251 -6442.16912869 -8192.20126966  4016.47891129]
New Q values:  [ 1818.5368736  -6442.16912869 -8192.20126966  4016.47891129]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x .ax
x  gx
xxxxx
Step:22, Action:North
State  208
Old Q Values:  [ 4022.65945034  3523.49503086 -4584.50430574 -1713.91177491]
New Q values:  [ 2521.37983959  3523.49503086 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.04305353e+03 -3.22965309e-01  5.70423372e+02]
------
Step:23, Action:West
State  136
Old Q Values:  [ -170.77177351  7545.63566344 -2383.80019164  2264.7628594 ]
New Q values:  [ -170.77177351  7545.63566344 -2383.80019164  1029.92139487]
Reward: -1  Episode Reward:  37
xxxxx
x agx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:24, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   587.46475671   388.74590699]
New Q values:  [ -253.44886264 -1902.20915811   587.46475671   817.27350121]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2207.91712804 1075.50510433 -120.29354603]
------
Step:25, Action:South
State  109
Old Q Values:  [ -241.10880094  2477.77299406 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  2382.43060982 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1269.28539092  927.40283941 4639.73804065  262.76946019]
------
Step:26, Action:East
State  177
Old Q Values:  [33510.55723334 23344.73803738 76121.72496473     0.        ]
New Q values:  [33510.55723334 23344.73803738 92932.7738045      0.        ]
Reward: 100009  Episode Reward:  100044
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   587.46475671   817.27350121]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1834.71434751   210.47853695]
New Q values:  [ -281.736      -1150.91067548  1834.71434751   209.91941864]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x...x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621  391.08740767 -252.78192178]
------
Step:2, Action:South
State  110
Old Q Values:  [ -239.29051573 -1296.98273838   641.01884151  -180.6       ]
New Q values:  [-239.29051573  915.74185405  641.01884151 -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa..x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:3, Action:East
State  180
Old Q Values:  [  160.62456153  8050.99445658  6371.288895   -4966.32149798]
New Q values:  [  160.62456153  8050.99445658  5371.10146515 -4966.32149798]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
xga.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  6635.39049193  9390.61969049  3512.17261526]
------
Step:4, Action:East
State  200
Old Q Values:  [   62.8218634  13747.50885976  2186.25196094   568.38654082]
New Q values:  [   62.8218634  13747.50885976  3774.09047434   568.38654082]
Reward: 9  Episode Reward:  36
xxxxx
xg .x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1940.28844079  9647.29896653 -8896.20691497   637.30368728]
------
Step:5, Action:South
State  216
Old Q Values:  [ 1940.28844079  9647.29896653 -8896.20691497   637.30368728]
New Q values:  [ 1940.28844079  5063.26326    -8896.20691497   637.30368728]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1818.5368736  -6442.16912869 -8192.20126966  4016.47891129]
------
Step:6, Action:West
State  288
Old Q Values:  [ 1818.5368736  -6442.16912869 -8192.20126966  4016.47891129]
New Q values:  [ 1818.5368736  -6442.16912869 -8192.20126966  4937.46446476]
Reward: 9  Episode Reward:  44
xxxxx
xg .x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  6667.28012454 11084.90966748]
------
Step:7, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2852.58071828  1711.488588  ]
New Q values:  [ 7058.83631802 -5807.06396197  2852.58071828  1468.8503263 ]
Reward: 9  Episode Reward:  53
xxxxx
x  .x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1538.55279863 -5704.51612281  2596.18297033 -5679.36893145]
------
Step:8, Action:East
State  261
Old Q Values:  [2203.81122122   26.73544252 3277.50110804  123.6214372 ]
New Q values:  [2203.81122122   26.73544252 3428.05133862  123.6214372 ]
Reward: -1  Episode Reward:  52
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2852.58071828  1468.8503263 ]
------
Step:9, Action:East
State  277
Old Q Values:  [   1.64433       0.          315.20206867 1140.34850333]
New Q values:  [   1.64433       0.         1606.7201669  1140.34850333]
Reward: -1  Episode Reward:  51
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1818.5368736  -6442.16912869 -8192.20126966  4937.46446476]
------
Step:10, Action:West
State  288
Old Q Values:  [ 1818.5368736  -6442.16912869 -8192.20126966  4937.46446476]
New Q values:  [ 1818.5368736  -6442.16912869 -8192.20126966  4092.03668131]
Reward: -1  Episode Reward:  50
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2852.58071828  1468.8503263 ]
------
Step:11, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2852.58071828  1468.8503263 ]
New Q values:  [ 7058.83631802 -5807.06396197  2368.0432917   1468.8503263 ]
Reward: -1  Episode Reward:  49
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1818.5368736  -6442.16912869 -8192.20126966  4092.03668131]
------
Step:12, Action:West
State  288
Old Q Values:  [ 1818.5368736  -6442.16912869 -8192.20126966  4092.03668131]
New Q values:  [ 1818.5368736  -6442.16912869 -8192.20126966  3753.86556793]
Reward: -1  Episode Reward:  48
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2368.0432917   1468.8503263 ]
------
Step:13, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2368.0432917   1468.8503263 ]
New Q values:  [ 7058.83631802 -5807.06396197  2072.77698706  1468.8503263 ]
Reward: -1  Episode Reward:  47
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1818.5368736  -6442.16912869 -8192.20126966  3753.86556793]
------
Step:14, Action:West
State  288
Old Q Values:  [ 1818.5368736  -6442.16912869 -8192.20126966  3753.86556793]
New Q values:  [ 1818.5368736  -6442.16912869 -8192.20126966  3618.59712258]
Reward: -1  Episode Reward:  46
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2072.77698706  1468.8503263 ]
------
Step:15, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2072.77698706  1468.8503263 ]
New Q values:  [ 7058.83631802 -5807.06396197  1914.0899316   1468.8503263 ]
Reward: -1  Episode Reward:  45
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1818.5368736  -6442.16912869 -8192.20126966  3618.59712258]
------
Step:16, Action:West
State  288
Old Q Values:  [ 1818.5368736  -6442.16912869 -8192.20126966  3618.59712258]
New Q values:  [ 1818.5368736  -6442.16912869 -8192.20126966  5772.9256196 ]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 14420.28923523]
------
Step:17, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  1914.0899316   1468.8503263 ]
New Q values:  [ 7058.83631802 -5807.06396197  1914.0899316   1615.35553211]
Reward: -1  Episode Reward:  43
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2203.81122122   26.73544252 3428.05133862  123.6214372 ]
------
Step:18, Action:East
State  257
Old Q Values:  [33547.53560004 12764.58618105  7782.63176044  1875.31501677]
New Q values:  [33547.53560004 12764.58618105  7438.53947475  1875.31501677]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 14420.28923523]
------
Step:19, Action:West
State  277
Old Q Values:  [   1.64433       0.         1606.7201669  1140.34850333]
New Q values:  [   1.64433       0.         1606.7201669  1483.95480292]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2203.81122122   26.73544252 3428.05133862  123.6214372 ]
------
Step:20, Action:East
State  261
Old Q Values:  [2203.81122122   26.73544252 3428.05133862  123.6214372 ]
New Q values:  [2203.81122122   26.73544252 3488.27143085  123.6214372 ]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  1914.0899316   1615.35553211]
------
Step:21, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  1914.0899316   1615.35553211]
New Q values:  [ 7058.83631802 -5807.06396197  2496.91365852  1615.35553211]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1818.5368736  -6442.16912869 -8192.20126966  5772.9256196 ]
------
Step:22, Action:North
State  288
Old Q Values:  [ 1818.5368736  -6442.16912869 -8192.20126966  5772.9256196 ]
New Q values:  [ 1484.21322603 -6442.16912869 -8192.20126966  5772.9256196 ]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 2524.66158864  790.72804752 1050.85266124]
------
Step:23, Action:South
State  210
Old Q Values:  [2332.51633025 2524.66158864  790.72804752 1050.85266124]
New Q values:  [2332.51633025 2741.14232134  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1484.21322603 -6442.16912869 -8192.20126966  5772.9256196 ]
------
Step:24, Action:North
State  288
Old Q Values:  [ 1484.21322603 -6442.16912869 -8192.20126966  5772.9256196 ]
New Q values:  [ 1650.13379967 -6442.16912869 -8192.20126966  5772.9256196 ]
Reward: -1  Episode Reward:  36
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2521.37983959  3523.49503086 -4584.50430574 -1713.91177491]
------
Step:25, Action:South
State  208
Old Q Values:  [ 2521.37983959  3523.49503086 -4584.50430574 -1713.91177491]
New Q values:  [ 2521.37983959  3140.67569822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1650.13379967 -6442.16912869 -8192.20126966  5772.9256196 ]
------
Step:26, Action:West
State  288
Old Q Values:  [ 1650.13379967 -6442.16912869 -8192.20126966  5772.9256196 ]
New Q values:  [ 1650.13379967 -6442.16912869 -8192.20126966  6634.65701841]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  4979.82966255 14420.28923523]
------
Step:27, Action:West
State  277
Old Q Values:  [   1.64433       0.         1606.7201669  1483.95480292]
New Q values:  [   1.64433       0.         1606.7201669  1639.46335042]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2203.81122122   26.73544252 3488.27143085  123.6214372 ]
------
Step:28, Action:East
State  261
Old Q Values:  [2203.81122122   26.73544252 3488.27143085  123.6214372 ]
New Q values:  [2203.81122122   26.73544252 1886.54757747  123.6214372 ]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1606.7201669  1639.46335042]
------
Step:29, Action:West
State  277
Old Q Values:  [   1.64433       0.         1606.7201669  1639.46335042]
New Q values:  [   1.64433       0.         1606.7201669  1316.32870654]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2203.81122122   26.73544252 1886.54757747  123.6214372 ]
------
Step:30, Action:North
State  261
Old Q Values:  [2203.81122122   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [2502.05380461   26.73544252 1886.54757747  123.6214372 ]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1686.11868129 5403.76438706 1554.80203889]
------
Step:31, Action:East
State  181
Old Q Values:  [1269.28539092  927.40283941 4639.73804065  262.76946019]
New Q values:  [ 1269.28539092   927.40283941 -1327.5188766    262.76946019]
Reward: -10001  Episode Reward:  -9971
xxxxx
x  .x
x g x
x   x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   587.46475671   817.27350121]
New Q values:  [ -253.44886264 -1902.20915811   587.46475671   994.68453889]
Reward: 9  Episode Reward:  9
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2207.91712804 1075.50510433 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 2207.91712804 1075.50510433 -120.29354603]
New Q values:  [-177.44732869 2638.45040486 1075.50510433 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1400.74270712 5832.94517882  154.04646645]
------
Step:3, Action:South
State  181
Old Q Values:  [ 1269.28539092   927.40283941 -1327.5188766    262.76946019]
New Q values:  [ 1269.28539092  1126.97727715 -1327.5188766    262.76946019]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2502.05380461   26.73544252 1886.54757747  123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [2502.05380461   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [2750.10507549   26.73544252 1886.54757747  123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1400.74270712 5832.94517882  154.04646645]
------
Step:5, Action:South
State  189
Old Q Values:  [ 275.08817949 1400.74270712 5832.94517882  154.04646645]
New Q values:  [ 275.08817949 1384.72860549 5832.94517882  154.04646645]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2750.10507549   26.73544252 1886.54757747  123.6214372 ]
------
Step:6, Action:North
State  261
Old Q Values:  [2750.10507549   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [2849.32558384   26.73544252 1886.54757747  123.6214372 ]
Reward: -1  Episode Reward:  24
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1384.72860549 5832.94517882  154.04646645]
------
Step:7, Action:South
State  181
Old Q Values:  [ 1269.28539092  1126.97727715 -1327.5188766    262.76946019]
New Q values:  [ 1269.28539092  1304.98858601 -1327.5188766    262.76946019]
Reward: -1  Episode Reward:  23
xxxxx
x g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2849.32558384   26.73544252 1886.54757747  123.6214372 ]
------
Step:8, Action:North
State  261
Old Q Values:  [2849.32558384   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [2889.01378718   26.73544252 1886.54757747  123.6214372 ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1384.72860549 5832.94517882  154.04646645]
------
Step:9, Action:South
State  189
Old Q Values:  [ 275.08817949 1384.72860549 5832.94517882  154.04646645]
New Q values:  [ 275.08817949 1419.99557835 5832.94517882  154.04646645]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2889.01378718   26.73544252 1886.54757747  123.6214372 ]
------
Step:10, Action:North
State  261
Old Q Values:  [2889.01378718   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [2904.88906852   26.73544252 1886.54757747  123.6214372 ]
Reward: -1  Episode Reward:  20
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1419.99557835 5832.94517882  154.04646645]
------
Step:11, Action:South
State  189
Old Q Values:  [ 275.08817949 1419.99557835 5832.94517882  154.04646645]
New Q values:  [ 275.08817949 1438.8649519  5832.94517882  154.04646645]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2904.88906852   26.73544252 1886.54757747  123.6214372 ]
------
Step:12, Action:North
State  261
Old Q Values:  [2904.88906852   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [2782.48494353   26.73544252 1886.54757747  123.6214372 ]
Reward: -1  Episode Reward:  18
xxxxx
x   x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1686.11868129 5403.76438706 1554.80203889]
------
Step:13, Action:East
State  189
Old Q Values:  [ 275.08817949 1438.8649519  5832.94517882  154.04646645]
New Q values:  [ 275.08817949 1438.8649519  2996.15271932  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  200.3419716 ]
------
Step:14, Action:South
State  196
Old Q Values:  [-2469.90645144  6635.39049193  9390.61969049  3512.17261526]
New Q values:  [-2469.90645144  4777.20709218  9390.61969049  3512.17261526]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2496.91365852  1615.35553211]
------
Step:15, Action:East
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  4979.82966255 14420.28923523]
New Q values:  [ 3915.56039739  -168.92307549  3987.72897054 14420.28923523]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1650.13379967 -6442.16912869 -8192.20126966  6634.65701841]
------
Step:16, Action:West
State  288
Old Q Values:  [ 1650.13379967 -6442.16912869 -8192.20126966  6634.65701841]
New Q values:  [ 1650.13379967 -6442.16912869 -8192.20126966  4770.91370277]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2496.91365852  1615.35553211]
------
Step:17, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2496.91365852  1615.35553211]
New Q values:  [ 7058.83631802 -5807.06396197  2429.43957424  1615.35553211]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1650.13379967 -6442.16912869 -8192.20126966  4770.91370277]
------
Step:18, Action:West
State  288
Old Q Values:  [ 1650.13379967 -6442.16912869 -8192.20126966  4770.91370277]
New Q values:  [ 1650.13379967 -6442.16912869 -8192.20126966  4025.41637651]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2429.43957424  1615.35553211]
------
Step:19, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2429.43957424  1615.35553211]
New Q values:  [ 7058.83631802 -5807.06396197  2178.80074265  1615.35553211]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1650.13379967 -6442.16912869 -8192.20126966  4025.41637651]
------
Step:20, Action:West
State  288
Old Q Values:  [ 1650.13379967 -6442.16912869 -8192.20126966  4025.41637651]
New Q values:  [ 1650.13379967 -6442.16912869 -8192.20126966  2091.58260067]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1606.7201669  1316.32870654]
------
Step:21, Action:East
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  3987.72897054 14420.28923523]
New Q values:  [ 3915.56039739  -168.92307549  2221.96636842 14420.28923523]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1650.13379967 -6442.16912869 -8192.20126966  2091.58260067]
------
Step:22, Action:West
State  288
Old Q Values:  [ 1650.13379967 -6442.16912869 -8192.20126966  2091.58260067]
New Q values:  [ 1650.13379967 -6442.16912869 -8192.20126966  1318.04909034]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1606.7201669  1316.32870654]
------
Step:23, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2178.80074265  1615.35553211]
New Q values:  [ 7058.83631802 -5807.06396197  2178.80074265  1480.2876959 ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2782.48494353   26.73544252 1886.54757747  123.6214372 ]
------
Step:24, Action:North
State  261
Old Q Values:  [2782.48494353   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [2733.52329353   26.73544252 1886.54757747  123.6214372 ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1686.11868129 5403.76438706 1554.80203889]
------
Step:25, Action:East
State  183
Old Q Values:  [1042.28048424 1686.11868129 5403.76438706 1554.80203889]
New Q values:  [1042.28048424 1686.11868129 7294.46423529 1554.80203889]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:26, Action:East
State  192
Old Q Values:  [3.89777037e-01 8.17181197e+03 8.26227940e+03 7.32028793e+03]
New Q values:  [3.89777037e-01 8.17181197e+03 6.42525145e+04 7.32028793e+03]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
x gax
x   x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1686.11868129 7294.46423529 1554.80203889]
------
Step:1, Action:East
State  181
Old Q Values:  [ 1269.28539092  1304.98858601 -1327.5188766    262.76946019]
New Q values:  [ 1269.28539092  1304.98858601 12750.14678965   262.76946019]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x g.x
x. .x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 2741.14232134  790.72804752 1050.85266124]
------
Step:1, Action:South
State  210
Old Q Values:  [2332.51633025 2741.14232134  790.72804752 1050.85266124]
New Q values:  [2332.51633025 1596.89706844  790.72804752 1050.85266124]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1650.13379967 -6442.16912869 -8192.20126966  1318.04909034]
------
Step:2, Action:North
State  288
Old Q Values:  [ 1650.13379967 -6442.16912869 -8192.20126966  1318.04909034]
New Q values:  [ 1359.20841894 -6442.16912869 -8192.20126966  1318.04909034]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 1596.89706844  790.72804752 1050.85266124]
------
Step:3, Action:North
State  208
Old Q Values:  [ 2521.37983959  3140.67569822 -4584.50430574 -1713.91177491]
New Q values:  [27937.17336076  3140.67569822 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  17
xxxxx
x..ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 89744.07141643]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.04305353e+03 -3.22965309e-01  5.70423372e+02]
New Q values:  [ 8.43634063e+00  3.04305353e+03 -3.22965309e-01  7.83983653e+02]
Reward: 9  Episode Reward:  26
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1834.71434751   209.91941864]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1834.71434751   209.91941864]
New Q values:  [ -281.736      -1150.91067548  1646.20179845   209.91941864]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.04305353e+03 -3.22965309e-01  7.83983653e+02]
------
Step:6, Action:South
State  136
Old Q Values:  [ -170.77177351  7545.63566344 -2383.80019164  1029.92139487]
New Q values:  [ -170.77177351 11398.8062736  -2383.80019164  1029.92139487]
Reward: -1  Episode Reward:  24
xxxxx
x.g x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27937.17336076  3140.67569822 -4584.50430574 -1713.91177491]
------
Step:7, Action:North
State  208
Old Q Values:  [27937.17336076  3140.67569822 -4584.50430574 -1713.91177491]
New Q values:  [12087.18540375  3140.67569822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.04305353e+03 -3.22965309e-01  7.83983653e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.04305353e+03 -3.22965309e-01  7.83983653e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  7.83983653e+02]
Reward: -10001  Episode Reward:  -9978
xxxxx
x.  x
x..gx
x.  x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1359.20841894 -6442.16912869 -8192.20126966  1318.04909034]
------
Step:1, Action:North
State  288
Old Q Values:  [ 1359.20841894 -6442.16912869 -8192.20126966  1318.04909034]
New Q values:  [ 1248.83826665 -6442.16912869 -8192.20126966  1318.04909034]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2332.51633025 1596.89706844  790.72804752 1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [2332.51633025 1596.89706844  790.72804752 1050.85266124]
New Q values:  [27861.62795703  1596.89706844   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 89744.07141643]
------
Step:3, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  7.83983653e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  8.12854001e+02]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1646.20179845   209.91941864]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1646.20179845   209.91941864]
New Q values:  [ -281.736      -1150.91067548   901.7369196    209.91941864]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  8.12854001e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  8.12854001e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.95062676e+02]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   901.7369196    209.91941864]
------
Step:6, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2332.68282758   963.86044141]
New Q values:  [-9594.56523706 -8069.05606225  4352.11501311   963.86044141]
Reward: -1  Episode Reward:  24
xxxxx
xg ax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 11398.8062736  -2383.80019164  1029.92139487]
------
Step:7, Action:South
State  136
Old Q Values:  [ -170.77177351 11398.8062736  -2383.80019164  1029.92139487]
New Q values:  [ -170.77177351  8185.07813057 -2383.80019164  1029.92139487]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12087.18540375  3140.67569822 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  208
Old Q Values:  [12087.18540375  3140.67569822 -4584.50430574 -1713.91177491]
New Q values:  [ 1289.79760067  3140.67569822 -4584.50430574 -1713.91177491]
Reward: -10001  Episode Reward:  -9978
xxxxx
x. gx
x.  x
x.. x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  6667.28012454 11084.90966748]
------
Step:1, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  6667.28012454 11084.90966748]
New Q values:  [  175.14749589 -8521.23367799  3067.72677692 11084.90966748]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1248.83826665 -6442.16912869 -8192.20126966  1318.04909034]
------
Step:2, Action:West
State  288
Old Q Values:  [ 1248.83826665 -6442.16912869 -8192.20126966  1318.04909034]
New Q values:  [ 1248.83826665 -6442.16912869 -8192.20126966  3852.09253638]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  3067.72677692 11084.90966748]
------
Step:3, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  3067.72677692 11084.90966748]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168 11084.90966748]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1248.83826665 -6442.16912869 -8192.20126966  3852.09253638]
------
Step:4, Action:North
State  288
Old Q Values:  [ 1248.83826665 -6442.16912869 -8192.20126966  3852.09253638]
New Q values:  [ 8863.42369377 -6442.16912869 -8192.20126966  3852.09253638]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[27861.62795703  1596.89706844   790.72804752  1050.85266124]
------
Step:5, Action:North
State  208
Old Q Values:  [ 1289.79760067  3140.67569822 -4584.50430574 -1713.91177491]
New Q values:  [27444.5404652   3140.67569822 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 89744.07141643]
------
Step:6, Action:West
State  136
Old Q Values:  [ -170.77177351  8185.07813057 -2383.80019164  1029.92139487]
New Q values:  [ -170.77177351  8185.07813057 -2383.80019164   541.98480906]
Reward: 9  Episode Reward:  34
xxxxx
x.agx
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   415.3875037 ]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4352.11501311   963.86044141]
New Q values:  [-9594.56523706 -8069.05606225  4352.11501311  1164.54189839]
Reward: 9  Episode Reward:  43
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        2578.65907274  199.03311984    0.        ]
------
Step:8, Action:South
State  108
Old Q Values:  [-8463.16477134  2610.77248427  2103.93174247     0.        ]
New Q values:  [-8463.16477134  3465.00733068  2103.93174247     0.        ]
Reward: 9  Episode Reward:  52
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  160.62456153  8050.99445658  5371.10146515 -4966.32149798]
------
Step:9, Action:South
State  180
Old Q Values:  [  160.62456153  8050.99445658  5371.10146515 -4966.32149798]
New Q values:  [  160.62456153  3998.65267373  5371.10146515 -4966.32149798]
Reward: -1  Episode Reward:  51
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1538.55279863 -5704.51612281  2596.18297033 -5679.36893145]
------
Step:10, Action:East
State  260
Old Q Values:  [ 1538.55279863 -5704.51612281  2596.18297033 -5679.36893145]
New Q values:  [ 1538.55279863 -5704.51612281  4363.34608838 -5679.36893145]
Reward: -1  Episode Reward:  50
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2382.11847168 11084.90966748]
------
Step:11, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168 11084.90966748]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168  5253.42085505]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2733.52329353   26.73544252 1886.54757747  123.6214372 ]
------
Step:12, Action:North
State  257
Old Q Values:  [33547.53560004 12764.58618105  7438.53947475  1875.31501677]
New Q values:  [41298.24638137 12764.58618105  7438.53947475  1875.31501677]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 23344.73803738 92932.7738045      0.        ]
------
Step:13, Action:North
State  181
Old Q Values:  [ 1269.28539092  1304.98858601 12750.14678965   262.76946019]
New Q values:  [ 1221.84333931  1304.98858601 12750.14678965   262.76946019]
Reward: -1  Episode Reward:  47
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2382.43060982 -2165.66138672  -180.6       ]
------
Step:14, Action:South
State  109
Old Q Values:  [ -241.10880094  2382.43060982 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  4777.41628082 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1221.84333931  1304.98858601 12750.14678965   262.76946019]
------
Step:15, Action:East
State  177
Old Q Values:  [33510.55723334 23344.73803738 92932.7738045      0.        ]
New Q values:  [ 33510.55723334  23344.73803738 106832.79694406      0.        ]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   901.7369196    209.91941864]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   587.46475671   994.68453889]
New Q values:  [ -253.44886264 -1902.20915811   418.90470554   994.68453889]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.95062676e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.95062676e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.35830432e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   418.90470554   994.68453889]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   418.90470554   994.68453889]
New Q values:  [ -253.44886264 -1902.20915811   418.90470554   523.60181942]
Reward: 9  Episode Reward:  17
xxxxx
xa  x
x...x
x. gx
xxxxx
Step:4, Action:East
State  107
Old Q Values:  [-252.35169558  401.09334621  391.08740767 -252.78192178]
New Q values:  [-252.35169558  401.09334621  312.9155089  -252.78192178]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   418.90470554   523.60181942]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   901.7369196    209.91941864]
New Q values:  [ -281.736      -1150.91067548   901.7369196    203.69577132]
Reward: -1  Episode Reward:  15
xxxxx
xa  x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621  312.9155089  -252.78192178]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 2638.45040486 1075.50510433 -120.29354603]
New Q values:  [-177.44732869 4885.82419884 1075.50510433 -120.29354603]
Reward: 9  Episode Reward:  24
xxxxx
x   x
xa.gx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1221.84333931  1304.98858601 12750.14678965   262.76946019]
------
Step:7, Action:East
State  181
Old Q Values:  [ 1221.84333931  1304.98858601 12750.14678965   262.76946019]
New Q values:  [1221.84333931 1304.98858601 1922.644623    262.76946019]
Reward: -9991  Episode Reward:  -9967
xxxxx
x   x
x g.x
x. .x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27444.5404652   3140.67569822 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [27444.5404652   3140.67569822 -4584.50430574 -1713.91177491]
New Q values:  [27444.5404652   3920.69738742 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8863.42369377 -6442.16912869 -8192.20126966  3852.09253638]
------
Step:2, Action:West
State  288
Old Q Values:  [ 8863.42369377 -6442.16912869 -8192.20126966  3852.09253638]
New Q values:  [ 8863.42369377 -6442.16912869 -8192.20126966  3122.26327107]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2382.11847168  5253.42085505]
------
Step:3, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168  5253.42085505]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168 14496.24225643]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41298.24638137 12764.58618105  7438.53947475  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [2733.52329353   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [1675.60270431   26.73544252 1886.54757747  123.6214372 ]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1221.84333931 1304.98858601 1922.644623    262.76946019]
------
Step:5, Action:East
State  177
Old Q Values:  [ 33510.55723334  23344.73803738 106832.79694406      0.        ]
New Q values:  [33510.55723334 23344.73803738 52392.80619988     0.        ]
Reward: 9  Episode Reward:  45
xxxxx
x.. x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  6251.2513774  32180.9580742   1460.9765133 ]
------
Step:6, Action:South
State  195
Old Q Values:  [  38.85388605 6375.29362235 6726.56315938 1169.39963074]
New Q values:  [  38.85388605 6875.60421951 6726.56315938 1169.39963074]
Reward: -1  Episode Reward:  44
xxxxx
x.. x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  2221.96636842 14420.28923523]
------
Step:7, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168 14496.24225643]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168 18187.37081698]
Reward: -1  Episode Reward:  43
xxxxx
x.. x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41298.24638137 12764.58618105  7438.53947475  1875.31501677]
------
Step:8, Action:North
State  256
Old Q Values:  [46397.23173656 21390.00229459  7407.2956525    644.94785455]
New Q values:  [44917.26429693 21390.00229459  7407.2956525    644.94785455]
Reward: -1  Episode Reward:  42
xxxxx
x.. x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         87863.23867435     0.        ]
------
Step:9, Action:East
State  179
Old Q Values:  [16872.63888686 16101.90751562 94421.10502859     0.        ]
New Q values:  [16872.63888686 16101.90751562 41171.926679       0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x.. x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.13469489e+04  1.03161518e+03]
------
Step:10, Action:East
State  192
Old Q Values:  [3.89777037e-01 8.17181197e+03 6.42525145e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 8.17181197e+03 3.39337679e+04 7.32028793e+03]
Reward: -1  Episode Reward:  40
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27444.5404652   3920.69738742 -4584.50430574 -1713.91177491]
------
Step:11, Action:North
State  210
Old Q Values:  [27861.62795703  1596.89706844   790.72804752  1050.85266124]
New Q values:  [38067.27260774  1596.89706844   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  39
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 89744.07141643]
------
Step:12, Action:West
State  130
Old Q Values:  [26266.584521    5661.05765619  -180.00807518 89744.07141643]
New Q values:  [26266.584521    5661.05765619  -180.00807518 71938.98239207]
Reward: 9  Episode Reward:  48
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 120119.84608498]
------
Step:13, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036    4378.49673879 120119.84608498]
New Q values:  [  -180.6          3557.6642036    4378.49673879 125461.40947967]
Reward: 100009  Episode Reward:  100057
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1675.60270431   26.73544252 1886.54757747  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [1675.60270431   26.73544252 1886.54757747  123.6214372 ]
New Q values:  [1675.60270431   26.73544252 6216.23027608  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2382.11847168 18187.37081698]
------
Step:2, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168 18187.37081698]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168  9139.21740962]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1675.60270431   26.73544252 6216.23027608  123.6214372 ]
------
Step:3, Action:East
State  260
Old Q Values:  [ 1538.55279863 -5704.51612281  4363.34608838 -5679.36893145]
New Q values:  [ 1538.55279863 -5704.51612281  4486.50365824 -5679.36893145]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2382.11847168  9139.21740962]
------
Step:4, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168  9139.21740962]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168  5001.03806132]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xg. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1538.55279863 -5704.51612281  4486.50365824 -5679.36893145]
------
Step:5, Action:East
State  260
Old Q Values:  [ 1538.55279863 -5704.51612281  4486.50365824 -5679.36893145]
New Q values:  [ 1538.55279863 -5704.51612281  3294.31288169 -5679.36893145]
Reward: -1  Episode Reward:  5
xxxxx
xg..x
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2382.11847168  5001.03806132]
------
Step:6, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168  5001.03806132]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168  2988.10908903]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xg. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1538.55279863 -5704.51612281  3294.31288169 -5679.36893145]
------
Step:7, Action:East
State  260
Old Q Values:  [ 1538.55279863 -5704.51612281  3294.31288169 -5679.36893145]
New Q values:  [ 1538.55279863 -5704.51612281  2213.55787939 -5679.36893145]
Reward: -1  Episode Reward:  3
xxxxx
xg..x
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2382.11847168  2988.10908903]
------
Step:8, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168  2988.10908903]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168  3059.51271844]
Reward: -1  Episode Reward:  2
xxxxx
x.g.x
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1675.60270431   26.73544252 6216.23027608  123.6214372 ]
------
Step:9, Action:East
State  261
Old Q Values:  [1675.60270431   26.73544252 6216.23027608  123.6214372 ]
New Q values:  [1675.60270431   26.73544252 3403.74592596  123.6214372 ]
Reward: -1  Episode Reward:  1
xxxxx
x...x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2382.11847168  3059.51271844]
------
Step:10, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168  3059.51271844]
New Q values:  [  175.14749589 -8521.23367799  2382.11847168  2244.32886516]
Reward: -1  Episode Reward:  0
xxxxx
x.g.x
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1675.60270431   26.73544252 3403.74592596  123.6214372 ]
------
Step:11, Action:East
State  261
Old Q Values:  [1675.60270431   26.73544252 3403.74592596  123.6214372 ]
New Q values:  [1675.60270431   26.73544252 5686.98514096  123.6214372 ]
Reward: -1  Episode Reward:  -1
xxxxx
x..gx
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 3915.56039739  -168.92307549  2221.96636842 14420.28923523]
------
Step:12, Action:West
State  273
Old Q Values:  [ 3915.56039739  -168.92307549  2221.96636842 14420.28923523]
New Q values:  [3915.56039739 -168.92307549 2221.96636842 7473.61123638]
Reward: -1  Episode Reward:  -2
xxxxx
x..gx
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1675.60270431   26.73544252 5686.98514096  123.6214372 ]
------
Step:13, Action:East
State  261
Old Q Values:  [1675.60270431   26.73544252 5686.98514096  123.6214372 ]
New Q values:  [1675.60270431   26.73544252 2988.82959789  123.6214372 ]
Reward: -1  Episode Reward:  -3
xxxxx
x.g.x
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  2382.11847168  2244.32886516]
------
Step:14, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  2382.11847168  2244.32886516]
New Q values:  [  175.14749589 -8521.23367799  3617.2744968   2244.32886516]
Reward: 9  Episode Reward:  6
xxxxx
x...x
x.g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8863.42369377 -6442.16912869 -8192.20126966  3122.26327107]
------
Step:15, Action:North
State  288
Old Q Values:  [ 8863.42369377 -6442.16912869 -8192.20126966  3122.26327107]
New Q values:  [ 5778.13161707 -6442.16912869 -8192.20126966  3122.26327107]
Reward: -10001  Episode Reward:  -9995
xxxxx
x...x
x..gx
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5778.13161707 -6442.16912869 -8192.20126966  3122.26327107]
------
Step:1, Action:North
State  288
Old Q Values:  [ 5778.13161707 -6442.16912869 -8192.20126966  3122.26327107]
New Q values:  [10550.01478639 -6442.16912869 -8192.20126966  3122.26327107]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27444.5404652   3920.69738742 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  210
Old Q Values:  [38067.27260774  1596.89706844   790.72804752  1050.85266124]
New Q values:  [15393.05817274  1596.89706844   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.35830432e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.35830432e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.90253249e+02]
Reward: 9  Episode Reward:  27
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   901.7369196    203.69577132]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   418.90470554   523.60181942]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683   523.60181942]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.90253249e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.90253249e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.66022375e+02]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   901.7369196    203.69577132]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   901.7369196    203.69577132]
New Q values:  [ -281.736      -1150.91067548   499.90148045   203.69577132]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.66022375e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.66022375e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.42889496e+02]
Reward: -1  Episode Reward:  23
xxxxx
x a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   314.03785683   523.60181942]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683   523.60181942]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683   329.16873163]
Reward: -1  Episode Reward:  22
xxxxx
xa  x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621  312.9155089  -252.78192178]
------
Step:9, Action:South
State  111
Old Q Values:  [-177.44732869 4885.82419884 1075.50510433 -120.29354603]
New Q values:  [-177.44732869 2536.52306644 1075.50510433 -120.29354603]
Reward: 9  Episode Reward:  31
xxxxx
x   x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1221.84333931 1304.98858601 1922.644623    262.76946019]
------
Step:10, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         2827.90809619    0.        ]
New Q values:  [ 320.07341842    0.         3857.01124989    0.        ]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 6.99551834e+03 3.50642584e+03 9.06816004e+03]
------
Step:11, Action:West
State  201
Old Q Values:  [   2.33354578 2191.91549263 -501.63979658  200.3419716 ]
New Q values:  [   2.33354578 2191.91549263 -501.63979658 1073.72671833]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xa gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 334.99458669    0.         3313.96643231 -178.98      ]
------
Step:12, Action:East
State  185
Old Q Values:  [ 334.99458669    0.         3313.96643231 -178.98      ]
New Q values:  [ 334.99458669    0.         -550.76076915 -178.98      ]
Reward: -10001  Episode Reward:  -9962
xxxxx
x   x
x g x
x.. x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1221.84333931 1304.98858601 1922.644623    262.76946019]
------
Step:1, Action:South
State  183
Old Q Values:  [1042.28048424 1686.11868129 7294.46423529 1554.80203889]
New Q values:  [1042.28048424 1576.49635188 7294.46423529 1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1675.60270431   26.73544252 2988.82959789  123.6214372 ]
------
Step:2, Action:North
State  261
Old Q Values:  [1675.60270431   26.73544252 2988.82959789  123.6214372 ]
New Q values:  [1246.43446863   26.73544252 2988.82959789  123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1221.84333931 1304.98858601 1922.644623    262.76946019]
------
Step:3, Action:South
State  181
Old Q Values:  [1221.84333931 1304.98858601 1922.644623    262.76946019]
New Q values:  [1221.84333931 1418.04431377 1922.644623    262.76946019]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1246.43446863   26.73544252 2988.82959789  123.6214372 ]
------
Step:4, Action:East
State  261
Old Q Values:  [1246.43446863   26.73544252 2988.82959789  123.6214372 ]
New Q values:  [1246.43446863   26.73544252 3443.01521007  123.6214372 ]
Reward: 9  Episode Reward:  16
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 2221.96636842 7473.61123638]
------
Step:5, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2178.80074265  1480.2876959 ]
New Q values:  [ 7058.83631802 -5807.06396197  2178.80074265  1624.41964138]
Reward: -1  Episode Reward:  15
xxxxx
x.g.x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1246.43446863   26.73544252 3443.01521007  123.6214372 ]
------
Step:6, Action:East
State  261
Old Q Values:  [1246.43446863   26.73544252 3443.01521007  123.6214372 ]
New Q values:  [1246.43446863   26.73544252 3618.68945494  123.6214372 ]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 2221.96636842 7473.61123638]
------
Step:7, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549 2221.96636842 7473.61123638]
New Q values:  [3915.56039739 -168.92307549 2221.96636842 4074.45133103]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1246.43446863   26.73544252 3618.68945494  123.6214372 ]
------
Step:8, Action:East
State  261
Old Q Values:  [1246.43446863   26.73544252 3618.68945494  123.6214372 ]
New Q values:  [1246.43446863   26.73544252 2669.21118129  123.6214372 ]
Reward: -1  Episode Reward:  12
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 2221.96636842 4074.45133103]
------
Step:9, Action:West
State  273
Old Q Values:  [3915.56039739 -168.92307549 2221.96636842 4074.45133103]
New Q values:  [3915.56039739 -168.92307549 2221.96636842 2429.9438868 ]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1246.43446863   26.73544252 2669.21118129  123.6214372 ]
------
Step:10, Action:East
State  261
Old Q Values:  [1246.43446863   26.73544252 2669.21118129  123.6214372 ]
New Q values:  [1246.43446863   26.73544252 3184.73536792  123.6214372 ]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2178.80074265  1624.41964138]
------
Step:11, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  3617.2744968   2244.32886516]
New Q values:  [  175.14749589 -8521.23367799  4617.31423464  2244.32886516]
Reward: 9  Episode Reward:  19
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10550.01478639 -6442.16912869 -8192.20126966  3122.26327107]
------
Step:12, Action:North
State  288
Old Q Values:  [10550.01478639 -6442.16912869 -8192.20126966  3122.26327107]
New Q values:  [ 8843.32336638 -6442.16912869 -8192.20126966  3122.26327107]
Reward: 9  Episode Reward:  28
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[15393.05817274  1596.89706844   790.72804752  1050.85266124]
------
Step:13, Action:North
State  210
Old Q Values:  [15393.05817274  1596.89706844   790.72804752  1050.85266124]
New Q values:  [27744.31798672  1596.89706844   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  37
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 71938.98239207]
------
Step:14, Action:West
State  130
Old Q Values:  [26266.584521    5661.05765619  -180.00807518 71938.98239207]
New Q values:  [26266.584521    5661.05765619  -180.00807518 72805.83384576]
Reward: 9  Episode Reward:  46
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 146749.46962978]
------
Step:15, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036    4378.49673879 125461.40947967]
New Q values:  [  -180.6          3557.6642036    4378.49673879 125664.18490342]
Reward: 100009  Episode Reward:  100055
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.42889496e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.42889496e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.41306418e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   314.03785683   329.16873163]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   499.90148045   203.69577132]
New Q values:  [ -281.736      -1150.91067548   499.90148045   847.83522846]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2536.52306644 1075.50510433 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 2536.52306644 1075.50510433 -120.29354603]
New Q values:  [-177.44732869 3208.34849716 1075.50510433 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1576.49635188 7294.46423529 1554.80203889]
------
Step:4, Action:East
State  183
Old Q Values:  [1042.28048424 1576.49635188 7294.46423529 1554.80203889]
New Q values:  [1042.28048424 1576.49635188 8056.74417458 1554.80203889]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:5, Action:East
State  200
Old Q Values:  [   62.8218634  13747.50885976  3774.09047434   568.38654082]
New Q values:  [   62.8218634  13747.50885976  3034.01516773   568.38654082]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x gax
x. .x
xxxxx
Step:6, Action:East
State  216
Old Q Values:  [ 1940.28844079  5063.26326    -8896.20691497   637.30368728]
New Q values:  [ 1940.28844079  5063.26326    -8220.10378799   637.30368728]
Reward: -10301  Episode Reward:  -10256
xxxxx
x   x
x  gx
x. .x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.41306418e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.41306418e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.56273136e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   499.90148045   847.83522846]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683   329.16873163]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683   257.39549651]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621  312.9155089  -252.78192178]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 3208.34849716 1075.50510433 -120.29354603]
New Q values:  [-177.44732869 1865.53278577 1075.50510433 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xag.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1221.84333931 1418.04431377 1922.644623    262.76946019]
------
Step:4, Action:South
State  183
Old Q Values:  [1042.28048424 1576.49635188 8056.74417458 1554.80203889]
New Q values:  [1042.28048424 1591.41915113 8056.74417458 1554.80203889]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x ..x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1246.43446863   26.73544252 3184.73536792  123.6214372 ]
------
Step:5, Action:North
State  261
Old Q Values:  [1246.43446863   26.73544252 3184.73536792  123.6214372 ]
New Q values:  [2914.99703982   26.73544252 3184.73536792  123.6214372 ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1591.41915113 8056.74417458 1554.80203889]
------
Step:6, Action:East
State  183
Old Q Values:  [1042.28048424 1591.41915113 8056.74417458 1554.80203889]
New Q values:  [1042.28048424 1591.41915113 8361.6561503  1554.80203889]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:7, Action:East
State  195
Old Q Values:  [  38.85388605 6875.60421951 6726.56315938 1169.39963074]
New Q values:  [   38.85388605  6875.60421951 11019.32065977  1169.39963074]
Reward: 9  Episode Reward:  53
xxxxx
x   x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[27744.31798672  1596.89706844   790.72804752  1050.85266124]
------
Step:8, Action:North
State  216
Old Q Values:  [ 1940.28844079  5063.26326    -8220.10378799   637.30368728]
New Q values:  [  882.39731702  5063.26326    -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  52
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.56273136e+02]
------
Step:9, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.56273136e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.63512694e+02]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 1405.34479825 1040.89714496]
------
Step:10, Action:East
State  126
Old Q Values:  [   0.          331.64678262 1405.34479825 1040.89714496]
New Q values:  [   0.          331.64678262  730.59172742 1040.89714496]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.63512694e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.63512694e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.37074221e+02]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  730.59172742 1040.89714496]
------
Step:12, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 3958.62044909  850.80874877]
New Q values:  [   0.         1166.51141701 3958.62044909 1772.94838375]
Reward: -1  Episode Reward:  48
xxxxx
xag x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  4777.41628082 -2165.66138672  -180.6       ]
------
Step:13, Action:South
State  109
Old Q Values:  [ -241.10880094  4777.41628082 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  2809.21232812 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1438.8649519  2996.15271932  154.04646645]
------
Step:14, Action:East
State  189
Old Q Values:  [ 275.08817949 1438.8649519  2996.15271932  154.04646645]
New Q values:  [ 275.08817949 1438.8649519  1524.75341865  154.04646645]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1089.6411031     0.          198.38683706]
------
Step:15, Action:South
State  204
Old Q Values:  [   0.         1682.60895446 3674.84186411  441.58769553]
New Q values:  [   0.         2790.09447719 3674.84186411  441.58769553]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2178.80074265  1624.41964138]
------
Step:16, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  4617.31423464  2244.32886516]
New Q values:  [  175.14749589 -8521.23367799 64505.32270377  2244.32886516]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27444.5404652   3920.69738742 -4584.50430574 -1713.91177491]
------
Step:1, Action:North
State  216
Old Q Values:  [  882.39731702  5063.26326    -8220.10378799   637.30368728]
New Q values:  [ 2813.88236598  5063.26326    -8220.10378799   637.30368728]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  8185.07813057 -2383.80019164   541.98480906]
------
Step:2, Action:South
State  136
Old Q Values:  [ -170.77177351  8185.07813057 -2383.80019164   541.98480906]
New Q values:  [ -170.77177351 11506.79339179 -2383.80019164   541.98480906]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27444.5404652   3920.69738742 -4584.50430574 -1713.91177491]
------
Step:3, Action:North
State  216
Old Q Values:  [ 2813.88236598  5063.26326    -8220.10378799   637.30368728]
New Q values:  [ 1286.07521269  5063.26326    -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  7
xxxxx
x .ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.37074221e+02]
------
Step:4, Action:West
State  136
Old Q Values:  [ -170.77177351 11506.79339179 -2383.80019164   541.98480906]
New Q values:  [ -170.77177351 11506.79339179 -2383.80019164  1527.82842756]
Reward: 9  Episode Reward:  16
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4352.11501311  1164.54189839]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   499.90148045   847.83522846]
New Q values:  [ -281.736      -1150.91067548   360.48285848   847.83522846]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.37074221e+02]
------
Step:6, Action:West
State  136
Old Q Values:  [ -170.77177351 11506.79339179 -2383.80019164  1527.82842756]
New Q values:  [ -170.77177351 11506.79339179 -2383.80019164  1916.16587496]
Reward: -1  Episode Reward:  14
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4352.11501311  1164.54189839]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   360.48285848   847.83522846]
New Q values:  [ -281.736      -1150.91067548   304.71540969   847.83522846]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.37074221e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.37074221e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.68580257e+02]
Reward: -1  Episode Reward:  12
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   304.71540969   847.83522846]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   304.71540969   847.83522846]
New Q values:  [ -281.736      -1150.91067548   304.71540969   613.2566476 ]
Reward: -1  Episode Reward:  11
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  641.01884151 -180.6       ]
------
Step:10, Action:East
State  110
Old Q Values:  [-239.29051573  915.74185405  641.01884151 -180.6       ]
New Q values:  [-239.29051573  915.74185405  439.78453088 -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   304.71540969   613.2566476 ]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   304.71540969   613.2566476 ]
New Q values:  [ -281.736      -1150.91067548   304.71540969   365.0306629 ]
Reward: -1  Episode Reward:  9
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621  312.9155089  -252.78192178]
------
Step:12, Action:South
State  111
Old Q Values:  [-177.44732869 1865.53278577 1075.50510433 -120.29354603]
New Q values:  [-177.44732869 3260.1099594  1075.50510433 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1591.41915113 8361.6561503  1554.80203889]
------
Step:13, Action:East
State  189
Old Q Values:  [ 275.08817949 1438.8649519  1524.75341865  154.04646645]
New Q values:  [ 275.08817949 1438.8649519  1272.87601525  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658 1073.72671833]
------
Step:14, Action:South
State  203
Old Q Values:  [3.60604218e+00 6.99551834e+03 3.50642584e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 3.97827546e+03 3.50642584e+03 9.06816004e+03]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3915.56039739 -168.92307549 2221.96636842 2429.9438868 ]
------
Step:15, Action:North
State  273
Old Q Values:  [3915.56039739 -168.92307549 2221.96636842 2429.9438868 ]
New Q values:  [4286.07217037 -168.92307549 2221.96636842 2429.9438868 ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x a x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 3.97827546e+03 3.50642584e+03 9.06816004e+03]
------
Step:16, Action:West
State  201
Old Q Values:  [   2.33354578 2191.91549263 -501.63979658 1073.72671833]
New Q values:  [   2.33354578 2191.91549263 -501.63979658  529.38906334]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xa gx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 334.99458669    0.         -550.76076915 -178.98      ]
------
Step:17, Action:North
State  185
Old Q Values:  [ 334.99458669    0.         -550.76076915 -178.98      ]
New Q values:  [ 253.72583854    0.         -550.76076915 -178.98      ]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  401.09334621  312.9155089  -252.78192178]
------
Step:18, Action:South
State  107
Old Q Values:  [-252.35169558  401.09334621  312.9155089  -252.78192178]
New Q values:  [-252.35169558 1316.94071345  312.9155089  -252.78192178]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xa  x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         3857.01124989    0.        ]
------
Step:19, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         3857.01124989    0.        ]
New Q values:  [ 320.07341842    0.         4262.65251137    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x a x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 3.97827546e+03 3.50642584e+03 9.06816004e+03]
------
Step:20, Action:West
State  202
Old Q Values:  [    0.         -8753.98842238  3675.10404147     0.        ]
New Q values:  [    0.         -8753.98842238  3675.10404147  1278.19575341]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xa  x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         4262.65251137    0.        ]
------
Step:21, Action:East
State  190
Old Q Values:  [ 1.04129094e+00 -7.77507115e+03  1.79487036e+03  0.00000000e+00]
New Q values:  [ 1.04129094e+00 -7.77507115e+03  1.46859283e+03  0.00000000e+00]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.         2504.14895323 1884.45205023    0.        ]
------
Step:22, Action:South
State  200
Old Q Values:  [   62.8218634  13747.50885976  3034.01516773   568.38654082]
New Q values:  [   62.8218634  24850.00035503  3034.01516773   568.38654082]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799 64505.32270377  2244.32886516]
------
Step:23, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799 64505.32270377  2244.32886516]
New Q values:  [  175.14749589 -8521.23367799 28460.52609142  2244.32886516]
Reward: 9  Episode Reward:  37
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8843.32336638 -6442.16912869 -8192.20126966  3122.26327107]
------
Step:24, Action:North
State  288
Old Q Values:  [ 8843.32336638 -6442.16912869 -8192.20126966  3122.26327107]
New Q values:  [ 5055.70832455 -6442.16912869 -8192.20126966  3122.26327107]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1286.07521269  5063.26326    -8220.10378799   637.30368728]
------
Step:25, Action:South
State  216
Old Q Values:  [ 1286.07521269  5063.26326    -8220.10378799   637.30368728]
New Q values:  [ 1286.07521269  3541.41780137 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5055.70832455 -6442.16912869 -8192.20126966  3122.26327107]
------
Step:26, Action:North
State  288
Old Q Values:  [ 5055.70832455 -6442.16912869 -8192.20126966  3122.26327107]
New Q values:  [ 3084.10867023 -6442.16912869 -8192.20126966  3122.26327107]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1286.07521269  3541.41780137 -8220.10378799   637.30368728]
------
Step:27, Action:South
State  216
Old Q Values:  [ 1286.07521269  3541.41780137 -8220.10378799   637.30368728]
New Q values:  [ 1286.07521269  2352.64610187 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3084.10867023 -6442.16912869 -8192.20126966  3122.26327107]
------
Step:28, Action:West
State  288
Old Q Values:  [ 3084.10867023 -6442.16912869 -8192.20126966  3122.26327107]
New Q values:  [ 3084.10867023 -6442.16912869 -8192.20126966  9786.46313585]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799 28460.52609142  2244.32886516]
------
Step:29, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799 28460.52609142  2244.32886516]
New Q values:  [  175.14749589 -8521.23367799 14319.54937732  2244.32886516]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3084.10867023 -6442.16912869 -8192.20126966  9786.46313585]
------
Step:30, Action:West
State  288
Old Q Values:  [ 3084.10867023 -6442.16912869 -8192.20126966  9786.46313585]
New Q values:  [ 3084.10867023 -6442.16912869 -8192.20126966  2209.85006754]
Reward: -10001  Episode Reward:  -9970
xxxxx
x   x
x   x
x.g x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  439.78453088 -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134  3465.00733068  2103.93174247     0.        ]
New Q values:  [-8463.16477134  3465.00733068  2152.60720092     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4352.11501311  1164.54189839]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   304.71540969   365.0306629 ]
New Q values:  [ -281.736      -1150.91067548   267.86024096   365.0306629 ]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.68580257e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.68580257e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.96341302e+02]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   267.86024096   365.0306629 ]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   267.86024096   365.0306629 ]
New Q values:  [ -281.736      -1150.91067548   267.86024096  1123.44525298]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3260.1099594  1075.50510433 -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 3260.1099594  1075.50510433 -120.29354603]
New Q values:  [-177.44732869 3811.94082885 1075.50510433 -120.29354603]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1591.41915113 8361.6561503  1554.80203889]
------
Step:6, Action:East
State  189
Old Q Values:  [ 275.08817949 1438.8649519  1272.87601525  154.04646645]
New Q values:  [ 275.08817949 1438.8649519  1172.12505389  154.04646645]
Reward: 9  Episode Reward:  24
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  529.38906334]
------
Step:7, Action:South
State  199
Old Q Values:  [  14.86214194  479.07551978 7748.13858789 1915.70494401]
New Q values:  [  14.86214194 1482.85185902 7748.13858789 1915.70494401]
Reward: 9  Episode Reward:  33
xxxxx
x   x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[4286.07217037 -168.92307549 2221.96636842 2429.9438868 ]
------
Step:8, Action:North
State  273
Old Q Values:  [4286.07217037 -168.92307549 2221.96636842 2429.9438868 ]
New Q values:  [2371.40351594 -168.92307549 2221.96636842 2429.9438868 ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  529.38906334]
------
Step:9, Action:South
State  197
Old Q Values:  [-5833.78831344  1495.56024905 -5538.30598082   403.06255908]
New Q values:  [-5833.78831344  1079.64014969 -5538.30598082   403.06255908]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1606.7201669  1316.32870654]
------
Step:10, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799 14319.54937732  2244.32886516]
New Q values:  [  175.14749589 -8521.23367799  6658.452352    2244.32886516]
Reward: 9  Episode Reward:  40
xxxxx
x g x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3084.10867023 -6442.16912869 -8192.20126966  2209.85006754]
------
Step:11, Action:North
State  288
Old Q Values:  [ 3084.10867023 -6442.16912869 -8192.20126966  2209.85006754]
New Q values:  [ 1944.83729865 -6442.16912869 -8192.20126966  2209.85006754]
Reward: 9  Episode Reward:  49
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1286.07521269  2352.64610187 -8220.10378799   637.30368728]
------
Step:12, Action:South
State  216
Old Q Values:  [ 1286.07521269  2352.64610187 -8220.10378799   637.30368728]
New Q values:  [ 1286.07521269  1603.41346101 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1944.83729865 -6442.16912869 -8192.20126966  2209.85006754]
------
Step:13, Action:West
State  288
Old Q Values:  [ 1944.83729865 -6442.16912869 -8192.20126966  2209.85006754]
New Q values:  [ 1944.83729865 -6442.16912869 -8192.20126966  1612.32319306]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 2429.9438868 ]
------
Step:14, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  6658.452352    2244.32886516]
New Q values:  [  175.14749589 -8521.23367799  6658.452352   73292.60546048]
Reward: 100009  Episode Reward:  100056
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  439.78453088 -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134  3465.00733068  2152.60720092     0.        ]
New Q values:  [-8463.16477134  3465.00733068  2172.0773843      0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4352.11501311  1164.54189839]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   267.86024096  1123.44525298]
New Q values:  [ -281.736      -1150.91067548   201.44648688  1123.44525298]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.96341302e+02]
------
Step:3, Action:West
State  136
Old Q Values:  [ -170.77177351 11506.79339179 -2383.80019164  1916.16587496]
New Q values:  [ -170.77177351 11506.79339179 -2383.80019164  2071.50085392]
Reward: -1  Episode Reward:  17
xxxxx
xga x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4352.11501311  1164.54189839]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   201.44648688  1123.44525298]
New Q values:  [ -281.736      -1150.91067548   168.88098524  1123.44525298]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.96341302e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.96341302e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.54970097e+02]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524  1123.44525298]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524  1123.44525298]
New Q values:  [ -281.736      -1150.91067548   168.88098524   723.50065741]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  439.78453088 -180.6       ]
------
Step:7, Action:East
State  110
Old Q Values:  [-239.29051573  915.74185405  439.78453088 -180.6       ]
New Q values:  [-239.29051573  915.74185405  392.36400958 -180.6       ]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   723.50065741]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   723.50065741]
New Q values:  [ -281.736      -1150.91067548   168.88098524   563.52281918]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  392.36400958 -180.6       ]
------
Step:9, Action:East
State  111
Old Q Values:  [-177.44732869 3811.94082885 1075.50510433 -120.29354603]
New Q values:  [-177.44732869 3811.94082885  598.65888748 -120.29354603]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   563.52281918]
------
Step:10, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683   257.39549651]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683  1245.94044726]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3811.94082885  598.65888748 -120.29354603]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 3811.94082885  598.65888748 -120.29354603]
New Q values:  [-177.44732869 4032.67317663  598.65888748 -120.29354603]
Reward: -1  Episode Reward:  9
xxxxx
x   x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1591.41915113 8361.6561503  1554.80203889]
------
Step:12, Action:East
State  183
Old Q Values:  [1042.28048424 1591.41915113 8361.6561503  1554.80203889]
New Q values:  [1042.28048424 1591.41915113 8483.62094058 1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x   x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:13, Action:East
State  202
Old Q Values:  [    0.         -8753.98842238  3675.10404147  1278.19575341]
New Q values:  [    0.         -8753.98842238  2672.40916596  1278.19575341]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2218.76325298 3989.89183125    0.         1847.21017375]
------
Step:14, Action:South
State  218
Old Q Values:  [2218.76325298 3989.89183125    0.         1847.21017375]
New Q values:  [2218.76325298 2184.8079221     0.         1847.21017375]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1944.83729865 -6442.16912869 -8192.20126966  1612.32319306]
------
Step:15, Action:North
State  288
Old Q Values:  [ 1944.83729865 -6442.16912869 -8192.20126966  1612.32319306]
New Q values:  [ 1442.96389536 -6442.16912869 -8192.20126966  1612.32319306]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2218.76325298 2184.8079221     0.         1847.21017375]
------
Step:16, Action:North
State  216
Old Q Values:  [ 1286.07521269  1603.41346101 -8220.10378799   637.30368728]
New Q values:  [  650.32111404  1603.41346101 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.54970097e+02]
------
Step:17, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.54970097e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.50444884e+02]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   563.52281918]
------
Step:18, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4352.11501311  1164.54189839]
New Q values:  [-9594.56523706 -8069.05606225  4352.11501311  1238.81448118]
Reward: -1  Episode Reward:  32
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        2578.65907274  199.03311984    0.        ]
------
Step:19, Action:South
State  105
Old Q Values:  [-180.6        2578.65907274  199.03311984    0.        ]
New Q values:  [-180.6        1106.98138066  199.03311984    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 253.72583854    0.         -550.76076915 -178.98      ]
------
Step:20, Action:North
State  185
Old Q Values:  [ 253.72583854    0.         -550.76076915 -178.98      ]
New Q values:  [ 432.98474961    0.         -550.76076915 -178.98      ]
Reward: -1  Episode Reward:  30
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1106.98138066  199.03311984    0.        ]
------
Step:21, Action:South
State  107
Old Q Values:  [-252.35169558 1316.94071345  312.9155089  -252.78192178]
New Q values:  [-252.35169558  656.07171026  312.9155089  -252.78192178]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 432.98474961    0.         -550.76076915 -178.98      ]
------
Step:22, Action:North
State  185
Old Q Values:  [ 432.98474961    0.         -550.76076915 -178.98      ]
New Q values:  [ 504.68831404    0.         -550.76076915 -178.98      ]
Reward: -1  Episode Reward:  28
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        1106.98138066  199.03311984    0.        ]
------
Step:23, Action:South
State  105
Old Q Values:  [-180.6        1106.98138066  199.03311984    0.        ]
New Q values:  [-180.6         593.59904648  199.03311984    0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 504.68831404    0.         -550.76076915 -178.98      ]
------
Step:24, Action:North
State  185
Old Q Values:  [ 504.68831404    0.         -550.76076915 -178.98      ]
New Q values:  [ 379.35503956    0.         -550.76076915 -178.98      ]
Reward: -1  Episode Reward:  26
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         593.59904648  199.03311984    0.        ]
------
Step:25, Action:South
State  105
Old Q Values:  [-180.6         593.59904648  199.03311984    0.        ]
New Q values:  [-180.6         350.64613046  199.03311984    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x  gx
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 379.35503956    0.         -550.76076915 -178.98      ]
------
Step:26, Action:North
State  185
Old Q Values:  [ 379.35503956    0.         -550.76076915 -178.98      ]
New Q values:  [ 256.33585496    0.         -550.76076915 -178.98      ]
Reward: -1  Episode Reward:  24
xxxxx
xag x
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         350.64613046  199.03311984    0.        ]
------
Step:27, Action:South
State  105
Old Q Values:  [-180.6         350.64613046  199.03311984    0.        ]
New Q values:  [-180.6         216.55920867  199.03311984    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 256.33585496    0.         -550.76076915 -178.98      ]
------
Step:28, Action:North
State  185
Old Q Values:  [ 256.33585496    0.         -550.76076915 -178.98      ]
New Q values:  [ 166.90210459    0.         -550.76076915 -178.98      ]
Reward: -1  Episode Reward:  22
xxxxx
xa gx
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         216.55920867  199.03311984    0.        ]
------
Step:29, Action:South
State  107
Old Q Values:  [-252.35169558  656.07171026  312.9155089  -252.78192178]
New Q values:  [-252.35169558  311.89931548  312.9155089  -252.78192178]
Reward: -1  Episode Reward:  21
xxxxx
x   x
xa gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 166.90210459    0.         -550.76076915 -178.98      ]
------
Step:30, Action:North
State  185
Old Q Values:  [ 166.90210459    0.         -550.76076915 -178.98      ]
New Q values:  [ 131.12860444    0.         -550.76076915 -178.98      ]
Reward: -1  Episode Reward:  20
xxxxx
xa gx
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         216.55920867  199.03311984    0.        ]
------
Step:31, Action:South
State  105
Old Q Values:  [-180.6         216.55920867  199.03311984    0.        ]
New Q values:  [-180.6         125.3622648   199.03311984    0.        ]
Reward: -1  Episode Reward:  19
xxxxx
x g x
xa  x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 131.12860444    0.         -550.76076915 -178.98      ]
------
Step:32, Action:North
State  185
Old Q Values:  [ 131.12860444    0.         -550.76076915 -178.98      ]
New Q values:  [ 145.72609444    0.         -550.76076915 -178.98      ]
Reward: -1  Episode Reward:  18
xxxxx
xa  x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548  312.9155089  -252.78192178]
------
Step:33, Action:East
State  105
Old Q Values:  [-180.6         125.3622648   199.03311984    0.        ]
New Q values:  [ -180.6          125.3622648  -4615.35224813     0.        ]
Reward: -10001  Episode Reward:  -9983
xxxxx
x g x
x   x
x.. x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2809.21232812 -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094  2809.21232812 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1705.87831815 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1221.84333931 1418.04431377 1922.644623    262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [1221.84333931 1418.04431377 1922.644623    262.76946019]
New Q values:  [1221.84333931 1418.04431377 3591.64375635  262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4777.20709218  9390.61969049  3512.17261526]
------
Step:3, Action:East
State  200
Old Q Values:  [   62.8218634  24850.00035503  3034.01516773   568.38654082]
New Q values:  [   62.8218634  24850.00035503  1700.0301054    568.38654082]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  650.32111404  1603.41346101 -8220.10378799   637.30368728]
------
Step:4, Action:South
State  210
Old Q Values:  [27744.31798672  1596.89706844   790.72804752  1050.85266124]
New Q values:  [27744.31798672  1127.85578529   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1442.96389536 -6442.16912869 -8192.20126966  1612.32319306]
------
Step:5, Action:North
State  288
Old Q Values:  [ 1442.96389536 -6442.16912869 -8192.20126966  1612.32319306]
New Q values:  [ 1057.60959644 -6442.16912869 -8192.20126966  1612.32319306]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  650.32111404  1603.41346101 -8220.10378799   637.30368728]
------
Step:6, Action:South
State  216
Old Q Values:  [  650.32111404  1603.41346101 -8220.10378799   637.30368728]
New Q values:  [  650.32111404  1124.46234232 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1057.60959644 -6442.16912869 -8192.20126966  1612.32319306]
------
Step:7, Action:West
State  288
Old Q Values:  [ 1057.60959644 -6442.16912869 -8192.20126966  1612.32319306]
New Q values:  [ 1057.60959644 -6442.16912869 -8192.20126966  1379.31244326]
Reward: 9  Episode Reward:  43
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 2429.9438868 ]
------
Step:8, Action:West
State  277
Old Q Values:  [   1.64433       0.         1606.7201669  1316.32870654]
New Q values:  [   1.64433       0.         1606.7201669  1487.35209299]
Reward: 9  Episode Reward:  52
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2914.99703982   26.73544252 3184.73536792  123.6214372 ]
------
Step:9, Action:East
State  261
Old Q Values:  [2914.99703982   26.73544252 3184.73536792  123.6214372 ]
New Q values:  [2914.99703982   26.73544252 1755.31019724  123.6214372 ]
Reward: -1  Episode Reward:  51
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1606.7201669  1487.35209299]
------
Step:10, Action:West
State  277
Old Q Values:  [   1.64433       0.         1606.7201669  1487.35209299]
New Q values:  [   1.64433       0.         1606.7201669  1468.83994914]
Reward: -1  Episode Reward:  50
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2914.99703982   26.73544252 1755.31019724  123.6214372 ]
------
Step:11, Action:North
State  261
Old Q Values:  [2914.99703982   26.73544252 1755.31019724  123.6214372 ]
New Q values:  [3710.48509811   26.73544252 1755.31019724  123.6214372 ]
Reward: -1  Episode Reward:  49
xxxxx
x  .x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1042.28048424 1591.41915113 8483.62094058 1554.80203889]
------
Step:12, Action:East
State  183
Old Q Values:  [1042.28048424 1591.41915113 8483.62094058 1554.80203889]
New Q values:  [1042.28048424 1591.41915113 8526.4068567  1554.80203889]
Reward: -1  Episode Reward:  48
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:13, Action:East
State  196
Old Q Values:  [-2469.90645144  4777.20709218  9390.61969049  3512.17261526]
New Q values:  [-2469.90645144  4777.20709218 11989.01001575  3512.17261526]
Reward: -1  Episode Reward:  47
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27444.5404652   3920.69738742 -4584.50430574 -1713.91177491]
------
Step:14, Action:North
State  208
Old Q Values:  [27444.5404652   3920.69738742 -4584.50430574 -1713.91177491]
New Q values:  [96554.79590229  3920.69738742 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100056
xxxxx
x gax
x   x
x   x
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 11506.79339179 -2383.80019164  2071.50085392]
------
Step:1, Action:South
State  136
Old Q Values:  [ -170.77177351 11506.79339179 -2383.80019164  2071.50085392]
New Q values:  [ -170.77177351 33574.5561274  -2383.80019164  2071.50085392]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[96554.79590229  3920.69738742 -4584.50430574 -1713.91177491]
------
Step:2, Action:South
State  208
Old Q Values:  [96554.79590229  3920.69738742 -4584.50430574 -1713.91177491]
New Q values:  [96554.79590229  1987.47268795 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1057.60959644 -6442.16912869 -8192.20126966  1379.31244326]
------
Step:3, Action:West
State  288
Old Q Values:  [ 1057.60959644 -6442.16912869 -8192.20126966  1379.31244326]
New Q values:  [ 1057.60959644 -6442.16912869 -8192.20126966 22544.90661545]
Reward: 9  Episode Reward:  27
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  6658.452352   73292.60546048]
------
Step:4, Action:West
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842 2429.9438868 ]
New Q values:  [ 2371.40351594  -168.92307549  2221.96636842 13366.85146913]
Reward: 9  Episode Reward:  36
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41298.24638137 12764.58618105  7438.53947475  1875.31501677]
------
Step:5, Action:North
State  257
Old Q Values:  [41298.24638137 12764.58618105  7438.53947475  1875.31501677]
New Q values:  [32242.54041251 12764.58618105  7438.53947475  1875.31501677]
Reward: 9  Episode Reward:  45
xxxxx
x. gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 23344.73803738 52392.80619988     0.        ]
------
Step:6, Action:East
State  177
Old Q Values:  [33510.55723334 23344.73803738 52392.80619988     0.        ]
New Q values:  [33510.55723334 23344.73803738 30616.80990221     0.        ]
Reward: 9  Episode Reward:  54
xxxxx
x.  x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  6251.2513774  32180.9580742   1460.9765133 ]
------
Step:7, Action:South
State  192
Old Q Values:  [3.89777037e-01 8.17181197e+03 3.39337679e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 2.52559064e+04 3.39337679e+04 7.32028793e+03]
Reward: -1  Episode Reward:  53
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  6658.452352   73292.60546048]
------
Step:8, Action:West
State  273
Old Q Values:  [ 2371.40351594  -168.92307549  2221.96636842 13366.85146913]
New Q values:  [ 2371.40351594  -168.92307549  2221.96636842 15018.90271141]
Reward: -1  Episode Reward:  52
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[32242.54041251 12764.58618105  7438.53947475  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [32242.54041251 12764.58618105  7438.53947475  1875.31501677]
New Q values:  [22949.58333501 12764.58618105  7438.53947475  1875.31501677]
Reward: -1  Episode Reward:  51
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 23344.73803738 30616.80990221     0.        ]
------
Step:10, Action:North
State  179
Old Q Values:  [16872.63888686 16101.90751562 41171.926679       0.        ]
New Q values:  [82228.67666629 16101.90751562 41171.926679       0.        ]
Reward: 100009  Episode Reward:  100060
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.50444884e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.50444884e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.14634800e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   563.52281918]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   563.52281918]
New Q values:  [ -281.736      -1150.91067548   168.88098524   505.53168388]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  392.36400958 -180.6       ]
------
Step:3, Action:East
State  110
Old Q Values:  [-239.29051573  915.74185405  392.36400958 -180.6       ]
New Q values:  [-239.29051573  915.74185405  308.005109   -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   505.53168388]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   505.53168388]
New Q values:  [ -281.736      -1150.91067548   168.88098524   476.33522977]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  308.005109   -180.6       ]
------
Step:5, Action:East
State  110
Old Q Values:  [-239.29051573  915.74185405  308.005109   -180.6       ]
New Q values:  [-239.29051573  915.74185405  265.50261253 -180.6       ]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   476.33522977]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   476.33522977]
New Q values:  [ -281.736      -1150.91067548   168.88098524   464.65664812]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
xg..x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  265.50261253 -180.6       ]
------
Step:7, Action:East
State  107
Old Q Values:  [-252.35169558  311.89931548  312.9155089  -252.78192178]
New Q values:  [-252.35169558  311.89931548  263.96319799 -252.78192178]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   464.65664812]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683  1245.94044726]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683   591.34597355]
Reward: -1  Episode Reward:  12
xxxxx
xa  x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548  263.96319799 -252.78192178]
------
Step:9, Action:South
State  109
Old Q Values:  [ -241.10880094  1705.87831815 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1765.24445416 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  21
xxxxx
x  gx
xa..x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1221.84333931 1418.04431377 3591.64375635  262.76946019]
------
Step:10, Action:East
State  185
Old Q Values:  [ 145.72609444    0.         -550.76076915 -178.98      ]
New Q values:  [ 145.72609444    0.          442.67034013 -178.98      ]
Reward: 9  Episode Reward:  30
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  529.38906334]
------
Step:11, Action:South
State  196
Old Q Values:  [-2469.90645144  4777.20709218 11989.01001575  3512.17261526]
New Q values:  [-2469.90645144  4027.93373228 11989.01001575  3512.17261526]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2178.80074265  1624.41964138]
------
Step:12, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  6658.452352   73292.60546048]
New Q values:  [  175.14749589 -8521.23367799  9432.25292543 73292.60546048]
Reward: 9  Episode Reward:  38
xxxxx
x g x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1057.60959644 -6442.16912869 -8192.20126966 22544.90661545]
------
Step:13, Action:West
State  288
Old Q Values:  [ 1057.60959644 -6442.16912869 -8192.20126966 22544.90661545]
New Q values:  [ 1057.60959644 -6442.16912869 -8192.20126966 31005.14428432]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9432.25292543 73292.60546048]
------
Step:14, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2178.80074265  1624.41964138]
New Q values:  [ 7058.83631802 -5807.06396197  2178.80074265  1768.31338598]
Reward: 9  Episode Reward:  46
xxxxx
x g x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3710.48509811   26.73544252 1755.31019724  123.6214372 ]
------
Step:15, Action:North
State  260
Old Q Values:  [ 1538.55279863 -5704.51612281  2213.55787939 -5679.36893145]
New Q values:  [ 2226.151559   -5704.51612281  2213.55787939 -5679.36893145]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  160.62456153  3998.65267373  5371.10146515 -4966.32149798]
------
Step:16, Action:East
State  181
Old Q Values:  [1221.84333931 1418.04431377 3591.64375635  262.76946019]
New Q values:  [1221.84333931 1418.04431377 5032.76050727  262.76946019]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4027.93373228 11989.01001575  3512.17261526]
------
Step:17, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.52559064e+04 3.39337679e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 2.52559064e+04 1.02545346e+05 7.32028793e+03]
Reward: 100009  Episode Reward:  100053
xxxxx
x   x
x gax
x   x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3710.48509811   26.73544252 1755.31019724  123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [3710.48509811   26.73544252 1755.31019724  123.6214372 ]
New Q values:  [2999.42219142   26.73544252 1755.31019724  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1221.84333931 1418.04431377 5032.76050727  262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [1221.84333931 1418.04431377 5032.76050727  262.76946019]
New Q values:  [ 1221.84333931  1418.04431377 26782.10798531   262.76946019]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.. x
x g.x
x ..x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1221.84333931  1418.04431377 26782.10798531   262.76946019]
------
Step:1, Action:East
State  189
Old Q Values:  [ 275.08817949 1438.8649519  1172.12505389  154.04646645]
New Q values:  [  275.08817949  1438.8649519  10128.53744381   154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x agx
x...x
xxxxx
Step:2, Action:North
State  200
Old Q Values:  [   62.8218634  24850.00035503  1700.0301054    568.38654082]
New Q values:  [  169.9257398  24850.00035503  1700.0301054    568.38654082]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   464.65664812]
------
Step:3, Action:West
State  126
Old Q Values:  [   0.          331.64678262  730.59172742 1040.89714496]
New Q values:  [   0.          331.64678262  730.59172742 1631.56081097]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4032.67317663  598.65888748 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 4032.67317663  598.65888748 -120.29354603]
New Q values:  [-177.44732869 4651.0305038   598.65888748 -120.29354603]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  1438.8649519  10128.53744381   154.04646645]
------
Step:5, Action:South
State  188
Old Q Values:  [-6523.78898263  3127.58484886  1485.98871489     0.        ]
New Q values:  [-6523.78898263  1924.27940724  1485.98871489     0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x   x
xg .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2226.151559   -5704.51612281  2213.55787939 -5679.36893145]
------
Step:6, Action:East
State  261
Old Q Values:  [2999.42219142   26.73544252 1755.31019724  123.6214372 ]
New Q values:  [2999.42219142   26.73544252 2825.1749743   123.6214372 ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197  2178.80074265  1768.31338598]
------
Step:7, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197  2178.80074265  1768.31338598]
New Q values:  [ 7058.83631802 -5807.06396197 10178.46358236  1768.31338598]
Reward: 9  Episode Reward:  53
xxxxx
x   x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1057.60959644 -6442.16912869 -8192.20126966 31005.14428432]
------
Step:8, Action:North
State  288
Old Q Values:  [ 1057.60959644 -6442.16912869 -8192.20126966 31005.14428432]
New Q values:  [68751.73923459 -6442.16912869 -8192.20126966 31005.14428432]
Reward: 100009  Episode Reward:  100062
xxxxx
x   x
x  ax
x  gx
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[96554.79590229  1987.47268795 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [96554.79590229  1987.47268795 -4584.50430574 -1713.91177491]
New Q values:  [96554.79590229 21425.91084556 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[68751.73923459 -6442.16912869 -8192.20126966 31005.14428432]
------
Step:2, Action:West
State  288
Old Q Values:  [68751.73923459 -6442.16912869 -8192.20126966 31005.14428432]
New Q values:  [68751.73923459 -6442.16912869 -8192.20126966 34395.23935187]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9432.25292543 73292.60546048]
------
Step:3, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  9432.25292543 73292.60546048]
New Q values:  [  175.14749589 -8521.23367799  9432.25292543 36207.31718469]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[22949.58333501 12764.58618105  7438.53947475  1875.31501677]
------
Step:4, Action:North
State  257
Old Q Values:  [22949.58333501 12764.58618105  7438.53947475  1875.31501677]
New Q values:  [19238.40050401 12764.58618105  7438.53947475  1875.31501677]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 23344.73803738 30616.80990221     0.        ]
------
Step:5, Action:North
State  183
Old Q Values:  [1042.28048424 1591.41915113 8526.4068567  1554.80203889]
New Q values:  [1269.31546223 1591.41915113 8526.4068567  1554.80203889]
Reward: 9  Episode Reward:  45
xxxxx
xa. x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2823.34422845  238.35800069    0.        ]
------
Step:6, Action:South
State  102
Old Q Values:  [-180.6        1284.69780031  522.17020433 -180.6       ]
New Q values:  [-180.6        1942.41406952  522.17020433 -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x . x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:7, Action:East
State  180
Old Q Values:  [  160.62456153  3998.65267373  5371.10146515 -4966.32149798]
New Q values:  [  160.62456153  3998.65267373 32917.44436846 -4966.32149798]
Reward: 9  Episode Reward:  53
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.52559064e+04 1.02545346e+05 7.32028793e+03]
------
Step:8, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.52559064e+04 1.02545346e+05 7.32028793e+03]
New Q values:  [3.89777037e-01 2.52559064e+04 6.99839771e+04 7.32028793e+03]
Reward: -1  Episode Reward:  52
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[96554.79590229 21425.91084556 -4584.50430574 -1713.91177491]
------
Step:9, Action:North
State  208
Old Q Values:  [96554.79590229 21425.91084556 -4584.50430574 -1713.91177491]
New Q values:  [64192.89807713 21425.91084556 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  51
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[36043.85201615  8286.98116673 -8652.84       85238.59905405]
------
Step:10, Action:North
State  128
Old Q Values:  [36043.85201615  8286.98116673 -8652.84       85238.59905405]
New Q values:  [33808.52052267  8286.98116673 -8652.84       85238.59905405]
Reward: -10301  Episode Reward:  -10250
xxxxx
x .gx
x   x
x   x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.14634800e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.14634800e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.70650914e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   464.65664812]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   464.65664812]
New Q values:  [ -281.736      -1150.91067548   168.88098524  1586.57181039]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4651.0305038   598.65888748 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 4651.0305038   598.65888748 -120.29354603]
New Q values:  [-177.44732869 4904.37343466  598.65888748 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  1438.8649519  10128.53744381   154.04646645]
------
Step:4, Action:East
State  189
Old Q Values:  [  275.08817949  1438.8649519  10128.53744381   154.04646645]
New Q values:  [ 275.08817949 1438.8649519  1653.51798225  154.04646645]
Reward: -9991  Episode Reward:  -9964
xxxxx
x   x
x g.x
x ..x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2999.42219142   26.73544252 2825.1749743   123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [2999.42219142   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [9239.80127216   26.73544252 2825.1749743   123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1221.84333931  1418.04431377 26782.10798531   262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [ 1221.84333931  1418.04431377 26782.10798531   262.76946019]
New Q values:  [ 1221.84333931  1418.04431377 20366.53061638   262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  6251.2513774  32180.9580742   1460.9765133 ]
------
Step:3, Action:East
State  193
Old Q Values:  [-5922.26708831  6251.2513774  32180.9580742   1460.9765133 ]
New Q values:  [-5922.26708831  6251.2513774  26135.65265282  1460.9765133 ]
Reward: -9991  Episode Reward:  -9983
xxxxx
x...x
x  gx
x ..x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 2371.40351594  -168.92307549  2221.96636842 15018.90271141]
------
Step:1, Action:West
State  273
Old Q Values:  [ 2371.40351594  -168.92307549  2221.96636842 15018.90271141]
New Q values:  [2371.40351594 -168.92307549 2221.96636842 8784.90146621]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[9239.80127216   26.73544252 2825.1749743   123.6214372 ]
------
Step:2, Action:North
State  261
Old Q Values:  [9239.80127216   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [6259.24256587   26.73544252 2825.1749743   123.6214372 ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1269.31546223 1591.41915113 8526.4068567  1554.80203889]
------
Step:3, Action:East
State  183
Old Q Values:  [1269.31546223 1591.41915113 8526.4068567  1554.80203889]
New Q values:  [1269.31546223 1591.41915113 6820.04741024 1554.80203889]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.13469489e+04  1.03161518e+03]
------
Step:4, Action:East
State  195
Old Q Values:  [   38.85388605  6875.60421951 11019.32065977  1169.39963074]
New Q values:  [   38.85388605  6875.60421951 12736.42365992  1169.39963074]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[27744.31798672  1127.85578529   790.72804752  1050.85266124]
------
Step:5, Action:North
State  208
Old Q Values:  [64192.89807713 21425.91084556 -4584.50430574 -1713.91177491]
New Q values:  [47524.30938458 21425.91084556 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  45
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 72805.83384576]
------
Step:6, Action:West
State  130
Old Q Values:  [26266.584521    5661.05765619  -180.00807518 72805.83384576]
New Q values:  [26266.584521    5661.05765619  -180.00807518 73152.57442724]
Reward: 9  Episode Reward:  54
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 146749.46962978]
------
Step:7, Action:West
State  115
Old Q Values:  [  -180.6          1117.48597573  18639.04650494 146749.46962978]
New Q values:  [  -180.6          1117.48597573  18639.04650494 134179.40896346]
Reward: 100009  Episode Reward:  100063
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1221.84333931  1418.04431377 20366.53061638   262.76946019]
------
Step:1, Action:South
State  181
Old Q Values:  [ 1221.84333931  1418.04431377 20366.53061638   262.76946019]
New Q values:  [ 1221.84333931  2450.39049527 20366.53061638   262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6259.24256587   26.73544252 2825.1749743   123.6214372 ]
------
Step:2, Action:North
State  261
Old Q Values:  [6259.24256587   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [4549.11124942   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xa .x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1269.31546223 1591.41915113 6820.04741024 1554.80203889]
------
Step:3, Action:East
State  181
Old Q Values:  [ 1221.84333931  2450.39049527 20366.53061638   262.76946019]
New Q values:  [ 1221.84333931  2450.39049527 15986.7080424    262.76946019]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  6251.2513774  26135.65265282  1460.9765133 ]
------
Step:4, Action:South
State  193
Old Q Values:  [-5922.26708831  6251.2513774  26135.65265282  1460.9765133 ]
New Q values:  [-5922.26708831  5141.37099082 26135.65265282  1460.9765133 ]
Reward: 9  Episode Reward:  16
xxxxx
x..gx
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 8784.90146621]
------
Step:5, Action:West
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842 8784.90146621]
New Q values:  [2371.40351594 -168.92307549 2221.96636842 4878.09396131]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4549.11124942   26.73544252 2825.1749743   123.6214372 ]
------
Step:6, Action:North
State  261
Old Q Values:  [4549.11124942   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [3865.05872284   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1269.31546223 1591.41915113 6820.04741024 1554.80203889]
------
Step:7, Action:East
State  181
Old Q Values:  [ 1221.84333931  2450.39049527 15986.7080424    262.76946019]
New Q values:  [ 1221.84333931  2450.39049527 14234.7790128    262.76946019]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5141.37099082 26135.65265282  1460.9765133 ]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.52559064e+04 6.99839771e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 1.31553016e+04 6.99839771e+04 7.32028793e+03]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197 10178.46358236  1768.31338598]
------
Step:9, Action:East
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197 10178.46358236  1768.31338598]
New Q values:  [ 7058.83631802 -5807.06396197 24702.30720332  1768.31338598]
Reward: 9  Episode Reward:  21
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[68751.73923459 -6442.16912869 -8192.20126966 34395.23935187]
------
Step:10, Action:North
State  288
Old Q Values:  [68751.73923459 -6442.16912869 -8192.20126966 34395.23935187]
New Q values:  [41763.38850921 -6442.16912869 -8192.20126966 34395.23935187]
Reward: 9  Episode Reward:  30
xxxxx
x..gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[47524.30938458 21425.91084556 -4584.50430574 -1713.91177491]
------
Step:11, Action:South
State  208
Old Q Values:  [47524.30938458 21425.91084556 -4584.50430574 -1713.91177491]
New Q values:  [47524.30938458 21098.78089099 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  29
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[41763.38850921 -6442.16912869 -8192.20126966 34395.23935187]
------
Step:12, Action:North
State  288
Old Q Values:  [41763.38850921 -6442.16912869 -8192.20126966 34395.23935187]
New Q values:  [30962.04821906 -6442.16912869 -8192.20126966 34395.23935187]
Reward: -1  Episode Reward:  28
xxxxx
x..gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[47524.30938458 21098.78089099 -4584.50430574 -1713.91177491]
------
Step:13, Action:South
State  208
Old Q Values:  [47524.30938458 21098.78089099 -4584.50430574 -1713.91177491]
New Q values:  [47524.30938458 18757.48416196 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  27
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[30962.04821906 -6442.16912869 -8192.20126966 34395.23935187]
------
Step:14, Action:West
State  288
Old Q Values:  [30962.04821906 -6442.16912869 -8192.20126966 34395.23935187]
New Q values:  [30962.04821906 -6442.16912869 -8192.20126966 15220.92392914]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 4878.09396131]
------
Step:15, Action:West
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842 4878.09396131]
New Q values:  [2371.40351594 -168.92307549 2221.96636842 3110.15520138]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3865.05872284   26.73544252 2825.1749743   123.6214372 ]
------
Step:16, Action:North
State  261
Old Q Values:  [3865.05872284   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [5815.85719298   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1221.84333931  2450.39049527 14234.7790128    262.76946019]
------
Step:17, Action:South
State  177
Old Q Values:  [33510.55723334 23344.73803738 30616.80990221     0.        ]
New Q values:  [33510.55723334 15108.81536615 30616.80990221     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[19238.40050401 12764.58618105  7438.53947475  1875.31501677]
------
Step:18, Action:North
State  257
Old Q Values:  [19238.40050401 12764.58618105  7438.53947475  1875.31501677]
New Q values:  [17747.92737161 12764.58618105  7438.53947475  1875.31501677]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 15108.81536615 30616.80990221     0.        ]
------
Step:19, Action:North
State  181
Old Q Values:  [ 1221.84333931  2450.39049527 14234.7790128    262.76946019]
New Q values:  [ 1341.14060426  2450.39049527 14234.7790128    262.76946019]
Reward: 9  Episode Reward:  31
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2823.34422845  238.35800069    0.        ]
------
Step:20, Action:South
State  103
Old Q Values:  [ 221.30610858 2823.34422845  238.35800069    0.        ]
New Q values:  [ 221.30610858 5399.17139522  238.35800069    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1341.14060426  2450.39049527 14234.7790128    262.76946019]
------
Step:21, Action:South
State  183
Old Q Values:  [1269.31546223 1591.41915113 6820.04741024 1554.80203889]
New Q values:  [1269.31546223 2380.72481834 6820.04741024 1554.80203889]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5815.85719298   26.73544252 2825.1749743   123.6214372 ]
------
Step:22, Action:North
State  261
Old Q Values:  [5815.85719298   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [6596.17658103   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1341.14060426  2450.39049527 14234.7790128    262.76946019]
------
Step:23, Action:South
State  183
Old Q Values:  [1269.31546223 2380.72481834 6820.04741024 1554.80203889]
New Q values:  [1269.31546223 2930.54290165 6820.04741024 1554.80203889]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6596.17658103   26.73544252 2825.1749743   123.6214372 ]
------
Step:24, Action:North
State  260
Old Q Values:  [ 2226.151559   -5704.51612281  2213.55787939 -5679.36893145]
New Q values:  [ 2318.995573   -5704.51612281  2213.55787939 -5679.36893145]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:25, Action:East
State  180
Old Q Values:  [  160.62456153  3998.65267373 32917.44436846 -4966.32149798]
New Q values:  [  160.62456153  3998.65267373 34161.57089155 -4966.32149798]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.31553016e+04 6.99839771e+04 7.32028793e+03]
------
Step:26, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.13469489e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.28614750e+04  1.03161518e+03]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[27744.31798672  1127.85578529   790.72804752  1050.85266124]
------
Step:27, Action:North
State  208
Old Q Values:  [47524.30938458 18757.48416196 -4584.50430574 -1713.91177491]
New Q values:  [40960.896082   18757.48416196 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  33
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 73152.57442724]
------
Step:28, Action:West
State  130
Old Q Values:  [26266.584521    5661.05765619  -180.00807518 73152.57442724]
New Q values:  [ 26266.584521     5661.05765619   -180.00807518 126965.68524192]
Reward: 100009  Episode Reward:  100042
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[30962.04821906 -6442.16912869 -8192.20126966 15220.92392914]
------
Step:1, Action:North
State  288
Old Q Values:  [30962.04821906 -6442.16912869 -8192.20126966 15220.92392914]
New Q values:  [24678.48811222 -6442.16912869 -8192.20126966 15220.92392914]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[40960.896082   18757.48416196 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [40960.896082   18757.48416196 -4584.50430574 -1713.91177491]
New Q values:  [26462.12527102 18757.48416196 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 33574.5561274  -2383.80019164  2071.50085392]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351 33574.5561274  -2383.80019164  2071.50085392]
New Q values:  [ -170.77177351 21367.86003227 -2383.80019164  2071.50085392]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26462.12527102 18757.48416196 -4584.50430574 -1713.91177491]
------
Step:4, Action:South
State  208
Old Q Values:  [26462.12527102 18757.48416196 -4584.50430574 -1713.91177491]
New Q values:  [26462.12527102 14905.94009845 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24678.48811222 -6442.16912869 -8192.20126966 15220.92392914]
------
Step:5, Action:North
State  288
Old Q Values:  [24678.48811222 -6442.16912869 -8192.20126966 15220.92392914]
New Q values:  [17809.4328262  -6442.16912869 -8192.20126966 15220.92392914]
Reward: -1  Episode Reward:  15
xxxxx
x..gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26462.12527102 14905.94009845 -4584.50430574 -1713.91177491]
------
Step:6, Action:South
State  208
Old Q Values:  [26462.12527102 14905.94009845 -4584.50430574 -1713.91177491]
New Q values:  [26462.12527102 11304.60588724 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17809.4328262  -6442.16912869 -8192.20126966 15220.92392914]
------
Step:7, Action:North
State  288
Old Q Values:  [17809.4328262  -6442.16912869 -8192.20126966 15220.92392914]
New Q values:  [15061.81071179 -6442.16912869 -8192.20126966 15220.92392914]
Reward: -1  Episode Reward:  13
xxxxx
x..gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26462.12527102 11304.60588724 -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  208
Old Q Values:  [26462.12527102 11304.60588724 -4584.50430574 -1713.91177491]
New Q values:  [26462.12527102  9087.51953364 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15061.81071179 -6442.16912869 -8192.20126966 15220.92392914]
------
Step:9, Action:West
State  288
Old Q Values:  [15061.81071179 -6442.16912869 -8192.20126966 15220.92392914]
New Q values:  [15061.81071179 -6442.16912869 -8192.20126966 16955.96472706]
Reward: 9  Episode Reward:  21
xxxxx
x.. x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9432.25292543 36207.31718469]
------
Step:10, Action:West
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842 3110.15520138]
New Q values:  [2371.40351594 -168.92307549 2221.96636842 3228.31505486]
Reward: 9  Episode Reward:  30
xxxxx
x.. x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6596.17658103   26.73544252 2825.1749743   123.6214372 ]
------
Step:11, Action:North
State  257
Old Q Values:  [17747.92737161 12764.58618105  7438.53947475  1875.31501677]
New Q values:  [17151.73811864 12764.58618105  7438.53947475  1875.31501677]
Reward: -1  Episode Reward:  29
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 15108.81536615 30616.80990221     0.        ]
------
Step:12, Action:North
State  183
Old Q Values:  [1269.31546223 2930.54290165 6820.04741024 1554.80203889]
New Q values:  [2132.87760346 2930.54290165 6820.04741024 1554.80203889]
Reward: 9  Episode Reward:  38
xxxxx
xa. x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 5399.17139522  238.35800069    0.        ]
------
Step:13, Action:South
State  103
Old Q Values:  [ 221.30610858 5399.17139522  238.35800069    0.        ]
New Q values:  [ 221.30610858 4205.08278116  238.35800069    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x . x
xa. x
x  gx
xxxxx
Step:14, Action:East
State  183
Old Q Values:  [2132.87760346 2930.54290165 6820.04741024 1554.80203889]
New Q values:  [2132.87760346 2930.54290165 6591.86144993 1554.80203889]
Reward: 9  Episode Reward:  46
xxxxx
x . x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.28614750e+04  1.03161518e+03]
------
Step:15, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.28614750e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.34672854e+04  1.03161518e+03]
Reward: -1  Episode Reward:  45
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[27744.31798672  1127.85578529   790.72804752  1050.85266124]
------
Step:16, Action:North
State  210
Old Q Values:  [27744.31798672  1127.85578529   790.72804752  1050.85266124]
New Q values:  [49186.83276726  1127.85578529   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  44
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 126965.68524192]
------
Step:17, Action:West
State  130
Old Q Values:  [ 26266.584521     5661.05765619   -180.00807518 126965.68524192]
New Q values:  [ 26266.584521     5661.05765619   -180.00807518 151045.49678581]
Reward: 100009  Episode Reward:  100053
xxxxx
x a x
x   x
x  gx
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  160.62456153  3998.65267373 34161.57089155 -4966.32149798]
------
Step:1, Action:East
State  181
Old Q Values:  [ 1341.14060426  2450.39049527 14234.7790128    262.76946019]
New Q values:  [1341.14060426 2450.39049527 9296.01460985  262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4027.93373228 11989.01001575  3512.17261526]
------
Step:2, Action:East
State  200
Old Q Values:  [  169.9257398  24850.00035503  1700.0301054    568.38654082]
New Q values:  [  169.9257398  24850.00035503  1022.75074485   568.38654082]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  650.32111404  1124.46234232 -8220.10378799   637.30368728]
------
Step:3, Action:South
State  208
Old Q Values:  [26462.12527102  9087.51953364 -4584.50430574 -1713.91177491]
New Q values:  [26462.12527102  8727.19723157 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15061.81071179 -6442.16912869 -8192.20126966 16955.96472706]
------
Step:4, Action:West
State  288
Old Q Values:  [15061.81071179 -6442.16912869 -8192.20126966 16955.96472706]
New Q values:  [15061.81071179 -6442.16912869 -8192.20126966 17649.98104623]
Reward: 9  Episode Reward:  36
xxxxx
xg..x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9432.25292543 36207.31718469]
------
Step:5, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197 24702.30720332  1768.31338598]
New Q values:  [ 7058.83631802 -5807.06396197 24702.30720332  2691.5783287 ]
Reward: 9  Episode Reward:  45
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6596.17658103   26.73544252 2825.1749743   123.6214372 ]
------
Step:6, Action:North
State  261
Old Q Values:  [6596.17658103   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [5426.67501537   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  44
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 2450.39049527 9296.01460985  262.76946019]
------
Step:7, Action:South
State  183
Old Q Values:  [2132.87760346 2930.54290165 6591.86144993 1554.80203889]
New Q values:  [2132.87760346 2799.61966527 6591.86144993 1554.80203889]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5426.67501537   26.73544252 2825.1749743   123.6214372 ]
------
Step:8, Action:North
State  260
Old Q Values:  [ 2318.995573   -5704.51612281  2213.55787939 -5679.36893145]
New Q values:  [ 2356.1331786  -5704.51612281  2213.55787939 -5679.36893145]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:9, Action:East
State  183
Old Q Values:  [2132.87760346 2799.61966527 6591.86144993 1554.80203889]
New Q values:  [2132.87760346 2799.61966527 6676.33019311 1554.80203889]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.34672854e+04  1.03161518e+03]
------
Step:10, Action:East
State  195
Old Q Values:  [   38.85388605  6875.60421951 12736.42365992  1169.39963074]
New Q values:  [   38.85388605  6875.60421951 19850.01929415  1169.39963074]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[49186.83276726  1127.85578529   790.72804752  1050.85266124]
------
Step:11, Action:North
State  208
Old Q Values:  [26462.12527102  8727.19723157 -4584.50430574 -1713.91177491]
New Q values:  [55903.89914415  8727.19723157 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  49
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 151045.49678581]
------
Step:12, Action:West
State  130
Old Q Values:  [ 26266.584521     5661.05765619   -180.00807518 151045.49678581]
New Q values:  [ 26266.584521     5661.05765619   -180.00807518 158122.85418535]
Reward: 100009  Episode Reward:  100058
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2356.1331786  -5704.51612281  2213.55787939 -5679.36893145]
------
Step:1, Action:East
State  260
Old Q Values:  [ 2356.1331786  -5704.51612281  2213.55787939 -5679.36893145]
New Q values:  [ 2356.1331786  -5704.51612281  8301.51531275 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197 24702.30720332  2691.5783287 ]
------
Step:2, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  9432.25292543 36207.31718469]
New Q values:  [  175.14749589 -8521.23367799  9073.29548404 36207.31718469]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15061.81071179 -6442.16912869 -8192.20126966 17649.98104623]
------
Step:3, Action:West
State  288
Old Q Values:  [15061.81071179 -6442.16912869 -8192.20126966 17649.98104623]
New Q values:  [15061.81071179 -6442.16912869 -8192.20126966 17921.5875739 ]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9073.29548404 36207.31718469]
------
Step:4, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  9073.29548404 36207.31718469]
New Q values:  [  175.14749589 -8521.23367799  9005.19446579 36207.31718469]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15061.81071179 -6442.16912869 -8192.20126966 17921.5875739 ]
------
Step:5, Action:West
State  288
Old Q Values:  [15061.81071179 -6442.16912869 -8192.20126966 17921.5875739 ]
New Q values:  [15061.81071179 -6442.16912869 -8192.20126966 18030.23018497]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9005.19446579 36207.31718469]
------
Step:6, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  9005.19446579 36207.31718469]
New Q values:  [  175.14749589 -8521.23367799  9005.19446579 16110.32937849]
Reward: -1  Episode Reward:  14
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5426.67501537   26.73544252 2825.1749743   123.6214372 ]
------
Step:7, Action:North
State  260
Old Q Values:  [ 2356.1331786  -5704.51612281  8301.51531275 -5679.36893145]
New Q values:  [11190.3245389  -5704.51612281  8301.51531275 -5679.36893145]
Reward: -1  Episode Reward:  13
xxxxx
xg..x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  160.62456153  3998.65267373 34161.57089155 -4966.32149798]
------
Step:8, Action:East
State  180
Old Q Values:  [  160.62456153  3998.65267373 34161.57089155 -4966.32149798]
New Q values:  [  160.62456153  3998.65267373 17266.73136135 -4966.32149798]
Reward: 9  Episode Reward:  22
xxxxx
xg..x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4027.93373228 11989.01001575  3512.17261526]
------
Step:9, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.31553016e+04 6.99839771e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 1.31553016e+04 4.47701606e+04 7.32028793e+03]
Reward: 9  Episode Reward:  31
xxxxx
x.g.x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[55903.89914415  8727.19723157 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  208
Old Q Values:  [55903.89914415  8727.19723157 -4584.50430574 -1713.91177491]
New Q values:  [47938.53937387  8727.19723157 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  40
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[33808.52052267  8286.98116673 -8652.84       85238.59905405]
------
Step:11, Action:West
State  128
Old Q Values:  [33808.52052267  8286.98116673 -8652.84       85238.59905405]
New Q values:  [33808.52052267  8286.98116673 -8652.84       67385.76854049]
Reward: 9  Episode Reward:  49
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[     0.           3629.92591876  33200.89794593 110949.76306292]
------
Step:12, Action:East
State  112
Old Q Values:  [     0.           3629.92591876  33200.89794593 110949.76306292]
New Q values:  [     0.           3629.92591876  33495.48974052 110949.76306292]
Reward: -1  Episode Reward:  48
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[33808.52052267  8286.98116673 -8652.84       67385.76854049]
------
Step:13, Action:North
State  128
Old Q Values:  [33808.52052267  8286.98116673 -8652.84       67385.76854049]
New Q values:  [27558.53877122  8286.98116673 -8652.84       67385.76854049]
Reward: -10301  Episode Reward:  -10253
xxxxx
x. gx
x   x
x   x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.70650914e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  2.70650914e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.89631909e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524  1586.57181039]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524  1586.57181039]
New Q values:  [ -281.736      -1150.91067548   168.88098524   914.75128037]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573  915.74185405  265.50261253 -180.6       ]
------
Step:3, Action:East
State  107
Old Q Values:  [-252.35169558  311.89931548  263.96319799 -252.78192178]
New Q values:  [-252.35169558  311.89931548  379.41066331 -252.78192178]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524   914.75128037]
------
Step:4, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4352.11501311  1238.81448118]
New Q values:  [-9594.56523706 -8069.05606225  4352.11501311  1024.49912872]
Reward: -1  Episode Reward:  16
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1765.24445416 -2165.66138672  -180.6       ]
------
Step:5, Action:South
State  108
Old Q Values:  [-8463.16477134  3465.00733068  2172.0773843      0.        ]
New Q values:  [-8463.16477134  1968.68675445  2172.0773843      0.        ]
Reward: 9  Episode Reward:  25
xxxxx
xg  x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  1924.27940724  1485.98871489     0.        ]
------
Step:6, Action:South
State  188
Old Q Values:  [-6523.78898263  1924.27940724  1485.98871489     0.        ]
New Q values:  [-6523.78898263  4132.20912457  1485.98871489     0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xg. x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[11190.3245389  -5704.51612281  8301.51531275 -5679.36893145]
------
Step:7, Action:East
State  260
Old Q Values:  [11190.3245389  -5704.51612281  8301.51531275 -5679.36893145]
New Q values:  [11190.3245389  -5704.51612281  8159.10493865 -5679.36893145]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x . x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9005.19446579 16110.32937849]
------
Step:8, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  9005.19446579 16110.32937849]
New Q values:  [  175.14749589 -8521.23367799  9016.54684181 16110.32937849]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15061.81071179 -6442.16912869 -8192.20126966 18030.23018497]
------
Step:9, Action:North
State  288
Old Q Values:  [15061.81071179 -6442.16912869 -8192.20126966 18030.23018497]
New Q values:  [20405.68609688 -6442.16912869 -8192.20126966 18030.23018497]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[47938.53937387  8727.19723157 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  210
Old Q Values:  [49186.83276726  1127.85578529   790.72804752  1050.85266124]
New Q values:  [19851.02267955  1127.85578529   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.89631909e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  5.89631909e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.12656556e+02]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   314.03785683   591.34597355]
------
Step:12, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683   591.34597355]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683  1707.25041982]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4904.37343466  598.65888748 -120.29354603]
------
Step:13, Action:South
State  99
Old Q Values:  [    0.         40163.80270646 51580.7370385      0.        ]
New Q values:  [    0.         26118.08825259 51580.7370385      0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 15108.81536615 30616.80990221     0.        ]
------
Step:14, Action:North
State  183
Old Q Values:  [2132.87760346 2799.61966527 6676.33019311 1554.80203889]
New Q values:  [2323.86307178 2799.61966527 6676.33019311 1554.80203889]
Reward: -1  Episode Reward:  46
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4904.37343466  598.65888748 -120.29354603]
------
Step:15, Action:South
State  110
Old Q Values:  [-239.29051573  915.74185405  265.50261253 -180.6       ]
New Q values:  [-239.29051573 1794.83169102  265.50261253 -180.6       ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:16, Action:East
State  179
Old Q Values:  [82228.67666629 16101.90751562 41171.926679       0.        ]
New Q values:  [82228.67666629 16101.90751562 80514.35628474     0.        ]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1794.83169102  265.50261253 -180.6       ]
------
Step:1, Action:East
State  108
Old Q Values:  [-8463.16477134  1968.68675445  2172.0773843      0.        ]
New Q values:  [-8463.16477134  1968.68675445  2179.86545765     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4352.11501311  1024.49912872]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4352.11501311  1024.49912872]
New Q values:  [-9594.56523706 -8069.05606225  8156.60401493  1024.49912872]
Reward: 9  Episode Reward:  18
xxxxx
x gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351 21367.86003227 -2383.80019164  2071.50085392]
------
Step:3, Action:South
State  136
Old Q Values:  [ -170.77177351 21367.86003227 -2383.80019164  2071.50085392]
New Q values:  [ -170.77177351  8889.8827156  -2383.80019164  2071.50085392]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  650.32111404  1124.46234232 -8220.10378799   637.30368728]
------
Step:4, Action:South
State  216
Old Q Values:  [  650.32111404  1124.46234232 -8220.10378799   637.30368728]
New Q values:  [  650.32111404  6576.89076599 -8220.10378799   637.30368728]
Reward: 9  Episode Reward:  36
xxxxx
x g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20405.68609688 -6442.16912869 -8192.20126966 18030.23018497]
------
Step:5, Action:North
State  288
Old Q Values:  [20405.68609688 -6442.16912869 -8192.20126966 18030.23018497]
New Q values:  [10134.74166855 -6442.16912869 -8192.20126966 18030.23018497]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  650.32111404  6576.89076599 -8220.10378799   637.30368728]
------
Step:6, Action:South
State  216
Old Q Values:  [  650.32111404  6576.89076599 -8220.10378799   637.30368728]
New Q values:  [  650.32111404  8039.22536189 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10134.74166855 -6442.16912869 -8192.20126966 18030.23018497]
------
Step:7, Action:West
State  288
Old Q Values:  [10134.74166855 -6442.16912869 -8192.20126966 18030.23018497]
New Q values:  [10134.74166855 -6442.16912869 -8192.20126966 12050.59088753]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9016.54684181 16110.32937849]
------
Step:8, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  9016.54684181 16110.32937849]
New Q values:  [  175.14749589 -8521.23367799  9016.54684181  9806.62911307]
Reward: 9  Episode Reward:  52
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[11190.3245389  -5704.51612281  8159.10493865 -5679.36893145]
------
Step:9, Action:East
State  260
Old Q Values:  [11190.3245389  -5704.51612281  8159.10493865 -5679.36893145]
New Q values:  [11190.3245389  -5704.51612281  6205.03070938 -5679.36893145]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  9016.54684181  9806.62911307]
------
Step:10, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  9016.54684181  9806.62911307]
New Q values:  [  175.14749589 -8521.23367799  7221.19600298  9806.62911307]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10134.74166855 -6442.16912869 -8192.20126966 12050.59088753]
------
Step:11, Action:North
State  288
Old Q Values:  [10134.74166855 -6442.16912869 -8192.20126966 12050.59088753]
New Q values:  [18434.85847958 -6442.16912869 -8192.20126966 12050.59088753]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[47938.53937387  8727.19723157 -4584.50430574 -1713.91177491]
------
Step:12, Action:North
State  210
Old Q Values:  [19851.02267955  1127.85578529   790.72804752  1050.85266124]
New Q values:  [8063.6060385  1127.85578529  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.12656556e+02]
------
Step:13, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  4.12656556e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  6.76637748e+02]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   314.03785683  1707.25041982]
------
Step:14, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   914.75128037]
New Q values:  [ -281.736      -1150.91067548   168.88098524  1836.61254255]
Reward: -1  Episode Reward:  46
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4904.37343466  598.65888748 -120.29354603]
------
Step:15, Action:South
State  110
Old Q Values:  [-239.29051573 1794.83169102  265.50261253 -180.6       ]
New Q values:  [-239.29051573 2146.46762581  265.50261253 -180.6       ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:16, Action:East
State  179
Old Q Values:  [82228.67666629 16101.90751562 80514.35628474     0.        ]
New Q values:  [82228.67666629 16101.90751562 96251.32812703     0.        ]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18434.85847958 -6442.16912869 -8192.20126966 12050.59088753]
------
Step:1, Action:North
State  288
Old Q Values:  [18434.85847958 -6442.16912869 -8192.20126966 12050.59088753]
New Q values:  [21760.90520399 -6442.16912869 -8192.20126966 12050.59088753]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[47938.53937387  8727.19723157 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [47938.53937387  8727.19723157 -4584.50430574 -1713.91177491]
New Q values:  [66617.67200515  8727.19723157 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 158122.85418535]
------
Step:3, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  6.76637748e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  8.27038862e+02]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   168.88098524  1836.61254255]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524  1836.61254255]
New Q values:  [ -281.736      -1150.91067548   168.88098524   853.86821601]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548  379.41066331 -252.78192178]
------
Step:5, Action:East
State  111
Old Q Values:  [-177.44732869 4904.37343466  598.65888748 -120.29354603]
New Q values:  [-177.44732869 4904.37343466  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x..gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   314.03785683  1707.25041982]
------
Step:6, Action:West
State  121
Old Q Values:  [    0.             0.         -8255.55694753   415.3875037 ]
New Q values:  [    0.             0.         -8255.55694753   695.12833773]
Reward: -1  Episode Reward:  34
xxxxx
xa gx
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1765.24445416 -2165.66138672  -180.6       ]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 4904.37343466  751.03868094 -120.29354603]
New Q values:  [-177.44732869 4755.95375682  751.03868094 -120.29354603]
Reward: 9  Episode Reward:  43
xxxxx
x   x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 2450.39049527 9296.01460985  262.76946019]
------
Step:8, Action:East
State  189
Old Q Values:  [ 275.08817949 1438.8649519  1653.51798225  154.04646645]
New Q values:  [ 275.08817949 1438.8649519  1324.38184069  154.04646645]
Reward: 9  Episode Reward:  52
xxxxx
x  gx
x a x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  529.38906334]
------
Step:9, Action:South
State  193
Old Q Values:  [-5922.26708831  5141.37099082 26135.65265282  1460.9765133 ]
New Q values:  [-5922.26708831 63030.44291279 26135.65265282  1460.9765133 ]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
x  gx
x a x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 2450.39049527 9296.01460985  262.76946019]
------
Step:1, Action:East
State  189
Old Q Values:  [ 275.08817949 1438.8649519  1324.38184069  154.04646645]
New Q values:  [  275.08817949  1438.8649519  19444.28561011   154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 63030.44291279 26135.65265282  1460.9765133 ]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 63030.44291279 26135.65265282  1460.9765133 ]
New Q values:  [-5922.26708831 26186.07168157 26135.65265282  1460.9765133 ]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 3228.31505486]
------
Step:3, Action:West
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842 3228.31505486]
New Q values:  [2371.40351594 -168.92307549 2221.96636842 2924.72852655]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5426.67501537   26.73544252 2825.1749743   123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [5426.67501537   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [4172.96906408   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2323.86307178 2799.61966527 6676.33019311 1554.80203889]
------
Step:5, Action:East
State  177
Old Q Values:  [33510.55723334 15108.81536615 30616.80990221     0.        ]
New Q values:  [33510.55723334 15108.81536615 20101.94546536     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 26186.07168157 26135.65265282  1460.9765133 ]
------
Step:6, Action:South
State  195
Old Q Values:  [   38.85388605  6875.60421951 19850.01929415  1169.39963074]
New Q values:  [   38.85388605  3627.06024577 19850.01929415  1169.39963074]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 2924.72852655]
------
Step:7, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  7221.19600298  9806.62911307]
New Q values:  [  175.14749589 -8521.23367799  7221.19600298  5173.94236445]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4172.96906408   26.73544252 2825.1749743   123.6214372 ]
------
Step:8, Action:North
State  260
Old Q Values:  [11190.3245389  -5704.51612281  6205.03070938 -5679.36893145]
New Q values:  [ 5904.66476496 -5704.51612281  6205.03070938 -5679.36893145]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  4763.78316466     0.        ]
------
Step:9, Action:East
State  180
Old Q Values:  [  160.62456153  3998.65267373 17266.73136135 -4966.32149798]
New Q values:  [  160.62456153  3998.65267373 20337.14072518 -4966.32149798]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.31553016e+04 4.47701606e+04 7.32028793e+03]
------
Step:10, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.34672854e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  7.81139596e+03  1.03161518e+03]
Reward: 9  Episode Reward:  30
xxxxx
x.. x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8063.6060385  1127.85578529  790.72804752 1050.85266124]
------
Step:11, Action:North
State  208
Old Q Values:  [66617.67200515  8727.19723157 -4584.50430574 -1713.91177491]
New Q values:  [74083.32505767  8727.19723157 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  29
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 158122.85418535]
------
Step:12, Action:West
State  130
Old Q Values:  [ 26266.584521     5661.05765619   -180.00807518 158122.85418535]
New Q values:  [ 26266.584521     5661.05765619   -180.00807518 100953.79714516]
Reward: 9  Episode Reward:  38
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 125664.18490342]
------
Step:13, Action:West
State  126
Old Q Values:  [   0.          331.64678262  730.59172742 1631.56081097]
New Q values:  [   0.          331.64678262  730.59172742 1301.96461213]
Reward: 9  Episode Reward:  47
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 2146.46762581  265.50261253 -180.6       ]
------
Step:14, Action:East
State  108
Old Q Values:  [-8463.16477134  1968.68675445  2179.86545765     0.        ]
New Q values:  [-8463.16477134  1968.68675445  2058.93231779     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 3958.62044909 1772.94838375]
------
Step:15, Action:East
State  126
Old Q Values:  [   0.          331.64678262  730.59172742 1301.96461213]
New Q values:  [   0.          331.64678262  539.74834958 1301.96461213]
Reward: -1  Episode Reward:  45
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  8.27038862e+02]
------
Step:16, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  8.27038862e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  7.20804928e+02]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  539.74834958 1301.96461213]
------
Step:17, Action:West
State  127
Old Q Values:  [  0.           1.67014986 895.48700012 702.27198793]
New Q values:  [0.00000000e+00 1.67014986e+00 8.95487000e+02 1.70709492e+03]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4755.95375682  751.03868094 -120.29354603]
------
Step:18, Action:South
State  99
Old Q Values:  [    0.         26118.08825259 51580.7370385      0.        ]
New Q values:  [    0.         39322.03373914 51580.7370385      0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[82228.67666629 16101.90751562 96251.32812703     0.        ]
------
Step:19, Action:East
State  191
Old Q Values:  [   3.06655861  941.79461398 1151.91422091    0.        ]
New Q values:  [   3.06655861  941.79461398 1211.41037433    0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.         2504.14895323 1884.45205023    0.        ]
------
Step:20, Action:East
State  206
Old Q Values:  [   0.         2504.14895323 1884.45205023    0.        ]
New Q values:  [   0.         2504.14895323 1418.80979599    0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2218.76325298 2184.8079221     0.         1847.21017375]
------
Step:21, Action:North
State  216
Old Q Values:  [  650.32111404  8039.22536189 -8220.10378799   637.30368728]
New Q values:  [  475.76992415  8039.22536189 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  7.20804928e+02]
------
Step:22, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  7.20804928e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  6.78311355e+02]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  539.74834958 1301.96461213]
------
Step:23, Action:West
State  126
Old Q Values:  [   0.          331.64678262  539.74834958 1301.96461213]
New Q values:  [   0.          331.64678262  539.74834958 1946.9719719 ]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4755.95375682  751.03868094 -120.29354603]
------
Step:24, Action:South
State  111
Old Q Values:  [-177.44732869 4755.95375682  751.03868094 -120.29354603]
New Q values:  [-177.44732869 7735.06718576  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  1438.8649519  19444.28561011   154.04646645]
------
Step:25, Action:South
State  189
Old Q Values:  [  275.08817949  1438.8649519  19444.28561011   154.04646645]
New Q values:  [  275.08817949  1826.83669998 19444.28561011   154.04646645]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4172.96906408   26.73544252 2825.1749743   123.6214372 ]
------
Step:26, Action:North
State  261
Old Q Values:  [4172.96906408   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [7501.87330867   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  275.08817949  1826.83669998 19444.28561011   154.04646645]
------
Step:27, Action:East
State  189
Old Q Values:  [  275.08817949  1826.83669998 19444.28561011   154.04646645]
New Q values:  [ 275.08817949 1826.83669998 8879.56680328  154.04646645]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         2790.09447719 3674.84186411  441.58769553]
------
Step:28, Action:East
State  204
Old Q Values:  [   0.         2790.09447719 3674.84186411  441.58769553]
New Q values:  [   0.         2790.09447719 3881.10435421  441.58769553]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  8039.22536189 -8220.10378799   637.30368728]
------
Step:29, Action:South
State  208
Old Q Values:  [74083.32505767  8727.19723157 -4584.50430574 -1713.91177491]
New Q values:  [74083.32505767 70024.55045383 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100041
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8063.6060385  1127.85578529  790.72804752 1050.85266124]
------
Step:1, Action:North
State  208
Old Q Values:  [74083.32505767 70024.55045383 -4584.50430574 -1713.91177491]
New Q values:  [29842.22342957 70024.55045383 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  6.78311355e+02]
------
Step:2, Action:West
State  136
Old Q Values:  [ -170.77177351  8889.8827156  -2383.80019164  2071.50085392]
New Q values:  [ -170.77177351  8889.8827156  -2383.80019164 -2719.01845396]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.g x
x.. x
x. .x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 2146.46762581  265.50261253 -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 7735.06718576  751.03868094 -120.29354603]
New Q values:  [-177.44732869 5102.32593224  751.03868094 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2323.86307178 2799.61966527 6676.33019311 1554.80203889]
------
Step:2, Action:East
State  181
Old Q Values:  [1341.14060426 2450.39049527 9296.01460985  262.76946019]
New Q values:  [1341.14060426 2450.39049527 1320.50884867  262.76946019]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x g.x
x ..x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  7.81139596e+03  1.03161518e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  7.81139596e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  5.54904020e+03  1.03161518e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8063.6060385  1127.85578529  790.72804752 1050.85266124]
------
Step:2, Action:North
State  208
Old Q Values:  [29842.22342957 70024.55045383 -4584.50430574 -1713.91177491]
New Q values:  [42228.42851538 70024.55045383 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 100953.79714516]
------
Step:3, Action:West
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  6.78311355e+02]
New Q values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.79759800e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 125664.18490342]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   853.86821601]
New Q values:  [ -281.736      -1150.91067548   168.88098524   418.44451863]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:5, Action:East
State  104
Old Q Values:  [-8652.84        2246.80188749   797.27438942 -8652.84      ]
New Q values:  [-8652.84        2246.80188749  2765.29096025 -8652.84      ]
Reward: -1  Episode Reward:  35
xxxxx
xga x
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  8156.60401493  1024.49912872]
------
Step:6, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  8156.60401493  1024.49912872]
New Q values:  [-9594.56523706 -8069.05606225  5929.00642065  1024.49912872]
Reward: -1  Episode Reward:  34
xxxxx
x gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  8889.8827156  -2383.80019164 -2719.01845396]
------
Step:7, Action:South
State  136
Old Q Values:  [ -170.77177351  8889.8827156  -2383.80019164 -2719.01845396]
New Q values:  [ -170.77177351  5967.12069481 -2383.80019164 -2719.01845396]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x. ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  8039.22536189 -8220.10378799   637.30368728]
------
Step:8, Action:South
State  208
Old Q Values:  [42228.42851538 70024.55045383 -4584.50430574 -1713.91177491]
New Q values:  [42228.42851538 34543.49174273 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21760.90520399 -6442.16912869 -8192.20126966 12050.59088753]
------
Step:9, Action:West
State  288
Old Q Values:  [21760.90520399 -6442.16912869 -8192.20126966 12050.59088753]
New Q values:  [21760.90520399 -6442.16912869 -8192.20126966  5697.05491298]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 2924.72852655]
------
Step:10, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  7221.19600298  5173.94236445]
New Q values:  [  175.14749589 -8521.23367799  7221.19600298  7220.49838137]
Reward: 9  Episode Reward:  50
xxxxx
x g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[17151.73811864 12764.58618105  7438.53947475  1875.31501677]
------
Step:11, Action:North
State  257
Old Q Values:  [17151.73811864 12764.58618105  7438.53947475  1875.31501677]
New Q values:  [76919.26241746 12764.58618105  7438.53947475  1875.31501677]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
xag x
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21760.90520399 -6442.16912869 -8192.20126966  5697.05491298]
------
Step:1, Action:North
State  288
Old Q Values:  [21760.90520399 -6442.16912869 -8192.20126966  5697.05491298]
New Q values:  [21378.29063621 -6442.16912869 -8192.20126966  5697.05491298]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[42228.42851538 34543.49174273 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [42228.42851538 34543.49174273 -4584.50430574 -1713.91177491]
New Q values:  [28289.56541006 34543.49174273 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.79759800e+04]
------
Step:3, Action:West
State  136
Old Q Values:  [ -170.77177351  5967.12069481 -2383.80019164 -2719.01845396]
New Q values:  [ -170.77177351  5967.12069481 -2383.80019164 -5303.50545539]
Reward: -9991  Episode Reward:  -9973
xxxxx
x g x
x.. x
x.. x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
Step:1, Action:North
State  260
Old Q Values:  [ 5904.66476496 -5704.51612281  6205.03070938 -5679.36893145]
New Q values:  [ 2468.40812354 -5704.51612281  6205.03070938 -5679.36893145]
Reward: -9991  Episode Reward:  -9991
xxxxx
x ..x
xg..x
x ..x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3627.06024577 19850.01929415  1169.39963074]
------
Step:1, Action:East
State  193
Old Q Values:  [-5922.26708831 26186.07168157 26135.65265282  1460.9765133 ]
New Q values:  [-5922.26708831 26186.07168157 14822.70858395  1460.9765133 ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.79759800e+04]
------
Step:1, Action:West
State  136
Old Q Values:  [ -170.77177351  5967.12069481 -2383.80019164 -5303.50545539]
New Q values:  [ -170.77177351  5967.12069481 -2383.80019164  -337.30025596]
Reward: 9  Episode Reward:  9
xxxxx
xga x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  5929.00642065  1024.49912872]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  5929.00642065  1024.49912872]
New Q values:  [-9594.56523706 -8069.05606225  4161.1387767   1024.49912872]
Reward: -1  Episode Reward:  8
xxxxx
x.gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5967.12069481 -2383.80019164  -337.30025596]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00 -1.15722297e+03 -3.22965309e-01  3.79759800e+04]
New Q values:  [ 8.43634063e+00  1.95427842e+03 -3.22965309e-01  3.79759800e+04]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  8039.22536189 -8220.10378799   637.30368728]
------
Step:4, Action:South
State  208
Old Q Values:  [28289.56541006 34543.49174273 -4584.50430574 -1713.91177491]
New Q values:  [28289.56541006 20236.28388796 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  26
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21378.29063621 -6442.16912869 -8192.20126966  5697.05491298]
------
Step:5, Action:North
State  288
Old Q Values:  [21378.29063621 -6442.16912869 -8192.20126966  5697.05491298]
New Q values:  [17037.5858775  -6442.16912869 -8192.20126966  5697.05491298]
Reward: -1  Episode Reward:  25
xxxxx
x. gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28289.56541006 20236.28388796 -4584.50430574 -1713.91177491]
------
Step:6, Action:South
State  208
Old Q Values:  [28289.56541006 20236.28388796 -4584.50430574 -1713.91177491]
New Q values:  [28289.56541006 13205.18931843 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17037.5858775  -6442.16912869 -8192.20126966  5697.05491298]
------
Step:7, Action:West
State  288
Old Q Values:  [17037.5858775  -6442.16912869 -8192.20126966  5697.05491298]
New Q values:  [17037.5858775  -6442.16912869 -8192.20126966  4450.58076609]
Reward: 9  Episode Reward:  33
xxxxx
x.  x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  7221.19600298  7220.49838137]
------
Step:8, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  7221.19600298  7220.49838137]
New Q values:  [  175.14749589 -8521.23367799  7999.15416444  7220.49838137]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17037.5858775  -6442.16912869 -8192.20126966  4450.58076609]
------
Step:9, Action:North
State  288
Old Q Values:  [17037.5858775  -6442.16912869 -8192.20126966  4450.58076609]
New Q values:  [15301.30397402 -6442.16912869 -8192.20126966  4450.58076609]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28289.56541006 13205.18931843 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  208
Old Q Values:  [28289.56541006 13205.18931843 -4584.50430574 -1713.91177491]
New Q values:  [22708.02016793 13205.18931843 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  30
xxxxx
x. ax
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.95427842e+03 -3.22965309e-01  3.79759800e+04]
------
Step:11, Action:West
State  130
Old Q Values:  [ 26266.584521     5661.05765619   -180.00807518 100953.79714516]
New Q values:  [26266.584521    5661.05765619  -180.00807518 78080.17432909]
Reward: -1  Episode Reward:  29
xxxxx
x.a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 125664.18490342]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   418.44451863]
New Q values:  [ -281.736      -1150.91067548   168.88098524   244.27503968]
Reward: 9  Episode Reward:  38
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   238.32410743  -180.6       ]
------
Step:13, Action:East
State  104
Old Q Values:  [-8652.84        2246.80188749  2765.29096025 -8652.84      ]
New Q values:  [-8652.84        2246.80188749  2353.85801711 -8652.84      ]
Reward: -1  Episode Reward:  37
xxxxx
xga x
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4161.1387767   1024.49912872]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   168.88098524   244.27503968]
New Q values:  [ -281.736      -1150.91067548 11459.74639801   244.27503968]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.95427842e+03 -3.22965309e-01  3.79759800e+04]
------
Step:15, Action:West
State  136
Old Q Values:  [ -170.77177351  5967.12069481 -2383.80019164  -337.30025596]
New Q values:  [ -170.77177351  5967.12069481 -2383.80019164  1112.82153063]
Reward: -1  Episode Reward:  35
xxxxx
xga x
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4161.1387767   1024.49912872]
------
Step:16, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4161.1387767   1024.49912872]
New Q values:  [-9594.56523706 -8069.05606225  3453.99171912  1024.49912872]
Reward: -1  Episode Reward:  34
xxxxx
x gax
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5967.12069481 -2383.80019164  1112.82153063]
------
Step:17, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.95427842e+03 -3.22965309e-01  3.79759800e+04]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  3.79759800e+04]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  8039.22536189 -8220.10378799   637.30368728]
------
Step:18, Action:South
State  208
Old Q Values:  [22708.02016793 13205.18931843 -4584.50430574 -1713.91177491]
New Q values:  [22708.02016793  9871.86691958 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x . x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15301.30397402 -6442.16912869 -8192.20126966  4450.58076609]
------
Step:19, Action:North
State  288
Old Q Values:  [15301.30397402 -6442.16912869 -8192.20126966  4450.58076609]
New Q values:  [ 8531.68919817 -6442.16912869 -8192.20126966  4450.58076609]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  8039.22536189 -8220.10378799   637.30368728]
------
Step:20, Action:South
State  208
Old Q Values:  [22708.02016793  9871.86691958 -4584.50430574 -1713.91177491]
New Q values:  [22708.02016793  6507.65352728 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8531.68919817 -6442.16912869 -8192.20126966  4450.58076609]
------
Step:21, Action:North
State  288
Old Q Values:  [ 8531.68919817 -6442.16912869 -8192.20126966  4450.58076609]
New Q values:  [ 5831.15749082 -6442.16912869 -8192.20126966  4450.58076609]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8063.6060385  1127.85578529  790.72804752 1050.85266124]
------
Step:22, Action:North
State  210
Old Q Values:  [8063.6060385  1127.85578529  790.72804752 1050.85266124]
New Q values:  [14617.63641931  1127.85578529   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  3.79759800e+04]
------
Step:23, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  3.79759800e+04]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  1.57019671e+04]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x . x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   314.03785683  1707.25041982]
------
Step:24, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683  1707.25041982]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683   796.12336692]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548  379.41066331 -252.78192178]
------
Step:25, Action:East
State  105
Old Q Values:  [ -180.6          125.3622648  -4615.35224813     0.        ]
New Q values:  [ -180.6          125.3622648  -1638.20239793     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x agx
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8255.55694753   695.12833773]
------
Step:26, Action:West
State  121
Old Q Values:  [    0.             0.         -8255.55694753   695.12833773]
New Q values:  [    0.             0.         -8255.55694753   315.06001453]
Reward: -1  Episode Reward:  24
xxxxx
xa gx
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          125.3622648  -1638.20239793     0.        ]
------
Step:27, Action:South
State  105
Old Q Values:  [ -180.6          125.3622648  -1638.20239793     0.        ]
New Q values:  [ -180.6          784.6620545  -1638.20239793     0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x g x
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 2450.39049527 1320.50884867  262.76946019]
------
Step:28, Action:South
State  177
Old Q Values:  [33510.55723334 15108.81536615 20101.94546536     0.        ]
New Q values:  [33510.55723334 29124.7048717  20101.94546536     0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[76919.26241746 12764.58618105  7438.53947475  1875.31501677]
------
Step:29, Action:North
State  261
Old Q Values:  [7501.87330867   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [3735.26647205   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 2450.39049527 1320.50884867  262.76946019]
------
Step:30, Action:South
State  181
Old Q Values:  [1341.14060426 2450.39049527 1320.50884867  262.76946019]
New Q values:  [1341.14060426 2100.13613972 1320.50884867  262.76946019]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3735.26647205   26.73544252 2825.1749743   123.6214372 ]
------
Step:31, Action:North
State  261
Old Q Values:  [3735.26647205   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [2123.54743074   26.73544252 2825.1749743   123.6214372 ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 2100.13613972 1320.50884867  262.76946019]
------
Step:32, Action:South
State  183
Old Q Values:  [2323.86307178 2799.61966527 6676.33019311 1554.80203889]
New Q values:  [2323.86307178 1966.8003584  6676.33019311 1554.80203889]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2123.54743074   26.73544252 2825.1749743   123.6214372 ]
------
Step:33, Action:East
State  261
Old Q Values:  [2123.54743074   26.73544252 2825.1749743   123.6214372 ]
New Q values:  [2123.54743074   26.73544252 2006.88854769  123.6214372 ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 2924.72852655]
------
Step:34, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  7999.15416444  7220.49838137]
New Q values:  [  175.14749589 -8521.23367799  7999.15416444 25963.37807779]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[76919.26241746 12764.58618105  7438.53947475  1875.31501677]
------
Step:35, Action:North
State  261
Old Q Values:  [2123.54743074   26.73544252 2006.88854769  123.6214372 ]
New Q values:  [2851.71803023   26.73544252 2006.88854769  123.6214372 ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2323.86307178 1966.8003584  6676.33019311 1554.80203889]
------
Step:36, Action:East
State  178
Old Q Values:  [    0.             0.         87863.23867435     0.        ]
New Q values:  [    0.             0.         96815.40752869     0.        ]
Reward: 100009  Episode Reward:  100034
xxxxx
x   x
x a x
xg  x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2468.40812354 -5704.51612281  6205.03070938 -5679.36893145]
------
Step:1, Action:East
State  260
Old Q Values:  [ 2468.40812354 -5704.51612281  6205.03070938 -5679.36893145]
New Q values:  [ 2468.40812354 -5704.51612281  9898.10444475 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197 24702.30720332  2691.5783287 ]
------
Step:2, Action:East
State  272
Old Q Values:  [  175.14749589 -8521.23367799  7999.15416444 25963.37807779]
New Q values:  [  175.14749589 -8521.23367799  4954.40891302 25963.37807779]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5831.15749082 -6442.16912869 -8192.20126966  4450.58076609]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5831.15749082 -6442.16912869 -8192.20126966  4450.58076609]
New Q values:  [ 6723.15392212 -6442.16912869 -8192.20126966  4450.58076609]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14617.63641931  1127.85578529   790.72804752  1050.85266124]
------
Step:4, Action:North
State  210
Old Q Values:  [14617.63641931  1127.85578529   790.72804752  1050.85266124]
New Q values:  [29276.50686645  1127.85578529   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 78080.17432909]
------
Step:5, Action:West
State  130
Old Q Values:  [26266.584521    5661.05765619  -180.00807518 78080.17432909]
New Q values:  [26266.584521    5661.05765619  -180.00807518 68936.72520266]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 125664.18490342]
------
Step:6, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683   796.12336692]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683  1854.54712644]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5102.32593224  751.03868094 -120.29354603]
------
Step:7, Action:South
State  109
Old Q Values:  [ -241.10880094  1765.24445416 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1335.53862358 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  53
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 2100.13613972 1320.50884867  262.76946019]
------
Step:8, Action:South
State  181
Old Q Values:  [1341.14060426 2100.13613972 1320.50884867  262.76946019]
New Q values:  [1341.14060426 1694.96986496 1320.50884867  262.76946019]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2851.71803023   26.73544252 2006.88854769  123.6214372 ]
------
Step:9, Action:North
State  261
Old Q Values:  [2851.71803023   26.73544252 2006.88854769  123.6214372 ]
New Q values:  [1648.57817158   26.73544252 2006.88854769  123.6214372 ]
Reward: -1  Episode Reward:  51
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 1694.96986496 1320.50884867  262.76946019]
------
Step:10, Action:South
State  181
Old Q Values:  [1341.14060426 1694.96986496 1320.50884867  262.76946019]
New Q values:  [1341.14060426 1279.45451029 1320.50884867  262.76946019]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1648.57817158   26.73544252 2006.88854769  123.6214372 ]
------
Step:11, Action:East
State  261
Old Q Values:  [1648.57817158   26.73544252 2006.88854769  123.6214372 ]
New Q values:  [1648.57817158   26.73544252 1679.57397704  123.6214372 ]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842 2924.72852655]
------
Step:12, Action:West
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842 2924.72852655]
New Q values:  [2371.40351594 -168.92307549 2221.96636842 1673.16360373]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1648.57817158   26.73544252 1679.57397704  123.6214372 ]
------
Step:13, Action:East
State  257
Old Q Values:  [76919.26241746 12764.58618105  7438.53947475  1875.31501677]
New Q values:  [76919.26241746 12764.58618105 10763.82921323  1875.31501677]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  4954.40891302 25963.37807779]
------
Step:14, Action:West
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842 1673.16360373]
New Q values:  [2371.40351594 -168.92307549 2221.96636842 1172.53763461]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1648.57817158   26.73544252 1679.57397704  123.6214372 ]
------
Step:15, Action:East
State  257
Old Q Values:  [76919.26241746 12764.58618105 10763.82921323  1875.31501677]
New Q values:  [76919.26241746 12764.58618105 12093.94510863  1875.31501677]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  4954.40891302 25963.37807779]
------
Step:16, Action:West
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842 1172.53763461]
New Q values:  [2371.40351594 -168.92307549 2221.96636842  972.28724695]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1648.57817158   26.73544252 1679.57397704  123.6214372 ]
------
Step:17, Action:East
State  261
Old Q Values:  [1648.57817158   26.73544252 1679.57397704  123.6214372 ]
New Q values:  [1648.57817158   26.73544252 1382.6506456   123.6214372 ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2371.40351594 -168.92307549 2221.96636842  972.28724695]
------
Step:18, Action:North
State  273
Old Q Values:  [2371.40351594 -168.92307549 2221.96636842  972.28724695]
New Q values:  [68809.78291085  -168.92307549  2221.96636842   972.28724695]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2323.86307178 1966.8003584  6676.33019311 1554.80203889]
------
Step:1, Action:East
State  183
Old Q Values:  [2323.86307178 1966.8003584  6676.33019311 1554.80203889]
New Q values:  [2323.86307178 1966.8003584  8630.93786549 1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  3627.06024577 19850.01929415  1169.39963074]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  5.54904020e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.10079681e+04  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[29276.50686645  1127.85578529   790.72804752  1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [29276.50686645  1127.85578529   790.72804752  1050.85266124]
New Q values:  [32397.02030738  1127.85578529   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 68936.72520266]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  1.57019671e+04]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  4.65400095e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 134179.40896346]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683  1854.54712644]
New Q values:  [ -253.44886264 -1902.20915811   314.03785683   861.04204957]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548  379.41066331 -252.78192178]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558  311.89931548  379.41066331 -252.78192178]
New Q values:  [-252.35169558  311.89931548 3589.08818473 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 11459.74639801   244.27503968]
------
Step:7, Action:East
State  126
Old Q Values:  [   0.          331.64678262  539.74834958 1946.9719719 ]
New Q values:  [    0.           331.64678262 14177.30220228  1946.9719719 ]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  4.65400095e+04]
------
Step:8, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  4.65400095e+04]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  2.20533277e+04]
Reward: -1  Episode Reward:  42
xxxxx
x a x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 11459.74639801   244.27503968]
------
Step:9, Action:East
State  126
Old Q Values:  [    0.           331.64678262 14177.30220228  1946.9719719 ]
New Q values:  [    0.           331.64678262 12286.31920171  1946.9719719 ]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  2.20533277e+04]
------
Step:10, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  2.20533277e+04]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  1.22586550e+04]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 11459.74639801   244.27503968]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 11459.74639801   244.27503968]
New Q values:  [ -281.736      -1150.91067548  8260.89506334   244.27503968]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  1.22586550e+04]
------
Step:12, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  1.22586550e+04]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  7.38113052e+03]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  8260.89506334   244.27503968]
------
Step:13, Action:East
State  126
Old Q Values:  [    0.           331.64678262 12286.31920171  1946.9719719 ]
New Q values:  [   0.          331.64678262 7128.26683804 1946.9719719 ]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  7.38113052e+03]
------
Step:14, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  7.38113052e+03]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  5.43012073e+03]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  8260.89506334   244.27503968]
------
Step:15, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   314.03785683   861.04204957]
New Q values:  [ -253.44886264 -1902.20915811  1754.05136138   861.04204957]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  5.43012073e+03]
------
Step:16, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  5.43012073e+03]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  4.64971681e+03]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  8260.89506334   244.27503968]
------
Step:17, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  8260.89506334   244.27503968]
New Q values:  [ -281.736      -1150.91067548  4698.6730685    244.27503968]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  4.64971681e+03]
------
Step:18, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  4.64971681e+03]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  3.26888864e+03]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4698.6730685    244.27503968]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4698.6730685    244.27503968]
New Q values:  [ -281.736      -1150.91067548  2859.53582083   244.27503968]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  3.26888864e+03]
------
Step:20, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  3.26888864e+03]
New Q values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  1.83317087e+03]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1754.05136138   861.04204957]
------
Step:21, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1754.05136138   861.04204957]
New Q values:  [ -253.44886264 -1902.20915811  1658.88423777   861.04204957]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  1.83317087e+03]
------
Step:22, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.19287898e+03 -3.22965309e-01  1.83317087e+03]
New Q values:  [ 8.43634063e+00  1.94218057e+03 -3.22965309e-01  1.83317087e+03]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2218.76325298 2184.8079221     0.         1847.21017375]
------
Step:23, Action:North
State  218
Old Q Values:  [2218.76325298 2184.8079221     0.         1847.21017375]
New Q values:  [1469.55947125 2184.8079221     0.         1847.21017375]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.94218057e+03 -3.22965309e-01  1.83317087e+03]
------
Step:24, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  1.94218057e+03 -3.22965309e-01  1.83317087e+03]
New Q values:  [ 8.43634063e+00  3.18803984e+03 -3.22965309e-01  1.83317087e+03]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  8039.22536189 -8220.10378799   637.30368728]
------
Step:25, Action:South
State  210
Old Q Values:  [32397.02030738  1127.85578529   790.72804752  1050.85266124]
New Q values:  [32397.02030738  2473.48849075   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6723.15392212 -6442.16912869 -8192.20126966  4450.58076609]
------
Step:26, Action:North
State  288
Old Q Values:  [ 6723.15392212 -6442.16912869 -8192.20126966  4450.58076609]
New Q values:  [ 5100.42917741 -6442.16912869 -8192.20126966  4450.58076609]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  8039.22536189 -8220.10378799   637.30368728]
------
Step:27, Action:South
State  216
Old Q Values:  [  475.76992415  8039.22536189 -8220.10378799   637.30368728]
New Q values:  [  475.76992415  4745.21889798 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5100.42917741 -6442.16912869 -8192.20126966  4450.58076609]
------
Step:28, Action:North
State  288
Old Q Values:  [ 5100.42917741 -6442.16912869 -8192.20126966  4450.58076609]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966  4450.58076609]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  4745.21889798 -8220.10378799   637.30368728]
------
Step:29, Action:South
State  216
Old Q Values:  [  475.76992415  4745.21889798 -8220.10378799   637.30368728]
New Q values:  [  475.76992415  3232.66178902 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966  4450.58076609]
------
Step:30, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966  4450.58076609]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966  9568.64572977]
Reward: -1  Episode Reward:  30
xxxxx
x g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  4954.40891302 25963.37807779]
------
Step:31, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  4954.40891302 25963.37807779]
New Q values:  [  175.14749589 -8521.23367799  4954.40891302 93466.52995635]
Reward: 100009  Episode Reward:  100039
xxxxx
x   x
x g x
xa  x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966  9568.64572977]
------
Step:1, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966  9568.64572977]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 31872.81727881]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  4954.40891302 93466.52995635]
------
Step:2, Action:West
State  273
Old Q Values:  [68809.78291085  -168.92307549  2221.96636842   972.28724695]
New Q values:  [68809.78291085  -168.92307549  2221.96636842   888.88835026]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1648.57817158   26.73544252 1382.6506456   123.6214372 ]
------
Step:3, Action:North
State  261
Old Q Values:  [1648.57817158   26.73544252 1382.6506456   123.6214372 ]
New Q values:  [1061.17344991   26.73544252 1382.6506456   123.6214372 ]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1341.14060426 1279.45451029 1320.50884867  262.76946019]
------
Step:4, Action:North
State  181
Old Q Values:  [1341.14060426 1279.45451029 1320.50884867  262.76946019]
New Q values:  [1803.38107605 1279.45451029 1320.50884867  262.76946019]
Reward: 9  Episode Reward:  26
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 4205.08278116  238.35800069    0.        ]
------
Step:5, Action:South
State  103
Old Q Values:  [ 221.30610858 4205.08278116  238.35800069    0.        ]
New Q values:  [ 221.30610858 2222.44743528  238.35800069    0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1803.38107605 1279.45451029 1320.50884867  262.76946019]
------
Step:6, Action:North
State  183
Old Q Values:  [2323.86307178 1966.8003584  8630.93786549 1554.80203889]
New Q values:  [1595.6794593  1966.8003584  8630.93786549 1554.80203889]
Reward: -1  Episode Reward:  24
xxxxx
xa..x
x ..x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2222.44743528  238.35800069    0.        ]
------
Step:7, Action:South
State  103
Old Q Values:  [ 221.30610858 2222.44743528  238.35800069    0.        ]
New Q values:  [ 221.30610858 3477.66033376  238.35800069    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1595.6794593  1966.8003584  8630.93786549 1554.80203889]
------
Step:8, Action:East
State  183
Old Q Values:  [1595.6794593  1966.8003584  8630.93786549 1554.80203889]
New Q values:  [1595.6794593  1966.8003584  8591.33362666 1554.80203889]
Reward: 9  Episode Reward:  32
xxxxx
x ..x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.10079681e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.41276933e+04  1.03161518e+03]
Reward: 9  Episode Reward:  41
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32397.02030738  2473.48849075   790.72804752  1050.85266124]
------
Step:10, Action:North
State  210
Old Q Values:  [32397.02030738  2473.48849075   790.72804752  1050.85266124]
New Q values:  [33645.22568375  2473.48849075   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  50
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 68936.72520266]
------
Step:11, Action:West
State  130
Old Q Values:  [26266.584521    5661.05765619  -180.00807518 68936.72520266]
New Q values:  [ 26266.584521     5661.05765619   -180.00807518 125279.34555209]
Reward: 100009  Episode Reward:  100059
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5102.32593224  751.03868094 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3477.66033376  238.35800069    0.        ]
New Q values:  [ 221.30610858 1937.47845632  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1803.38107605 1279.45451029 1320.50884867  262.76946019]
------
Step:2, Action:North
State  181
Old Q Values:  [1803.38107605 1279.45451029 1320.50884867  262.76946019]
New Q values:  [1301.99596732 1279.45451029 1320.50884867  262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1937.47845632  238.35800069    0.        ]
------
Step:3, Action:South
State  103
Old Q Values:  [ 221.30610858 1937.47845632  238.35800069    0.        ]
New Q values:  [ 221.30610858 3351.79147053  238.35800069    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1595.6794593  1966.8003584  8591.33362666 1554.80203889]
------
Step:4, Action:East
State  181
Old Q Values:  [1301.99596732 1279.45451029 1320.50884867  262.76946019]
New Q values:  [ 1301.99596732  1279.45451029 -1869.69345581   262.76946019]
Reward: -9991  Episode Reward:  -9984
xxxxx
x ..x
x g.x
x.. x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
Step:1, Action:West
State  136
Old Q Values:  [ -170.77177351  5967.12069481 -2383.80019164  1112.82153063]
New Q values:  [ -170.77177351  5967.12069481 -2383.80019164  1486.72612799]
Reward: 9  Episode Reward:  9
xxxxx
xga x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3453.99171912  1024.49912872]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3453.99171912  1024.49912872]
New Q values:  [-9594.56523706 -8069.05606225  3171.13289609  1024.49912872]
Reward: -1  Episode Reward:  8
xxxxx
x.gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5967.12069481 -2383.80019164  1486.72612799]
------
Step:3, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  3.18803984e+03 -3.22965309e-01  1.83317087e+03]
New Q values:  [ 8.43634063e+00  2.25041447e+03 -3.22965309e-01  1.83317087e+03]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  475.76992415  3232.66178902 -8220.10378799   637.30368728]
------
Step:4, Action:South
State  208
Old Q Values:  [22708.02016793  6507.65352728 -4584.50430574 -1713.91177491]
New Q values:  [22708.02016793 12170.30659456 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  26
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 31872.81727881]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 31872.81727881]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 40794.48589843]
Reward: 9  Episode Reward:  35
xxxxx
x.  x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  175.14749589 -8521.23367799  4954.40891302 93466.52995635]
------
Step:6, Action:West
State  272
Old Q Values:  [  175.14749589 -8521.23367799  4954.40891302 93466.52995635]
New Q values:  [  175.14749589 -8521.23367799  4954.40891302 60467.79070778]
Reward: 9  Episode Reward:  44
xxxxx
x.  x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[76919.26241746 12764.58618105 12093.94510863  1875.31501677]
------
Step:7, Action:North
State  257
Old Q Values:  [76919.26241746 12764.58618105 12093.94510863  1875.31501677]
New Q values:  [40820.27213699 12764.58618105 12093.94510863  1875.31501677]
Reward: -1  Episode Reward:  43
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  20101.94546536     0.        ]
------
Step:8, Action:North
State  183
Old Q Values:  [1595.6794593  1966.8003584  8591.33362666 1554.80203889]
New Q values:  [2174.36956339 1966.8003584  8591.33362666 1554.80203889]
Reward: 9  Episode Reward:  52
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5102.32593224  751.03868094 -120.29354603]
------
Step:9, Action:South
State  99
Old Q Values:  [    0.         39322.03373914 51580.7370385      0.        ]
New Q values:  [    0.         25781.38066566 51580.7370385      0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  20101.94546536     0.        ]
------
Step:10, Action:North
State  180
Old Q Values:  [  160.62456153  3998.65267373 20337.14072518 -4966.32149798]
New Q values:  [  707.59011235  3998.65267373 20337.14072518 -4966.32149798]
Reward: -1  Episode Reward:  50
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 2146.46762581  265.50261253 -180.6       ]
------
Step:11, Action:East
State  108
Old Q Values:  [-8463.16477134  1968.68675445  2058.93231779     0.        ]
New Q values:  [-8463.16477134  1968.68675445  1774.31279594     0.        ]
Reward: -1  Episode Reward:  49
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3171.13289609  1024.49912872]
------
Step:12, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3171.13289609  1024.49912872]
New Q values:  [-9594.56523706 -8069.05606225  3057.98936688  1024.49912872]
Reward: -1  Episode Reward:  48
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  5967.12069481 -2383.80019164  1486.72612799]
------
Step:13, Action:South
State  136
Old Q Values:  [ -170.77177351  5967.12069481 -2383.80019164  1486.72612799]
New Q values:  [ -170.77177351  9198.6543283  -2383.80019164  1486.72612799]
Reward: -1  Episode Reward:  47
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22708.02016793 12170.30659456 -4584.50430574 -1713.91177491]
------
Step:14, Action:North
State  208
Old Q Values:  [22708.02016793 12170.30659456 -4584.50430574 -1713.91177491]
New Q values:  [11842.20436566 12170.30659456 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  46
xxxxx
xg ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  9198.6543283  -2383.80019164  1486.72612799]
------
Step:15, Action:South
State  136
Old Q Values:  [ -170.77177351  9198.6543283  -2383.80019164  1486.72612799]
New Q values:  [ -170.77177351  7329.95370969 -2383.80019164  1486.72612799]
Reward: -1  Episode Reward:  45
xxxxx
x g x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11842.20436566 12170.30659456 -4584.50430574 -1713.91177491]
------
Step:16, Action:South
State  208
Old Q Values:  [11842.20436566 12170.30659456 -4584.50430574 -1713.91177491]
New Q values:  [11842.20436566 17105.86840735 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 40794.48589843]
------
Step:17, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 40794.48589843]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 36960.12923263]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[68809.78291085  -168.92307549  2221.96636842   888.88835026]
------
Step:18, Action:North
State  273
Old Q Values:  [68809.78291085  -168.92307549  2221.96636842   888.88835026]
New Q values:  [93484.31895258  -168.92307549  2221.96636842   888.88835026]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
x a x
x  gx
xxxxx
xxxxx
x.gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ -170.77177351  7329.95370969 -2383.80019164  1486.72612799]
------
Step:1, Action:South
State  136
Old Q Values:  [ -170.77177351  7329.95370969 -2383.80019164  1486.72612799]
New Q values:  [ -170.77177351  8069.14200608 -2383.80019164  1486.72612799]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11842.20436566 17105.86840735 -4584.50430574 -1713.91177491]
------
Step:2, Action:South
State  208
Old Q Values:  [11842.20436566 17105.86840735 -4584.50430574 -1713.91177491]
New Q values:  [11842.20436566 17935.78613273 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 36960.12923263]
------
Step:3, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 36960.12923263]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 42834.74737883]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[93484.31895258  -168.92307549  2221.96636842   888.88835026]
------
Step:4, Action:North
State  273
Old Q Values:  [93484.31895258  -168.92307549  2221.96636842   888.88835026]
New Q values:  [45254.94908551  -168.92307549  2221.96636842   888.88835026]
Reward: 9  Episode Reward:  36
xxxxx
x.  x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 26186.07168157 14822.70858395  1460.9765133 ]
------
Step:5, Action:South
State  193
Old Q Values:  [-5922.26708831 26186.07168157 14822.70858395  1460.9765133 ]
New Q values:  [-5922.26708831 24050.31339828 14822.70858395  1460.9765133 ]
Reward: -1  Episode Reward:  35
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[45254.94908551  -168.92307549  2221.96636842   888.88835026]
------
Step:6, Action:North
State  273
Old Q Values:  [45254.94908551  -168.92307549  2221.96636842   888.88835026]
New Q values:  [25316.47365369  -168.92307549  2221.96636842   888.88835026]
Reward: -1  Episode Reward:  34
xxxxx
x.  x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 24050.31339828 14822.70858395  1460.9765133 ]
------
Step:7, Action:South
State  195
Old Q Values:  [   38.85388605  3627.06024577 19850.01929415  1169.39963074]
New Q values:  [   38.85388605  9045.16619441 19850.01929415  1169.39963074]
Reward: -1  Episode Reward:  33
xxxxx
x.  x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[25316.47365369  -168.92307549  2221.96636842   888.88835026]
------
Step:8, Action:North
State  273
Old Q Values:  [25316.47365369  -168.92307549  2221.96636842   888.88835026]
New Q values:  [17341.08348096  -168.92307549  2221.96636842   888.88835026]
Reward: -1  Episode Reward:  32
xxxxx
x.  x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 24050.31339828 14822.70858395  1460.9765133 ]
------
Step:9, Action:South
State  195
Old Q Values:  [   38.85388605  9045.16619441 19850.01929415  1169.39963074]
New Q values:  [   38.85388605  8819.79152205 19850.01929415  1169.39963074]
Reward: -1  Episode Reward:  31
xxxxx
x.  x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17341.08348096  -168.92307549  2221.96636842   888.88835026]
------
Step:10, Action:North
State  272
Old Q Values:  [  175.14749589 -8521.23367799  4954.40891302 60467.79070778]
New Q values:  [ 4307.76700264 -8521.23367799  4954.40891302 60467.79070778]
Reward: -1  Episode Reward:  30
xxxxx
x.  x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.41276933e+04  1.03161518e+03]
------
Step:11, Action:East
State  195
Old Q Values:  [   38.85388605  8819.79152205 19850.01929415  1169.39963074]
New Q values:  [   38.85388605  8819.79152205 18032.97542278  1169.39963074]
Reward: -1  Episode Reward:  29
xxxxx
x.  x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[33645.22568375  2473.48849075   790.72804752  1050.85266124]
------
Step:12, Action:North
State  210
Old Q Values:  [33645.22568375  2473.48849075   790.72804752  1050.85266124]
New Q values:  [14132.61461475  2473.48849075   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  28
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  2.25041447e+03 -3.22965309e-01  1.83317087e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  2.25041447e+03 -3.22965309e-01  1.83317087e+03]
New Q values:  [ 8.43634063e+00  5.13935017e+03 -3.22965309e-01  1.83317087e+03]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14132.61461475  2473.48849075   790.72804752  1050.85266124]
------
Step:14, Action:North
State  210
Old Q Values:  [14132.61461475  2473.48849075   790.72804752  1050.85266124]
New Q values:  [7194.25089773 2473.48849075  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  5.13935017e+03 -3.22965309e-01  1.83317087e+03]
------
Step:15, Action:South
State  138
Old Q Values:  [ 8.43634063e+00  5.13935017e+03 -3.22965309e-01  1.83317087e+03]
New Q values:  [ 8.43634063e+00  1.43587591e+03 -3.22965309e-01  1.83317087e+03]
Reward: -10001  Episode Reward:  -9975
xxxxx
x.  x
x. gx
x.  x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4307.76700264 -8521.23367799  4954.40891302 60467.79070778]
------
Step:1, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197 24702.30720332  2691.5783287 ]
New Q values:  [ 7058.83631802 -5807.06396197 24702.30720332  1496.82652516]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1061.17344991   26.73544252 1382.6506456   123.6214372 ]
------
Step:2, Action:East
State  261
Old Q Values:  [1061.17344991   26.73544252 1382.6506456   123.6214372 ]
New Q values:  [1061.17344991   26.73544252 5754.78530253  123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17341.08348096  -168.92307549  2221.96636842   888.88835026]
------
Step:3, Action:North
State  273
Old Q Values:  [17341.08348096  -168.92307549  2221.96636842   888.88835026]
New Q values:  [12351.72601922  -168.92307549  2221.96636842   888.88835026]
Reward: 9  Episode Reward:  17
xxxxx
x. .x
x.a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  8819.79152205 18032.97542278  1169.39963074]
------
Step:4, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.41276933e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  7.81475261e+03  1.03161518e+03]
Reward: 9  Episode Reward:  26
xxxxx
x. .x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7194.25089773 2473.48849075  790.72804752 1050.85266124]
------
Step:5, Action:North
State  210
Old Q Values:  [7194.25089773 2473.48849075  790.72804752 1050.85266124]
New Q values:  [40466.90402472  2473.48849075   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  35
xxxxx
x. ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 125279.34555209]
------
Step:6, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.43587591e+03 -3.22965309e-01  1.83317087e+03]
New Q values:  [ 8.43634063e+00  1.43587591e+03 -3.22965309e-01  4.09864910e+04]
Reward: -1  Episode Reward:  34
xxxxx
x.a x
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 134179.40896346]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  2859.53582083   244.27503968]
New Q values:  [ -281.736      -1150.91067548  2859.53582083  1179.83647129]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
x.g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548 3589.08818473 -252.78192178]
------
Step:8, Action:East
State  105
Old Q Values:  [ -180.6          784.6620545  -1638.20239793     0.        ]
New Q values:  [ -180.6          784.6620545  -5738.48414911     0.        ]
Reward: -10001  Episode Reward:  -9958
xxxxx
x g x
x.  x
x  .x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5102.32593224  751.03868094 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3351.79147053  238.35800069    0.        ]
New Q values:  [ 221.30610858 1736.71537841  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1301.99596732  1279.45451029 -1869.69345581   262.76946019]
------
Step:2, Action:North
State  181
Old Q Values:  [ 1301.99596732  1279.45451029 -1869.69345581   262.76946019]
New Q values:  [ 2050.8961666   1279.45451029 -1869.69345581   262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
xa..x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5102.32593224  751.03868094 -120.29354603]
------
Step:3, Action:South
State  109
Old Q Values:  [ -241.10880094  1335.53862358 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1148.88429941 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2050.8961666   1279.45451029 -1869.69345581   262.76946019]
------
Step:4, Action:North
State  181
Old Q Values:  [ 2050.8961666   1279.45451029 -1869.69345581   262.76946019]
New Q values:  [ 2350.45624631  1279.45451029 -1869.69345581   262.76946019]
Reward: -1  Episode Reward:  6
xxxxx
xa..x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5102.32593224  751.03868094 -120.29354603]
------
Step:5, Action:South
State  103
Old Q Values:  [ 221.30610858 1736.71537841  238.35800069    0.        ]
New Q values:  [ 221.30610858 3271.48623936  238.35800069    0.        ]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 1966.8003584  8591.33362666 1554.80203889]
------
Step:6, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  4763.78316466     0.        ]
New Q values:  [    0.         -5536.05678243  7038.47174633     0.        ]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
------
Step:7, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  7.81475261e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.52713723e+04  1.03161518e+03]
Reward: 9  Episode Reward:  13
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[40466.90402472  2473.48849075   790.72804752  1050.85266124]
------
Step:8, Action:North
State  216
Old Q Values:  [  475.76992415  3232.66178902 -8220.10378799   637.30368728]
New Q values:  [12491.65528033  3232.66178902 -8220.10378799   637.30368728]
Reward: 9  Episode Reward:  22
xxxxx
x .ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.43634063e+00  1.43587591e+03 -3.22965309e-01  4.09864910e+04]
------
Step:9, Action:West
State  138
Old Q Values:  [ 8.43634063e+00  1.43587591e+03 -3.22965309e-01  4.09864910e+04]
New Q values:  [ 8.43634063e+00  1.43587591e+03 -3.22965309e-01  1.72578572e+04]
Reward: 9  Episode Reward:  31
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2859.53582083  1179.83647129]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2859.53582083  1179.83647129]
New Q values:  [ -281.736      -1150.91067548  6320.57147647  1179.83647129]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x   x
x.g.x
xxxxx
Step:11, Action:North
State  138
Old Q Values:  [ 8.43634063e+00  1.43587591e+03 -3.22965309e-01  1.72578572e+04]
New Q values:  [ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  1.72578572e+04]
Reward: -301  Episode Reward:  -271
xxxxx
x  ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  1.72578572e+04]
------
Step:12, Action:West
State  136
Old Q Values:  [ -170.77177351  8069.14200608 -2383.80019164  1486.72612799]
New Q values:  [ -170.77177351  8069.14200608 -2383.80019164 -4488.51273874]
Reward: -10001  Episode Reward:  -10272
xxxxx
x g x
x   x
x...x
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1061.17344991   26.73544252 5754.78530253  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [1061.17344991   26.73544252 5754.78530253  123.6214372 ]
New Q values:  [1061.17344991   26.73544252 6012.83192678  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12351.72601922  -168.92307549  2221.96636842   888.88835026]
------
Step:2, Action:North
State  273
Old Q Values:  [12351.72601922  -168.92307549  2221.96636842   888.88835026]
New Q values:  [12161.18442717  -168.92307549  2221.96636842   888.88835026]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x.agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 24050.31339828 14822.70858395  1460.9765133 ]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.31553016e+04 4.47701606e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 1.26722128e+04 4.47701606e+04 7.32028793e+03]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197 24702.30720332  1496.82652516]
------
Step:4, Action:East
State  273
Old Q Values:  [12161.18442717  -168.92307549  2221.96636842   888.88835026]
New Q values:  [12161.18442717  -168.92307549 13744.61076102   888.88835026]
Reward: 9  Episode Reward:  26
xxxxx
x. .x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 42834.74737883]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 42834.74737883]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 21256.68217983]
Reward: -1  Episode Reward:  25
xxxxx
x. .x
x. .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12161.18442717  -168.92307549 13744.61076102   888.88835026]
------
Step:6, Action:North
State  272
Old Q Values:  [ 4307.76700264 -8521.23367799  4954.40891302 60467.79070778]
New Q values:  [ 6303.91847629 -8521.23367799  4954.40891302 60467.79070778]
Reward: -1  Episode Reward:  24
xxxxx
x. .x
x.a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.52713723e+04  1.03161518e+03]
------
Step:7, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.26722128e+04 4.47701606e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 1.26722128e+04 2.32942001e+04 7.32028793e+03]
Reward: 9  Episode Reward:  33
xxxxx
x. .x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11842.20436566 17935.78613273 -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  208
Old Q Values:  [11842.20436566 17935.78613273 -4584.50430574 -1713.91177491]
New Q values:  [11842.20436566 13550.71910704 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  32
xxxxx
x. .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 21256.68217983]
------
Step:9, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 21256.68217983]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 26642.41008427]
Reward: -1  Episode Reward:  31
xxxxx
x. .x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6303.91847629 -8521.23367799  4954.40891302 60467.79070778]
------
Step:10, Action:West
State  272
Old Q Values:  [ 6303.91847629 -8521.23367799  4954.40891302 60467.79070778]
New Q values:  [ 6303.91847629 -8521.23367799  4954.40891302 37661.69557219]
Reward: -1  Episode Reward:  30
xxxxx
x. .x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[44917.26429693 21390.00229459  7407.2956525    644.94785455]
------
Step:11, Action:South
State  256
Old Q Values:  [44917.26429693 21390.00229459  7407.2956525    644.94785455]
New Q values:  [44917.26429693 15850.58020692  7407.2956525    644.94785455]
Reward: -10301  Episode Reward:  -10271
xxxxx
x. .x
x.  x
xg  x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.26722128e+04 2.32942001e+04 7.32028793e+03]
------
Step:1, Action:East
State  193
Old Q Values:  [-5922.26708831 24050.31339828 14822.70858395  1460.9765133 ]
New Q values:  [-5922.26708831 24050.31339828  9681.98001768  1460.9765133 ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[12491.65528033  3232.66178902 -8220.10378799   637.30368728]
------
Step:2, Action:South
State  208
Old Q Values:  [11842.20436566 13550.71910704 -4584.50430574 -1713.91177491]
New Q values:  [11842.20436566 13418.4106681  -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 26642.41008427]
------
Step:3, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 26642.41008427]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 21960.87270536]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6303.91847629 -8521.23367799  4954.40891302 37661.69557219]
------
Step:4, Action:West
State  273
Old Q Values:  [12161.18442717  -168.92307549 13744.61076102   888.88835026]
New Q values:  [12161.18442717  -168.92307549 13744.61076102 12607.0369812 ]
Reward: 9  Episode Reward:  36
xxxxx
x. .x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[40820.27213699 12764.58618105 12093.94510863  1875.31501677]
------
Step:5, Action:North
State  257
Old Q Values:  [40820.27213699 12764.58618105 12093.94510863  1875.31501677]
New Q values:  [26386.6760248  12764.58618105 12093.94510863  1875.31501677]
Reward: 9  Episode Reward:  45
xxxxx
x. .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  20101.94546536     0.        ]
------
Step:6, Action:North
State  181
Old Q Values:  [ 2350.45624631  1279.45451029 -1869.69345581   262.76946019]
New Q values:  [ 2077.77287388  1279.45451029 -1869.69345581   262.76946019]
Reward: 9  Episode Reward:  54
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         3773.96791785    0.            0.        ]
------
Step:7, Action:South
State  100
Old Q Values:  [   0.         8676.39736464  443.65288404    0.        ]
New Q values:  [   0.         9571.10116341  443.65288404    0.        ]
Reward: -1  Episode Reward:  53
xxxxx
xg .x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  707.59011235  3998.65267373 20337.14072518 -4966.32149798]
------
Step:8, Action:East
State  180
Old Q Values:  [  707.59011235  3998.65267373 20337.14072518 -4966.32149798]
New Q values:  [  707.59011235  3998.65267373 11730.9592948  -4966.32149798]
Reward: -1  Episode Reward:  52
xxxxx
x  .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4027.93373228 11989.01001575  3512.17261526]
------
Step:9, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  1.71118616e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  1.89842158e+04  0.00000000e+00]
Reward: -1  Episode Reward:  51
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[40466.90402472  2473.48849075   790.72804752  1050.85266124]
------
Step:10, Action:North
State  210
Old Q Values:  [40466.90402472  2473.48849075   790.72804752  1050.85266124]
New Q values:  [113775.96527551   2473.48849075    790.72804752   1050.85266124]
Reward: 100009  Episode Reward:  100060
xxxxx
x  ax
x   x
x g x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12161.18442717  -168.92307549 13744.61076102 12607.0369812 ]
------
Step:1, Action:East
State  273
Old Q Values:  [12161.18442717  -168.92307549 13744.61076102 12607.0369812 ]
New Q values:  [12161.18442717  -168.92307549 12091.50611602 12607.0369812 ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 21960.87270536]
------
Step:2, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 21960.87270536]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 12565.86017651]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12161.18442717  -168.92307549 12091.50611602 12607.0369812 ]
------
Step:3, Action:West
State  273
Old Q Values:  [12161.18442717  -168.92307549 12091.50611602 12607.0369812 ]
New Q values:  [12161.18442717  -168.92307549 12091.50611602  6852.06437051]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x.. x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1061.17344991   26.73544252 6012.83192678  123.6214372 ]
------
Step:4, Action:East
State  261
Old Q Values:  [1061.17344991   26.73544252 6012.83192678  123.6214372 ]
New Q values:  [1061.17344991   26.73544252 6052.88809886  123.6214372 ]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12161.18442717  -168.92307549 12091.50611602  6852.06437051]
------
Step:5, Action:North
State  272
Old Q Values:  [ 6303.91847629 -8521.23367799  4954.40891302 37661.69557219]
New Q values:  [ 3515.22741472 -8521.23367799  4954.40891302 37661.69557219]
Reward: -9991  Episode Reward:  -9975
xxxxx
x...x
x.g x
x   x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4954.40891302 37661.69557219]
------
Step:1, Action:West
State  273
Old Q Values:  [12161.18442717  -168.92307549 12091.50611602  6852.06437051]
New Q values:  [12161.18442717  -168.92307549 12091.50611602  4562.09217786]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1061.17344991   26.73544252 6052.88809886  123.6214372 ]
------
Step:2, Action:East
State  261
Old Q Values:  [1061.17344991   26.73544252 6052.88809886  123.6214372 ]
New Q values:  [1061.17344991   26.73544252 6068.9105677   123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12161.18442717  -168.92307549 12091.50611602  4562.09217786]
------
Step:3, Action:North
State  273
Old Q Values:  [12161.18442717  -168.92307549 12091.50611602  4562.09217786]
New Q values:  [12084.96779035  -168.92307549 12091.50611602  4562.09217786]
Reward: 9  Episode Reward:  17
xxxxx
x. gx
x.a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 24050.31339828  9681.98001768  1460.9765133 ]
------
Step:4, Action:South
State  193
Old Q Values:  [-5922.26708831 24050.31339828  9681.98001768  1460.9765133 ]
New Q values:  [-5922.26708831 13246.97719412  9681.98001768  1460.9765133 ]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x. gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12084.96779035  -168.92307549 12091.50611602  4562.09217786]
------
Step:5, Action:East
State  273
Old Q Values:  [12084.96779035  -168.92307549 12091.50611602  4562.09217786]
New Q values:  [12084.96779035  -168.92307549  2611.76049936  4562.09217786]
Reward: -9991  Episode Reward:  -9975
xxxxx
x. .x
x. .x
x  gx
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 12565.86017651]
------
Step:1, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 12565.86017651]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 10330.25274226]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x.g x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  1.72578572e+04]
------
Step:1, Action:West
State  138
Old Q Values:  [ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  1.72578572e+04]
New Q values:  [ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  8.80471431e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6320.57147647  1179.83647129]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  6320.57147647  1179.83647129]
New Q values:  [ -281.736      -1150.91067548  5169.04288273  1179.83647129]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  8.80471431e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  8.80471431e+03]
New Q values:  [ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  5.07199859e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  5169.04288273  1179.83647129]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  5169.04288273  1179.83647129]
New Q values:  [ -281.736      -1150.91067548  3588.61672939  1179.83647129]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  5.07199859e+03]
------
Step:5, Action:West
State  138
Old Q Values:  [ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  5.07199859e+03]
New Q values:  [ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  3.10478445e+03]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3588.61672939  1179.83647129]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3588.61672939  1179.83647129]
New Q values:  [ -281.736      -1150.91067548  2934.88619707  1179.83647129]
Reward: -1  Episode Reward:  4
xxxxx
x. ax
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  3.10478445e+03]
------
Step:7, Action:North
State  138
Old Q Values:  [ 5.00013168e+03  1.43587591e+03 -3.22965309e-01  3.10478445e+03]
New Q values:  [ 3.31949218e+03  1.43587591e+03 -3.22965309e-01  3.10478445e+03]
Reward: -301  Episode Reward:  -297
xxxxx
x. ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.31949218e+03  1.43587591e+03 -3.22965309e-01  3.10478445e+03]
------
Step:8, Action:North
State  136
Old Q Values:  [ -170.77177351  8069.14200608 -2383.80019164 -4488.51273874]
New Q values:  [ 2171.83389242  8069.14200608 -2383.80019164 -4488.51273874]
Reward: -301  Episode Reward:  -598
xxxxx
xg ax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 2171.83389242  8069.14200608 -2383.80019164 -4488.51273874]
------
Step:9, Action:South
State  138
Old Q Values:  [ 3.31949218e+03  1.43587591e+03 -3.22965309e-01  3.10478445e+03]
New Q values:  [ 3.31949218e+03  4.32724695e+03 -3.22965309e-01  3.10478445e+03]
Reward: 9  Episode Reward:  -589
xxxxx
x.  x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[12491.65528033  3232.66178902 -8220.10378799   637.30368728]
------
Step:10, Action:North
State  216
Old Q Values:  [12491.65528033  3232.66178902 -8220.10378799   637.30368728]
New Q values:  [ 6294.23619643  3232.66178902 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  -590
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.31949218e+03  4.32724695e+03 -3.22965309e-01  3.10478445e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [ 3.31949218e+03  4.32724695e+03 -3.22965309e-01  3.10478445e+03]
New Q values:  [ 3.31949218e+03 -2.38143036e+03 -3.22965309e-01  3.10478445e+03]
Reward: -10001  Episode Reward:  -10591
xxxxx
x.  x
x. gx
x...x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11842.20436566 13418.4106681  -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [11842.20436566 13418.4106681  -4584.50430574 -1713.91177491]
New Q values:  [11842.20436566  8471.84008992 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 10330.25274226]
------
Step:2, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 10330.25274226]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 15436.00976856]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4954.40891302 37661.69557219]
------
Step:3, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4954.40891302 37661.69557219]
New Q values:  [ 3515.22741472 -8521.23367799  4954.40891302 22986.08103632]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26386.6760248  12764.58618105 12093.94510863  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [1061.17344991   26.73544252 6068.9105677   123.6214372 ]
New Q values:  [1053.20124213   26.73544252 6068.9105677   123.6214372 ]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2077.77287388  1279.45451029 -1869.69345581   262.76946019]
------
Step:5, Action:North
State  181
Old Q Values:  [ 2077.77287388  1279.45451029 -1869.69345581   262.76946019]
New Q values:  [ 1817.95502136  1279.45451029 -1869.69345581   262.76946019]
Reward: 9  Episode Reward:  45
xxxxx
xa. x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 3271.48623936  238.35800069    0.        ]
------
Step:6, Action:South
State  103
Old Q Values:  [ 221.30610858 3271.48623936  238.35800069    0.        ]
New Q values:  [ 221.30610858 3885.39458374  238.35800069    0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x . x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 1966.8003584  8591.33362666 1554.80203889]
------
Step:7, Action:East
State  183
Old Q Values:  [2174.36956339 1966.8003584  8591.33362666 1554.80203889]
New Q values:  [2174.36956339 1966.8003584  8023.34512589 1554.80203889]
Reward: 9  Episode Reward:  53
xxxxx
x . x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.52713723e+04  1.03161518e+03]
------
Step:8, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.52713723e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  4.02407385e+04  1.03161518e+03]
Reward: -1  Episode Reward:  52
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[113775.96527551   2473.48849075    790.72804752   1050.85266124]
------
Step:9, Action:North
State  208
Old Q Values:  [11842.20436566  8471.84008992 -4584.50430574 -1713.91177491]
New Q values:  [42320.08541189  8471.84008992 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  51
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 125279.34555209]
------
Step:10, Action:West
State  130
Old Q Values:  [ 26266.584521     5661.05765619   -180.00807518 125279.34555209]
New Q values:  [ 26266.584521     5661.05765619   -180.00807518 147816.39369186]
Reward: 100009  Episode Reward:  100060
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[113775.96527551   2473.48849075    790.72804752   1050.85266124]
------
Step:1, Action:North
State  208
Old Q Values:  [42320.08541189  8471.84008992 -4584.50430574 -1713.91177491]
New Q values:  [17929.28181848  8471.84008992 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.31949218e+03 -2.38143036e+03 -3.22965309e-01  3.10478445e+03]
------
Step:2, Action:North
State  138
Old Q Values:  [ 3.31949218e+03 -2.38143036e+03 -3.22965309e-01  3.10478445e+03]
New Q values:  [ 2.14304453e+03 -2.38143036e+03 -3.22965309e-01  3.10478445e+03]
Reward: -301  Episode Reward:  -292
xxxxx
x..ax
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.14304453e+03 -2.38143036e+03 -3.22965309e-01  3.10478445e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 2.14304453e+03 -2.38143036e+03 -3.22965309e-01  3.10478445e+03]
New Q values:  [ 2.14304453e+03 -2.38143036e+03 -3.22965309e-01  1.74497905e+03]
Reward: 9  Episode Reward:  -283
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1658.88423777   861.04204957]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2934.88619707  1179.83647129]
New Q values:  [ -281.736      -1150.91067548  1816.26783644  1179.83647129]
Reward: -1  Episode Reward:  -284
xxxxx
x. ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.14304453e+03 -2.38143036e+03 -3.22965309e-01  1.74497905e+03]
------
Step:5, Action:North
State  138
Old Q Values:  [ 2.14304453e+03 -2.38143036e+03 -3.22965309e-01  1.74497905e+03]
New Q values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  1.74497905e+03]
Reward: -301  Episode Reward:  -585
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  1.74497905e+03]
------
Step:6, Action:West
State  138
Old Q Values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  1.74497905e+03]
New Q values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  1.24227197e+03]
Reward: -1  Episode Reward:  -586
xxxxx
x.a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1816.26783644  1179.83647129]
------
Step:7, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1816.26783644  1179.83647129]
New Q values:  [ -281.736      -1150.91067548  1121.7664849   1179.83647129]
Reward: -1  Episode Reward:  -587
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  1.24227197e+03]
------
Step:8, Action:North
State  136
Old Q Values:  [ 2171.83389242  8069.14200608 -2383.80019164 -4488.51273874]
New Q values:  [ 3108.87615879  8069.14200608 -2383.80019164 -4488.51273874]
Reward: -301  Episode Reward:  -888
xxxxx
x.gax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 3108.87615879  8069.14200608 -2383.80019164 -4488.51273874]
------
Step:9, Action:South
State  136
Old Q Values:  [ 3108.87615879  8069.14200608 -2383.80019164 -4488.51273874]
New Q values:  [ 3108.87615879  8605.84134798 -2383.80019164 -4488.51273874]
Reward: -1  Episode Reward:  -889
xxxxx
xg  x
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17929.28181848  8471.84008992 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  208
Old Q Values:  [17929.28181848  8471.84008992 -4584.50430574 -1713.91177491]
New Q values:  [ 7566.97207772  8471.84008992 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -890
xxxxx
x. ax
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  1.24227197e+03]
------
Step:11, Action:North
State  136
Old Q Values:  [ 3108.87615879  8605.84134798 -2383.80019164 -4488.51273874]
New Q values:  [ 3644.70286791  8605.84134798 -2383.80019164 -4488.51273874]
Reward: -301  Episode Reward:  -1191
xxxxx
xg ax
x.. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 3644.70286791  8605.84134798 -2383.80019164 -4488.51273874]
------
Step:12, Action:South
State  136
Old Q Values:  [ 3644.70286791  8605.84134798 -2383.80019164 -4488.51273874]
New Q values:  [ 3644.70286791  5983.28856617 -2383.80019164 -4488.51273874]
Reward: -1  Episode Reward:  -1192
xxxxx
x.g x
x..ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7566.97207772  8471.84008992 -4584.50430574 -1713.91177491]
------
Step:13, Action:South
State  208
Old Q Values:  [ 7566.97207772  8471.84008992 -4584.50430574 -1713.91177491]
New Q values:  [ 7566.97207772  8024.93896653 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  -1183
xxxxx
x. gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 15436.00976856]
------
Step:14, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 15436.00976856]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 13069.62821832]
Reward: -1  Episode Reward:  -1184
xxxxx
x.g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4954.40891302 22986.08103632]
------
Step:15, Action:West
State  273
Old Q Values:  [12084.96779035  -168.92307549  2611.76049936  4562.09217786]
New Q values:  [12084.96779035  -168.92307549  2611.76049936  9746.23967858]
Reward: 9  Episode Reward:  -1175
xxxxx
x. gx
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[26386.6760248  12764.58618105 12093.94510863  1875.31501677]
------
Step:16, Action:North
State  257
Old Q Values:  [26386.6760248  12764.58618105 12093.94510863  1875.31501677]
New Q values:  [20613.23757992 12764.58618105 12093.94510863  1875.31501677]
Reward: 9  Episode Reward:  -1166
xxxxx
x.  x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  20101.94546536     0.        ]
------
Step:17, Action:North
State  181
Old Q Values:  [ 1817.95502136  1279.45451029 -1869.69345581   262.76946019]
New Q values:  [ 1077.24729837  1279.45451029 -1869.69345581   262.76946019]
Reward: 9  Episode Reward:  -1157
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1148.88429941 -2165.66138672  -180.6       ]
------
Step:18, Action:South
State  111
Old Q Values:  [-177.44732869 5102.32593224  751.03868094 -120.29354603]
New Q values:  [-177.44732869 2424.16672598  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  -1158
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1077.24729837  1279.45451029 -1869.69345581   262.76946019]
------
Step:19, Action:South
State  183
Old Q Values:  [2174.36956339 1966.8003584  8023.34512589 1554.80203889]
New Q values:  [2174.36956339 2606.79331367 8023.34512589 1554.80203889]
Reward: -1  Episode Reward:  -1159
xxxxx
x   x
x . x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1053.20124213   26.73544252 6068.9105677   123.6214372 ]
------
Step:20, Action:East
State  261
Old Q Values:  [1053.20124213   26.73544252 6068.9105677   123.6214372 ]
New Q values:  [1053.20124213   26.73544252 6052.45456418  123.6214372 ]
Reward: -1  Episode Reward:  -1160
xxxxx
x   x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12084.96779035  -168.92307549  2611.76049936  9746.23967858]
------
Step:21, Action:North
State  273
Old Q Values:  [12084.96779035  -168.92307549  2611.76049936  9746.23967858]
New Q values:  [70249.27974298  -168.92307549  2611.76049936  9746.23967858]
Reward: 100009  Episode Reward:  98849
xxxxx
x   x
x a x
x  gx
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4954.40891302 22986.08103632]
------
Step:1, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197 24702.30720332  1496.82652516]
New Q values:  [ 7058.83631802 -5807.06396197 24702.30720332  3573.56194349]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2468.40812354 -5704.51612281  9898.10444475 -5679.36893145]
------
Step:2, Action:East
State  260
Old Q Values:  [ 2468.40812354 -5704.51612281  9898.10444475 -5679.36893145]
New Q values:  [ 2468.40812354 -5704.51612281 10854.46608879 -5679.36893145]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4954.40891302 22986.08103632]
------
Step:3, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4954.40891302 22986.08103632]
New Q values:  [ 3515.22741472 -8521.23367799  5908.0520307  22986.08103632]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 13069.62821832]
------
Step:4, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 13069.62821832]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 12123.07559822]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x...x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  5908.0520307  22986.08103632]
------
Step:5, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  5908.0520307  22986.08103632]
New Q values:  [ 3515.22741472 -8521.23367799  5999.54349175 22986.08103632]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 12123.07559822]
------
Step:6, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 12123.07559822]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966 11744.45455018]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  5999.54349175 22986.08103632]
------
Step:7, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  5999.54349175 22986.08103632]
New Q values:  [ 3515.22741472 -8521.23367799  5999.54349175 11009.56878378]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
x...x
xag x
xxxxx
Step:8, Action:East
State  260
Old Q Values:  [ 2468.40812354 -5704.51612281 10854.46608879 -5679.36893145]
New Q values:  [ 2468.40812354 -5704.51612281  1597.52626216 -5679.36893145]
Reward: -10001  Episode Reward:  -9988
xxxxx
x ..x
x...x
xg  x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1053.20124213   26.73544252 6052.45456418  123.6214372 ]
------
Step:1, Action:East
State  260
Old Q Values:  [ 2468.40812354 -5704.51612281  1597.52626216 -5679.36893145]
New Q values:  [ 2468.40812354 -5704.51612281  8055.10266586 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197 24702.30720332  3573.56194349]
------
Step:2, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  5999.54349175 11009.56878378]
New Q values:  [ 3515.22741472 -8521.23367799  5928.55376175 11009.56878378]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966 11744.45455018]
------
Step:3, Action:West
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966 11744.45455018]
New Q values:  [ 3463.13734036 -6442.16912869 -8192.20126966  2000.05245521]
Reward: -10001  Episode Reward:  -9983
xxxxx
x...x
x. .x
x g x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
Step:1, Action:South
State  208
Old Q Values:  [ 7566.97207772  8024.93896653 -4584.50430574 -1713.91177491]
New Q values:  [ 7566.97207772  4254.31678872 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3463.13734036 -6442.16912869 -8192.20126966  2000.05245521]
------
Step:2, Action:North
State  288
Old Q Values:  [ 3463.13734036 -6442.16912869 -8192.20126966  2000.05245521]
New Q values:  [ 3654.74655946 -6442.16912869 -8192.20126966  2000.05245521]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg.ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7566.97207772  4254.31678872 -4584.50430574 -1713.91177491]
------
Step:3, Action:North
State  210
Old Q Values:  [113775.96527551   2473.48849075    790.72804752   1050.85266124]
New Q values:  [89860.70421776  2473.48849075   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  17
xxxxx
x..ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 147816.39369186]
------
Step:4, Action:West
State  138
Old Q Values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  1.24227197e+03]
New Q values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  8.56259730e+02]
Reward: 9  Episode Reward:  26
xxxxx
x.a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1121.7664849   1179.83647129]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1121.7664849   1179.83647129]
New Q values:  [ -281.736      -1150.91067548  1121.7664849   1554.06104393]
Reward: 9  Episode Reward:  35
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548 3589.08818473 -252.78192178]
------
Step:6, Action:East
State  106
Old Q Values:  [ -180.6        -7710.46911494   238.32410743  -180.6       ]
New Q values:  [ -180.6        -7710.46911494   560.94795615  -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1121.7664849   1554.06104393]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3057.98936688  1024.49912872]
New Q values:  [-9594.56523706 -8069.05606225  3057.98936688 -4884.64294338]
Reward: -10001  Episode Reward:  -9967
xxxxx
xg  x
x.. x
x.  x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 2606.79331367 8023.34512589 1554.80203889]
------
Step:1, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  7038.47174633     0.        ]
New Q values:  [    0.         -5536.05678243 14893.01024342     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  4.02407385e+04  1.03161518e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  4.02407385e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  4.30599067e+04  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[89860.70421776  2473.48849075   790.72804752  1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [89860.70421776  2473.48849075   790.72804752  1050.85266124]
New Q values:  [80294.59979466  2473.48849075   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 147816.39369186]
------
Step:4, Action:West
State  138
Old Q Values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  8.56259730e+02]
New Q values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  4.06017266e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x  gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 134179.40896346]
------
Step:5, Action:West
State  121
Old Q Values:  [    0.             0.         -8255.55694753   315.06001453]
New Q values:  [    0.             0.         -8255.55694753   366.82262216]
Reward: 9  Episode Reward:  45
xxxxx
xa gx
x   x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[ -180.6          784.6620545  -5738.48414911     0.        ]
------
Step:6, Action:South
State  105
Old Q Values:  [ -180.6          784.6620545  -5738.48414911     0.        ]
New Q values:  [ -180.6          446.06592384 -5738.48414911     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x g x
xa  x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 145.72609444    0.          442.67034013 -178.98      ]
------
Step:7, Action:East
State  184
Old Q Values:  [ 127.54442502    0.         3820.33267721    0.        ]
New Q values:  [ 127.54442502    0.         8982.5331774     0.        ]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  24850.00035503  1022.75074485   568.38654082]
------
Step:8, Action:South
State  200
Old Q Values:  [  169.9257398  24850.00035503  1022.75074485   568.38654082]
New Q values:  [  169.9257398  13242.27077715  1022.75074485   568.38654082]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  5928.55376175 11009.56878378]
------
Step:9, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197 24702.30720332  3573.56194349]
New Q values:  [ 7058.83631802 -5807.06396197 24702.30720332  3250.56114665]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1053.20124213   26.73544252 6052.45456418  123.6214372 ]
------
Step:10, Action:East
State  261
Old Q Values:  [1053.20124213   26.73544252 6052.45456418  123.6214372 ]
New Q values:  [1053.20124213   26.73544252 9831.07398667  123.6214372 ]
Reward: -1  Episode Reward:  50
xxxxx
x g x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197 24702.30720332  3250.56114665]
------
Step:11, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  5928.55376175 11009.56878378]
New Q values:  [ 3515.22741472 -8521.23367799 63473.24547254 11009.56878378]
Reward: 100009  Episode Reward:  100059
xxxxx
xg  x
x   x
x  ax
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1658.88423777   861.04204957]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1658.88423777   861.04204957]
New Q values:  [ -253.44886264 -1902.20915811 12849.47166944   861.04204957]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  4.06017266e+04]
------
Step:2, Action:West
State  138
Old Q Values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  4.06017266e+04]
New Q values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  2.00949321e+04]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811 12849.47166944   861.04204957]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1121.7664849   1554.06104393]
New Q values:  [ -281.736      -1150.91067548  6476.58623394  1554.06104393]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.g x
x...x
xxxxx
Step:4, Action:East
State  136
Old Q Values:  [ 3644.70286791  5983.28856617 -2383.80019164 -4488.51273874]
New Q values:  [ 3644.70286791  5983.28856617   660.86649319 -4488.51273874]
Reward: -301  Episode Reward:  -294
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 3644.70286791  5983.28856617   660.86649319 -4488.51273874]
------
Step:5, Action:South
State  136
Old Q Values:  [ 3644.70286791  5983.28856617   660.86649319 -4488.51273874]
New Q values:  [ 3644.70286791  4280.9862854    660.86649319 -4488.51273874]
Reward: -1  Episode Reward:  -295
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6294.23619643  3232.66178902 -8220.10378799   637.30368728]
------
Step:6, Action:North
State  208
Old Q Values:  [ 7566.97207772  4254.31678872 -4584.50430574 -1713.91177491]
New Q values:  [ 4310.48471671  4254.31678872 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -296
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 3644.70286791  4280.9862854    660.86649319 -4488.51273874]
------
Step:7, Action:South
State  136
Old Q Values:  [ 3644.70286791  4280.9862854    660.86649319 -4488.51273874]
New Q values:  [ 3644.70286791  3600.06537309   660.86649319 -4488.51273874]
Reward: -1  Episode Reward:  -297
xxxxx
xg  x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 6294.23619643  3232.66178902 -8220.10378799   637.30368728]
------
Step:8, Action:North
State  216
Old Q Values:  [ 6294.23619643  3232.66178902 -8220.10378799   637.30368728]
New Q values:  [ 3610.50533895  3232.66178902 -8220.10378799   637.30368728]
Reward: -1  Episode Reward:  -298
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 3644.70286791  3600.06537309   660.86649319 -4488.51273874]
------
Step:9, Action:North
State  138
Old Q Values:  [ 1.31953117e+03 -2.38143036e+03 -3.22965309e-01  2.00949321e+04]
New Q values:  [ 6.37569211e+03 -2.38143036e+03 -3.22965309e-01  2.00949321e+04]
Reward: -301  Episode Reward:  -599
xxxxx
x. ax
xg. x
x...x
xxxxx
Step:10, Action:North
State  138
Old Q Values:  [ 6.37569211e+03 -2.38143036e+03 -3.22965309e-01  2.00949321e+04]
New Q values:  [ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  2.00949321e+04]
Reward: -301  Episode Reward:  -900
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  2.00949321e+04]
------
Step:11, Action:West
State  138
Old Q Values:  [ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  2.00949321e+04]
New Q values:  [ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  1.18922144e+04]
Reward: -1  Episode Reward:  -901
xxxxx
x.a x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811 12849.47166944   861.04204957]
------
Step:12, Action:East
State  121
Old Q Values:  [    0.             0.         -8255.55694753   366.82262216]
New Q values:  [    0.             0.         -8209.41191864   366.82262216]
Reward: -10001  Episode Reward:  -10902
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3654.74655946 -6442.16912869 -8192.20126966  2000.05245521]
------
Step:1, Action:North
State  288
Old Q Values:  [ 3654.74655946 -6442.16912869 -8192.20126966  2000.05245521]
New Q values:  [-3239.55596121 -6442.16912869 -8192.20126966  2000.05245521]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x..gx
x.. x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  1.18922144e+04]
------
Step:1, Action:West
State  138
Old Q Values:  [ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  1.18922144e+04]
New Q values:  [ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  6.70526161e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6476.58623394  1554.06104393]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  6476.58623394  1554.06104393]
New Q values:  [ -281.736      -1150.91067548  5109.48143842  1554.06104393]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  6.70526161e+03]
------
Step:3, Action:North
State  136
Old Q Values:  [ 3644.70286791  3600.06537309   660.86649319 -4488.51273874]
New Q values:  [ 2370.69200754  3600.06537309   660.86649319 -4488.51273874]
Reward: -301  Episode Reward:  -293
xxxxx
x.gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 2370.69200754  3600.06537309   660.86649319 -4488.51273874]
------
Step:4, Action:South
State  138
Old Q Values:  [ 8.39815648e+03 -2.38143036e+03 -3.22965309e-01  6.70526161e+03]
New Q values:  [ 8.39815648e+03  3.45973270e+02 -3.22965309e-01  6.70526161e+03]
Reward: 9  Episode Reward:  -284
xxxxx
x.  x
x.gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4310.48471671  4254.31678872 -4584.50430574 -1713.91177491]
------
Step:5, Action:North
State  210
Old Q Values:  [80294.59979466  2473.48849075   790.72804752  1050.85266124]
New Q values:  [34636.68686271  2473.48849075   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  -285
xxxxx
x. ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.39815648e+03  3.45973270e+02 -3.22965309e-01  6.70526161e+03]
------
Step:6, Action:North
State  138
Old Q Values:  [ 8.39815648e+03  3.45973270e+02 -3.22965309e-01  6.70526161e+03]
New Q values:  [ 5.69810954e+03  3.45973270e+02 -3.22965309e-01  6.70526161e+03]
Reward: -301  Episode Reward:  -586
xxxxx
x. ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.69810954e+03  3.45973270e+02 -3.22965309e-01  6.70526161e+03]
------
Step:7, Action:West
State  138
Old Q Values:  [ 5.69810954e+03  3.45973270e+02 -3.22965309e-01  6.70526161e+03]
New Q values:  [ 5.69810954e+03  3.45973270e+02 -3.22965309e-01  4.21434908e+03]
Reward: -1  Episode Reward:  -587
xxxxx
x.a x
xg. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  5109.48143842  1554.06104393]
------
Step:8, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3057.98936688 -4884.64294338]
New Q values:  [-9594.56523706 -8069.05606225  2302.61535868 -4884.64294338]
Reward: -1  Episode Reward:  -588
xxxxx
xg ax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 2370.69200754  3600.06537309   660.86649319 -4488.51273874]
------
Step:9, Action:South
State  136
Old Q Values:  [ 2370.69200754  3600.06537309   660.86649319 -4488.51273874]
New Q values:  [ 2370.69200754  2732.57156425   660.86649319 -4488.51273874]
Reward: -1  Episode Reward:  -589
xxxxx
x.g x
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4310.48471671  4254.31678872 -4584.50430574 -1713.91177491]
------
Step:10, Action:North
State  208
Old Q Values:  [ 4310.48471671  4254.31678872 -4584.50430574 -1713.91177491]
New Q values:  [ 2543.36535596  4254.31678872 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -590
xxxxx
xg ax
x.. x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 2370.69200754  2732.57156425   660.86649319 -4488.51273874]
------
Step:11, Action:South
State  138
Old Q Values:  [ 5.69810954e+03  3.45973270e+02 -3.22965309e-01  4.21434908e+03]
New Q values:  [ 5.69810954e+03  1.41408434e+03 -3.22965309e-01  4.21434908e+03]
Reward: -1  Episode Reward:  -591
xxxxx
x.  x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2543.36535596  4254.31678872 -4584.50430574 -1713.91177491]
------
Step:12, Action:South
State  208
Old Q Values:  [ 2543.36535596  4254.31678872 -4584.50430574 -1713.91177491]
New Q values:  [ 2543.36535596  2307.14245205 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  -582
xxxxx
xg  x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -6442.16912869 -8192.20126966  2000.05245521]
------
Step:13, Action:West
State  288
Old Q Values:  [-3239.55596121 -6442.16912869 -8192.20126966  2000.05245521]
New Q values:  [-3239.55596121 -6442.16912869 -8192.20126966 19847.39462384]
Reward: 9  Episode Reward:  -573
xxxxx
x.  x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799 63473.24547254 11009.56878378]
------
Step:14, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799 63473.24547254 11009.56878378]
New Q values:  [ 3515.22741472 -8521.23367799 31342.91657617 11009.56878378]
Reward: -1  Episode Reward:  -574
xxxxx
x.  x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -6442.16912869 -8192.20126966 19847.39462384]
------
Step:15, Action:West
State  288
Old Q Values:  [-3239.55596121 -6442.16912869 -8192.20126966 19847.39462384]
New Q values:  [-3239.55596121 -6442.16912869 -8192.20126966 17341.23282239]
Reward: -1  Episode Reward:  -575
xxxxx
x.  x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799 31342.91657617 11009.56878378]
------
Step:16, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799 31342.91657617 11009.56878378]
New Q values:  [ 3515.22741472 -8521.23367799 17738.93647718 11009.56878378]
Reward: -1  Episode Reward:  -576
xxxxx
xg  x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -6442.16912869 -8192.20126966 17341.23282239]
------
Step:17, Action:West
State  288
Old Q Values:  [-3239.55596121 -6442.16912869 -8192.20126966 17341.23282239]
New Q values:  [-3239.55596121 -6442.16912869 -8192.20126966 12257.57407211]
Reward: -1  Episode Reward:  -577
xxxxx
x.g x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799 17738.93647718 11009.56878378]
------
Step:18, Action:East
State  273
Old Q Values:  [70249.27974298  -168.92307549  2611.76049936  9746.23967858]
New Q values:  [70249.27974298  -168.92307549  4721.37642138  9746.23967858]
Reward: -1  Episode Reward:  -578
xxxxx
x. gx
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -6442.16912869 -8192.20126966 12257.57407211]
------
Step:19, Action:West
State  288
Old Q Values:  [-3239.55596121 -6442.16912869 -8192.20126966 12257.57407211]
New Q values:  [-3239.55596121 -6442.16912869 -8192.20126966 25977.21355174]
Reward: -1  Episode Reward:  -579
xxxxx
x.  x
x..gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[70249.27974298  -168.92307549  4721.37642138  9746.23967858]
------
Step:20, Action:North
State  273
Old Q Values:  [70249.27974298  -168.92307549  4721.37642138  9746.23967858]
New Q values:  [33515.00452403  -168.92307549  4721.37642138  9746.23967858]
Reward: 9  Episode Reward:  -570
xxxxx
x.  x
x.a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  8819.79152205 18032.97542278  1169.39963074]
------
Step:21, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  4.30599067e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  2.76143687e+04  1.03161518e+03]
Reward: -1  Episode Reward:  -571
xxxxx
x.  x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[34636.68686271  2473.48849075   790.72804752  1050.85266124]
------
Step:22, Action:North
State  210
Old Q Values:  [34636.68686271  2473.48849075   790.72804752  1050.85266124]
New Q values:  [58198.99285264  2473.48849075   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  -572
xxxxx
x. ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 26266.584521     5661.05765619   -180.00807518 147816.39369186]
------
Step:23, Action:West
State  130
Old Q Values:  [ 26266.584521     5661.05765619   -180.00807518 147816.39369186]
New Q values:  [26266.584521    5661.05765619  -180.00807518 96825.21294777]
Reward: -1  Episode Reward:  -573
xxxxx
x.a x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036    4378.49673879 125664.18490342]
------
Step:24, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  5109.48143842  1554.06104393]
New Q values:  [ -281.736      -1150.91067548  5109.48143842   795.30880442]
Reward: 9  Episode Reward:  -564
xxxxx
xa  x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   560.94795615  -180.6       ]
------
Step:25, Action:East
State  106
Old Q Values:  [ -180.6        -7710.46911494   560.94795615  -180.6       ]
New Q values:  [ -180.6        -7710.46911494  1756.62361399  -180.6       ]
Reward: -1  Episode Reward:  -565
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  5109.48143842   795.30880442]
------
Step:26, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  5109.48143842   795.30880442]
New Q values:  [ -281.736      -1150.91067548  3752.62543676   795.30880442]
Reward: -1  Episode Reward:  -566
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.69810954e+03  1.41408434e+03 -3.22965309e-01  4.21434908e+03]
------
Step:27, Action:North
State  138
Old Q Values:  [ 5.69810954e+03  1.41408434e+03 -3.22965309e-01  4.21434908e+03]
New Q values:  [ 3.80807668e+03  1.41408434e+03 -3.22965309e-01  4.21434908e+03]
Reward: -301  Episode Reward:  -867
xxxxx
x  ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.80807668e+03  1.41408434e+03 -3.22965309e-01  4.21434908e+03]
------
Step:28, Action:West
State  138
Old Q Values:  [ 3.80807668e+03  1.41408434e+03 -3.22965309e-01  4.21434908e+03]
New Q values:  [ 3.80807668e+03  1.41408434e+03 -3.22965309e-01  2.81092726e+03]
Reward: -1  Episode Reward:  -868
xxxxx
x a x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3752.62543676   795.30880442]
------
Step:29, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811 12849.47166944   861.04204957]
New Q values:  [ -253.44886264 -1902.20915811  6281.61167075   861.04204957]
Reward: -1  Episode Reward:  -869
xxxxx
x  ax
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.80807668e+03  1.41408434e+03 -3.22965309e-01  2.81092726e+03]
------
Step:30, Action:North
State  138
Old Q Values:  [ 3.80807668e+03  1.41408434e+03 -3.22965309e-01  2.81092726e+03]
New Q values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  2.81092726e+03]
Reward: -301  Episode Reward:  -1170
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  2.81092726e+03]
------
Step:31, Action:West
State  138
Old Q Values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  2.81092726e+03]
New Q values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  3.00825441e+03]
Reward: -1  Episode Reward:  -1171
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  6281.61167075   861.04204957]
------
Step:32, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3752.62543676   795.30880442]
New Q values:  [ -281.736      -1150.91067548  2402.92649646   795.30880442]
Reward: -1  Episode Reward:  -1172
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  3.00825441e+03]
------
Step:33, Action:West
State  138
Old Q Values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  3.00825441e+03]
New Q values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  3.08718526e+03]
Reward: -1  Episode Reward:  -1173
xxxxx
x a x
x.  x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  6281.61167075   861.04204957]
------
Step:34, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2402.92649646   795.30880442]
New Q values:  [ -281.736      -1150.91067548  1886.72617765   795.30880442]
Reward: -1  Episode Reward:  -1174
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  3.08718526e+03]
------
Step:35, Action:West
State  138
Old Q Values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  3.08718526e+03]
New Q values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  1.80029196e+03]
Reward: -1  Episode Reward:  -1175
xxxxx
x a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1886.72617765   795.30880442]
------
Step:36, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2302.61535868 -4884.64294338]
New Q values:  [-9594.56523706 -8069.05606225  1740.21761275 -4884.64294338]
Reward: -1  Episode Reward:  -1176
xxxxx
x gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 2370.69200754  2732.57156425   660.86649319 -4488.51273874]
------
Step:37, Action:South
State  136
Old Q Values:  [ 2370.69200754  2732.57156425   660.86649319 -4488.51273874]
New Q values:  [ 2370.69200754  1855.43823249   660.86649319 -4488.51273874]
Reward: -1  Episode Reward:  -1177
xxxxx
xg  x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2543.36535596  2307.14245205 -4584.50430574 -1713.91177491]
------
Step:38, Action:North
State  208
Old Q Values:  [ 2543.36535596  2307.14245205 -4584.50430574 -1713.91177491]
New Q values:  [30064.31002671  2307.14245205 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -1178
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 96825.21294777]
------
Step:39, Action:West
State  138
Old Q Values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  1.80029196e+03]
New Q values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
Reward: -1  Episode Reward:  -1179
xxxxx
x a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1886.72617765   795.30880442]
------
Step:40, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036    4378.49673879 125664.18490342]
New Q values:  [  -180.6          3557.6642036   30798.36257985 125664.18490342]
Reward: -1  Episode Reward:  -1180
xxxxx
x  ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 96825.21294777]
------
Step:41, Action:West
State  136
Old Q Values:  [ 2370.69200754  1855.43823249   660.86649319 -4488.51273874]
New Q values:  [ 2370.69200754  1855.43823249   660.86649319 -1273.93981167]
Reward: -1  Episode Reward:  -1181
xxxxx
xga x
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1740.21761275 -4884.64294338]
------
Step:42, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1740.21761275 -4884.64294338]
New Q values:  [-9594.56523706 -8069.05606225  1406.69464736 -4884.64294338]
Reward: -1  Episode Reward:  -1182
xxxxx
x gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 2370.69200754  1855.43823249   660.86649319 -1273.93981167]
------
Step:43, Action:North
State  136
Old Q Values:  [ 2370.69200754  1855.43823249   660.86649319 -1273.93981167]
New Q values:  [-4521.11559472  1855.43823249   660.86649319 -1273.93981167]
Reward: -10301  Episode Reward:  -11483
xxxxx
x  gx
x.  x
x   x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1053.20124213   26.73544252 9831.07398667  123.6214372 ]
------
Step:1, Action:East
State  261
Old Q Values:  [1053.20124213   26.73544252 9831.07398667  123.6214372 ]
New Q values:  [ 1053.20124213    26.73544252 13992.33095188   123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[33515.00452403  -168.92307549  4721.37642138  9746.23967858]
------
Step:2, Action:North
State  273
Old Q Values:  [33515.00452403  -168.92307549  4721.37642138  9746.23967858]
New Q values:  [17385.49496785  -168.92307549  4721.37642138  9746.23967858]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 13246.97719412  9681.98001768  1460.9765133 ]
------
Step:3, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.26722128e+04 2.32942001e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 1.03899661e+04 2.32942001e+04 7.32028793e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799 17738.93647718 11009.56878378]
------
Step:4, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799 17738.93647718 11009.56878378]
New Q values:  [ 3515.22741472 -8521.23367799 14894.13865639 11009.56878378]
Reward: 9  Episode Reward:  26
xxxxx
xg. x
x. .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -6442.16912869 -8192.20126966 25977.21355174]
------
Step:5, Action:West
State  288
Old Q Values:  [-3239.55596121 -6442.16912869 -8192.20126966 25977.21355174]
New Q values:  [-3239.55596121 -6442.16912869 -8192.20126966 14858.52701761]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799 14894.13865639 11009.56878378]
------
Step:6, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799 14894.13865639 11009.56878378]
New Q values:  [ 3515.22741472 -8521.23367799 10414.61356784 11009.56878378]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x. .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -6442.16912869 -8192.20126966 14858.52701761]
------
Step:7, Action:West
State  288
Old Q Values:  [-3239.55596121 -6442.16912869 -8192.20126966 14858.52701761]
New Q values:  [-3239.55596121 -6442.16912869 -8192.20126966  9245.68144218]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x. .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799 10414.61356784 11009.56878378]
------
Step:8, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799 10414.61356784 11009.56878378]
New Q values:  [ 3515.22741472 -8521.23367799 10414.61356784  6819.75831327]
Reward: -1  Episode Reward:  22
xxxxx
xg. x
x. .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2468.40812354 -5704.51612281  8055.10266586 -5679.36893145]
------
Step:9, Action:East
State  257
Old Q Values:  [20613.23757992 12764.58618105 12093.94510863  1875.31501677]
New Q values:  [20613.23757992 12764.58618105  7961.3621138   1875.31501677]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x. .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799 10414.61356784  6819.75831327]
------
Step:10, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799 10414.61356784  6819.75831327]
New Q values:  [ 3515.22741472 -8521.23367799  6938.94985979  6819.75831327]
Reward: -1  Episode Reward:  20
xxxxx
xg. x
x. .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -6442.16912869 -8192.20126966  9245.68144218]
------
Step:11, Action:West
State  288
Old Q Values:  [-3239.55596121 -6442.16912869 -8192.20126966  9245.68144218]
New Q values:  [-3239.55596121 -6442.16912869 -8192.20126966  5779.35753481]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  6938.94985979  6819.75831327]
------
Step:12, Action:East
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  6938.94985979  6819.75831327]
New Q values:  [ 3515.22741472 -8521.23367799  4508.78720436  6819.75831327]
Reward: -1  Episode Reward:  18
xxxxx
x.. x
x.g.x
x  ax
xxxxx
Step:13, Action:South
State  288
Old Q Values:  [-3239.55596121 -6442.16912869 -8192.20126966  5779.35753481]
New Q values:  [-3239.55596121 -1023.66039103 -8192.20126966  5779.35753481]
Reward: -301  Episode Reward:  -283
xxxxx
x.. x
x. .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -1023.66039103 -8192.20126966  5779.35753481]
------
Step:14, Action:South
State  288
Old Q Values:  [-3239.55596121 -1023.66039103 -8192.20126966  5779.35753481]
New Q values:  [-3239.55596121  1143.74310403 -8192.20126966  5779.35753481]
Reward: -301  Episode Reward:  -584
xxxxx
x.. x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121  1143.74310403 -8192.20126966  5779.35753481]
------
Step:15, Action:West
State  288
Old Q Values:  [-3239.55596121  1143.74310403 -8192.20126966  5779.35753481]
New Q values:  [-3239.55596121  1143.74310403 -8192.20126966  4357.0705079 ]
Reward: -1  Episode Reward:  -585
xxxxx
x.. x
xg .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4508.78720436  6819.75831327]
------
Step:16, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4508.78720436  6819.75831327]
New Q values:  [ 3515.22741472 -8521.23367799  4508.78720436  5143.83412507]
Reward: -1  Episode Reward:  -586
xxxxx
xg. x
x. .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2468.40812354 -5704.51612281  8055.10266586 -5679.36893145]
------
Step:17, Action:East
State  257
Old Q Values:  [20613.23757992 12764.58618105  7961.3621138   1875.31501677]
New Q values:  [20613.23757992 12764.58618105  4727.09508304  1875.31501677]
Reward: -1  Episode Reward:  -587
xxxxx
x.g x
x. .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4508.78720436  5143.83412507]
------
Step:18, Action:West
State  273
Old Q Values:  [17385.49496785  -168.92307549  4721.37642138  9746.23967858]
New Q values:  [17385.49496785  -168.92307549  4721.37642138 10081.86714541]
Reward: -1  Episode Reward:  -588
xxxxx
x..gx
x. .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[20613.23757992 12764.58618105  4727.09508304  1875.31501677]
------
Step:19, Action:North
State  257
Old Q Values:  [20613.23757992 12764.58618105  4727.09508304  1875.31501677]
New Q values:  [18303.86220197 12764.58618105  4727.09508304  1875.31501677]
Reward: 9  Episode Reward:  -579
xxxxx
x.g x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  20101.94546536     0.        ]
------
Step:20, Action:North
State  180
Old Q Values:  [  707.59011235  3998.65267373 11730.9592948  -4966.32149798]
New Q values:  [-2840.23360603  3998.65267373 11730.9592948  -4966.32149798]
Reward: -9991  Episode Reward:  -10570
xxxxx
xg. x
x  .x
x   x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1077.24729837  1279.45451029 -1869.69345581   262.76946019]
------
Step:1, Action:South
State  183
Old Q Values:  [2174.36956339 2606.79331367 8023.34512589 1554.80203889]
New Q values:  [2174.36956339 5245.81661103 8023.34512589 1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1053.20124213    26.73544252 13992.33095188   123.6214372 ]
------
Step:2, Action:East
State  261
Old Q Values:  [ 1053.20124213    26.73544252 13992.33095188   123.6214372 ]
New Q values:  [ 1053.20124213    26.73544252 10817.9808711    123.6214372 ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x .gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17385.49496785  -168.92307549  4721.37642138 10081.86714541]
------
Step:3, Action:North
State  273
Old Q Values:  [17385.49496785  -168.92307549  4721.37642138 10081.86714541]
New Q values:  [12369.49061397  -168.92307549  4721.37642138 10081.86714541]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605  8819.79152205 18032.97542278  1169.39963074]
------
Step:4, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  2.76143687e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  2.85048453e+04  1.03161518e+03]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[58198.99285264  2473.48849075   790.72804752  1050.85266124]
------
Step:5, Action:North
State  208
Old Q Values:  [30064.31002671  2307.14245205 -4584.50430574 -1713.91177491]
New Q values:  [41078.68789502  2307.14245205 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  35
xxxxx
x..ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[26266.584521    5661.05765619  -180.00807518 96825.21294777]
------
Step:6, Action:West
State  130
Old Q Values:  [26266.584521    5661.05765619  -180.00807518 96825.21294777]
New Q values:  [26266.584521    5661.05765619  -180.00807518 76434.74065013]
Reward: 9  Episode Reward:  44
xxxxx
x.a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   30798.36257985 125664.18490342]
------
Step:7, Action:West
State  126
Old Q Values:  [   0.          331.64678262 7128.26683804 1946.9719719 ]
New Q values:  [   0.          331.64678262 7128.26683804 1511.43880655]
Reward: 9  Episode Reward:  53
xxxxx
xa  x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2424.16672598  751.03868094 -120.29354603]
------
Step:8, Action:South
State  110
Old Q Values:  [-239.29051573 2146.46762581  265.50261253 -180.6       ]
New Q values:  [ -239.29051573 -3902.35021231   265.50261253  -180.6       ]
Reward: -10001  Episode Reward:  -9948
xxxxx
x   x
xg  x
x  .x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
------
Step:1, Action:North
State  136
Old Q Values:  [-4521.11559472  1855.43823249   660.86649319 -1273.93981167]
New Q values:  [-7432.41476814  1855.43823249   660.86649319 -1273.93981167]
Reward: -10301  Episode Reward:  -10301
xxxxx
x..gx
x.. x
x...x
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  3998.65267373 11730.9592948  -4966.32149798]
------
Step:1, Action:East
State  180
Old Q Values:  [-2840.23360603  3998.65267373 11730.9592948  -4966.32149798]
New Q values:  [-2840.23360603  3998.65267373  8294.48672265 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4027.93373228 11989.01001575  3512.17261526]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  2.85048453e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  2.88670360e+04  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[58198.99285264  2473.48849075   790.72804752  1050.85266124]
------
Step:3, Action:North
State  216
Old Q Values:  [ 3610.50533895  3232.66178902 -8220.10378799   637.30368728]
New Q values:  [ 2195.11823766  3232.66178902 -8220.10378799   637.30368728]
Reward: 9  Episode Reward:  27
xxxxx
x .ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
------
Step:4, Action:North
State  138
Old Q Values:  [ 2.48505367e+03  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 1.55893757e+03  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
Reward: -301  Episode Reward:  -274
xxxxx
x .ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.55893757e+03  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
------
Step:5, Action:North
State  138
Old Q Values:  [ 1.55893757e+03  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
Reward: -301  Episode Reward:  -575
xxxxx
x .ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  1.41408434e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  1.53483227e+03 -3.22965309e-01  1.28553464e+03]
Reward: -1  Episode Reward:  -576
xxxxx
x . x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  3232.66178902 -8220.10378799   637.30368728]
------
Step:7, Action:South
State  216
Old Q Values:  [ 2195.11823766  3232.66178902 -8220.10378799   637.30368728]
New Q values:  [ 2195.11823766  2605.58586798 -8220.10378799   637.30368728]
Reward: 9  Episode Reward:  -567
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121  1143.74310403 -8192.20126966  4357.0705079 ]
------
Step:8, Action:West
State  288
Old Q Values:  [-3239.55596121  1143.74310403 -8192.20126966  4357.0705079 ]
New Q values:  [-3239.55596121  1143.74310403 -8192.20126966  3291.37844068]
Reward: 9  Episode Reward:  -558
xxxxx
xg. x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4508.78720436  5143.83412507]
------
Step:9, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4508.78720436  5143.83412507]
New Q values:  [ 3515.22741472 -8521.23367799  4508.78720436  7554.09231062]
Reward: 9  Episode Reward:  -549
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18303.86220197 12764.58618105  4727.09508304  1875.31501677]
------
Step:10, Action:North
State  260
Old Q Values:  [ 2468.40812354 -5704.51612281  8055.10266586 -5679.36893145]
New Q values:  [ 3475.10926621 -5704.51612281  8055.10266586 -5679.36893145]
Reward: -1  Episode Reward:  -550
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  3998.65267373  8294.48672265 -4966.32149798]
------
Step:11, Action:East
State  180
Old Q Values:  [-2840.23360603  3998.65267373  8294.48672265 -4966.32149798]
New Q values:  [-2840.23360603  3998.65267373 10305.45471326 -4966.32149798]
Reward: -1  Episode Reward:  -551
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.03899661e+04 2.32942001e+04 7.32028793e+03]
------
Step:12, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.03899661e+04 2.32942001e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 1.03899661e+04 2.16406864e+04 7.32028793e+03]
Reward: -1  Episode Reward:  -552
xxxxx
xg. x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[41078.68789502  2307.14245205 -4584.50430574 -1713.91177491]
------
Step:13, Action:North
State  208
Old Q Values:  [41078.68789502  2307.14245205 -4584.50430574 -1713.91177491]
New Q values:  [36646.60572015  2307.14245205 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -553
xxxxx
x gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[27558.53877122  8286.98116673 -8652.84       67385.76854049]
------
Step:14, Action:North
State  130
Old Q Values:  [26266.584521    5661.05765619  -180.00807518 76434.74065013]
New Q values:  [33256.45600344  5661.05765619  -180.00807518 76434.74065013]
Reward: -301  Episode Reward:  -854
xxxxx
x .ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  5661.05765619  -180.00807518 76434.74065013]
------
Step:15, Action:West
State  130
Old Q Values:  [33256.45600344  5661.05765619  -180.00807518 76434.74065013]
New Q values:  [ 33256.45600344   5661.05765619   -180.00807518 130833.11894909]
Reward: 100009  Episode Reward:  99155
xxxxx
x a x
x  gx
x   x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36646.60572015  2307.14245205 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [36646.60572015  2307.14245205 -4584.50430574 -1713.91177491]
New Q values:  [36646.60572015  1915.67051302 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121  1143.74310403 -8192.20126966  3291.37844068]
------
Step:2, Action:West
State  288
Old Q Values:  [-3239.55596121  1143.74310403 -8192.20126966  3291.37844068]
New Q values:  [-3239.55596121  1143.74310403 -8192.20126966  5032.79856046]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[12369.49061397  -168.92307549  4721.37642138 10081.86714541]
------
Step:3, Action:North
State  273
Old Q Values:  [12369.49061397  -168.92307549  4721.37642138 10081.86714541]
New Q values:  [ 8927.28940382  -168.92307549  4721.37642138 10081.86714541]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 13246.97719412  9681.98001768  1460.9765133 ]
------
Step:4, Action:South
State  195
Old Q Values:  [   38.85388605  8819.79152205 18032.97542278  1169.39963074]
New Q values:  [   38.85388605  6551.87675244 18032.97542278  1169.39963074]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 8927.28940382  -168.92307549  4721.37642138 10081.86714541]
------
Step:5, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4508.78720436  7554.09231062]
New Q values:  [ 3515.22741472 -8521.23367799  4508.78720436  8518.19558484]
Reward: 9  Episode Reward:  35
xxxxx
x.. x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18303.86220197 12764.58618105  4727.09508304  1875.31501677]
------
Step:6, Action:North
State  256
Old Q Values:  [44917.26429693 15850.58020692  7407.2956525    644.94785455]
New Q values:  [47016.92797738 15850.58020692  7407.2956525    644.94785455]
Reward: 9  Episode Reward:  44
xxxxx
x.. x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         96815.40752869     0.        ]
------
Step:7, Action:East
State  179
Old Q Values:  [82228.67666629 16101.90751562 96251.32812703     0.        ]
New Q values:  [82228.67666629 16101.90751562 47160.04204891     0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x.. x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  2.88670360e+04  1.03161518e+03]
------
Step:8, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.03899661e+04 2.16406864e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 1.03899661e+04 1.96496563e+04 7.32028793e+03]
Reward: -1  Episode Reward:  42
xxxxx
x.. x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36646.60572015  1915.67051302 -4584.50430574 -1713.91177491]
------
Step:9, Action:North
State  208
Old Q Values:  [36646.60572015  1915.67051302 -4584.50430574 -1713.91177491]
New Q values:  [53907.97797279  1915.67051302 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  41
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 33256.45600344   5661.05765619   -180.00807518 130833.11894909]
------
Step:10, Action:West
State  130
Old Q Values:  [ 33256.45600344   5661.05765619   -180.00807518 130833.11894909]
New Q values:  [33256.45600344  5661.05765619  -180.00807518 92592.47026867]
Reward: 9  Episode Reward:  50
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 134179.40896346]
------
Step:11, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   30798.36257985 125664.18490342]
New Q values:  [  -180.6          3557.6642036   30798.36257985 125745.29507292]
Reward: 100009  Episode Reward:  100059
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 8927.28940382  -168.92307549  4721.37642138 10081.86714541]
------
Step:1, Action:West
State  273
Old Q Values:  [ 8927.28940382  -168.92307549  4721.37642138 10081.86714541]
New Q values:  [8927.28940382 -168.92307549 4721.37642138 7283.5411195 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1053.20124213    26.73544252 10817.9808711    123.6214372 ]
------
Step:2, Action:East
State  261
Old Q Values:  [ 1053.20124213    26.73544252 10817.9808711    123.6214372 ]
New Q values:  [1053.20124213   26.73544252  882.05102389  123.6214372 ]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x.. x
x g.x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.53483227e+03 -3.22965309e-01  1.28553464e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  1.53483227e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  1.80790308e+04 -3.22965309e-01  1.28553464e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[58198.99285264  2473.48849075   790.72804752  1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [58198.99285264  2473.48849075   790.72804752  1050.85266124]
New Q values:  [28702.70637075  2473.48849075   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.80790308e+04 -3.22965309e-01  1.28553464e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  1.80790308e+04 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  8.01268807e+03 -3.22965309e-01  1.28553464e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  2605.58586798 -8220.10378799   637.30368728]
------
Step:4, Action:South
State  208
Old Q Values:  [53907.97797279  1915.67051302 -4584.50430574 -1713.91177491]
New Q values:  [53907.97797279  2281.50777335 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  16
xxxxx
x.. x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121  1143.74310403 -8192.20126966  5032.79856046]
------
Step:5, Action:West
State  288
Old Q Values:  [-3239.55596121  1143.74310403 -8192.20126966  5032.79856046]
New Q values:  [-3239.55596121  1143.74310403 -8192.20126966 -1426.02190036]
Reward: -9991  Episode Reward:  -9975
xxxxx
x.. x
x.  x
x.g x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  1855.43823249   660.86649319 -1273.93981167]
------
Step:1, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  8.01268807e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  3.99215099e+03 -3.22965309e-01  1.28553464e+03]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  2605.58586798 -8220.10378799   637.30368728]
------
Step:2, Action:South
State  210
Old Q Values:  [28702.70637075  2473.48849075   790.72804752  1050.85266124]
New Q values:  [28702.70637075  1337.91832751   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121  1143.74310403 -8192.20126966 -1426.02190036]
------
Step:3, Action:South
State  288
Old Q Values:  [-3239.55596121  1143.74310403 -8192.20126966 -1426.02190036]
New Q values:  [-3239.55596121   620.02017282 -8192.20126966 -1426.02190036]
Reward: -301  Episode Reward:  -283
xxxxx
x . x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121   620.02017282 -8192.20126966 -1426.02190036]
------
Step:4, Action:South
State  288
Old Q Values:  [-3239.55596121   620.02017282 -8192.20126966 -1426.02190036]
New Q values:  [-3239.55596121   253.41412097 -8192.20126966 -1426.02190036]
Reward: -301  Episode Reward:  -584
xxxxx
x . x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121   253.41412097 -8192.20126966 -1426.02190036]
------
Step:5, Action:South
State  288
Old Q Values:  [-3239.55596121   253.41412097 -8192.20126966 -1426.02190036]
New Q values:  [-3.23955596e+03 -3.21011532e+00 -8.19220127e+03 -1.42602190e+03]
Reward: -301  Episode Reward:  -885
xxxxx
x . x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3.23955596e+03 -3.21011532e+00 -8.19220127e+03 -1.42602190e+03]
------
Step:6, Action:South
State  288
Old Q Values:  [-3.23955596e+03 -3.21011532e+00 -8.19220127e+03 -1.42602190e+03]
New Q values:  [-3239.55596121  -182.84708072 -8192.20126966 -1426.02190036]
Reward: -301  Episode Reward:  -1186
xxxxx
x . x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121  -182.84708072 -8192.20126966 -1426.02190036]
------
Step:7, Action:South
State  288
Old Q Values:  [-3239.55596121  -182.84708072 -8192.20126966 -1426.02190036]
New Q values:  [-3239.55596121  -308.59295651 -8192.20126966 -1426.02190036]
Reward: -301  Episode Reward:  -1487
xxxxx
x . x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121  -308.59295651 -8192.20126966 -1426.02190036]
------
Step:8, Action:South
State  288
Old Q Values:  [-3239.55596121  -308.59295651 -8192.20126966 -1426.02190036]
New Q values:  [-3239.55596121 -6396.61506955 -8192.20126966 -1426.02190036]
Reward: -10301  Episode Reward:  -11788
xxxxx
x . x
x.. x
x..gx
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  3998.65267373 10305.45471326 -4966.32149798]
------
Step:1, Action:East
State  180
Old Q Values:  [-2840.23360603  3998.65267373 10305.45471326 -4966.32149798]
New Q values:  [-2840.23360603  3998.65267373  7724.28489003 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  4027.93373228 11989.01001575  3512.17261526]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  2.88670360e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  2.01630263e+04  1.03161518e+03]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28702.70637075  1337.91832751   790.72804752  1050.85266124]
------
Step:3, Action:North
State  218
Old Q Values:  [1469.55947125 2184.8079221     0.         1847.21017375]
New Q values:  [1790.86908461 2184.8079221     0.         1847.21017375]
Reward: 9  Episode Reward:  27
xxxxx
x .ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  3.99215099e+03 -3.22965309e-01  1.28553464e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  3.99215099e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  2.25170277e+03 -3.22965309e-01  1.28553464e+03]
Reward: -1  Episode Reward:  26
xxxxx
x . x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1790.86908461 2184.8079221     0.         1847.21017375]
------
Step:5, Action:South
State  210
Old Q Values:  [28702.70637075  1337.91832751   790.72804752  1050.85266124]
New Q values:  [28702.70637075   112.7607609    790.72804752  1050.85266124]
Reward: 9  Episode Reward:  35
xxxxx
x . x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3239.55596121 -6396.61506955 -8192.20126966 -1426.02190036]
------
Step:6, Action:North
State  288
Old Q Values:  [-3239.55596121 -6396.61506955 -8192.20126966 -1426.02190036]
New Q values:  [ -640.98000785 -6396.61506955 -8192.20126966 -1426.02190036]
Reward: -1  Episode Reward:  34
xxxxx
x . x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1790.86908461 2184.8079221     0.         1847.21017375]
------
Step:7, Action:West
State  216
Old Q Values:  [ 2195.11823766  2605.58586798 -8220.10378799   637.30368728]
New Q values:  [ 2195.11823766  2605.58586798 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[   2.33354578 2191.91549263 -501.63979658  529.38906334]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.03899661e+04 1.96496563e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 6.71684510e+03 1.96496563e+04 7.32028793e+03]
Reward: 9  Episode Reward:  42
xxxxx
x . x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4508.78720436  8518.19558484]
------
Step:9, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4508.78720436  8518.19558484]
New Q values:  [ 3515.22741472 -8521.23367799  4508.78720436  8903.83689453]
Reward: 9  Episode Reward:  51
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18303.86220197 12764.58618105  4727.09508304  1875.31501677]
------
Step:10, Action:North
State  261
Old Q Values:  [1053.20124213   26.73544252  882.05102389  123.6214372 ]
New Q values:  [804.51684994  26.73544252 882.05102389 123.6214372 ]
Reward: -1  Episode Reward:  50
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1077.24729837  1279.45451029 -1869.69345581   262.76946019]
------
Step:11, Action:South
State  180
Old Q Values:  [-2840.23360603  3998.65267373  7724.28489003 -4966.32149798]
New Q values:  [-2840.23360603  4015.39186925  7724.28489003 -4966.32149798]
Reward: -1  Episode Reward:  49
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3475.10926621 -5704.51612281  8055.10266586 -5679.36893145]
------
Step:12, Action:East
State  260
Old Q Values:  [ 3475.10926621 -5704.51612281  8055.10266586 -5679.36893145]
New Q values:  [ 3475.10926621 -5704.51612281  5892.5921347  -5679.36893145]
Reward: -1  Episode Reward:  48
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4508.78720436  8903.83689453]
------
Step:13, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4508.78720436  8903.83689453]
New Q values:  [ 3515.22741472 -8521.23367799  4508.78720436  5328.71239822]
Reward: -1  Episode Reward:  47
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3475.10926621 -5704.51612281  5892.5921347  -5679.36893145]
------
Step:14, Action:East
State  260
Old Q Values:  [ 3475.10926621 -5704.51612281  5892.5921347  -5679.36893145]
New Q values:  [ 3475.10926621 -5704.51612281  3955.05057335 -5679.36893145]
Reward: -1  Episode Reward:  46
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3515.22741472 -8521.23367799  4508.78720436  5328.71239822]
------
Step:15, Action:West
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4508.78720436  5328.71239822]
New Q values:  [ 3515.22741472 -8521.23367799  4508.78720436  7622.04361988]
Reward: -1  Episode Reward:  45
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18303.86220197 12764.58618105  4727.09508304  1875.31501677]
------
Step:16, Action:North
State  260
Old Q Values:  [ 3475.10926621 -5704.51612281  3955.05057335 -5679.36893145]
New Q values:  [ 3706.72917349 -5704.51612281  3955.05057335 -5679.36893145]
Reward: -1  Episode Reward:  44
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  4015.39186925  7724.28489003 -4966.32149798]
------
Step:17, Action:East
State  180
Old Q Values:  [-2840.23360603  4015.39186925  7724.28489003 -4966.32149798]
New Q values:  [-2840.23360603  4015.39186925  8984.01083892 -4966.32149798]
Reward: -1  Episode Reward:  43
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 1.96496563e+04 7.32028793e+03]
------
Step:18, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  2.01630263e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.66754224e+04  1.03161518e+03]
Reward: -1  Episode Reward:  42
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28702.70637075   112.7607609    790.72804752  1050.85266124]
------
Step:19, Action:North
State  208
Old Q Values:  [53907.97797279  2281.50777335 -4584.50430574 -1713.91177491]
New Q values:  [49340.33226972  2281.50777335 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  41
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  5661.05765619  -180.00807518 92592.47026867]
------
Step:20, Action:West
State  128
Old Q Values:  [27558.53877122  8286.98116673 -8652.84       67385.76854049]
New Q values:  [ 27558.53877122   8286.98116673  -8652.84       120244.63633507]
Reward: 100009  Episode Reward:  100050
xxxxx
xga x
x   x
x   x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  1855.43823249   660.86649319 -1273.93981167]
------
Step:1, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  2.25170277e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  1.68775687e+03 -3.22965309e-01  1.28553464e+03]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  2605.58586798 -8220.10378799   911.8961227 ]
------
Step:2, Action:South
State  208
Old Q Values:  [49340.33226972  2281.50777335 -4584.50430574 -1713.91177491]
New Q values:  [49340.33226972   725.70910698 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ -640.98000785 -6396.61506955 -8192.20126966 -1426.02190036]
------
Step:3, Action:North
State  288
Old Q Values:  [ -640.98000785 -6396.61506955 -8192.20126966 -1426.02190036]
New Q values:  [14545.10767777 -6396.61506955 -8192.20126966 -1426.02190036]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[49340.33226972   725.70910698 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  210
Old Q Values:  [28702.70637075   112.7607609    790.72804752  1050.85266124]
New Q values:  [11986.80960899   112.7607609    790.72804752  1050.85266124]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.68775687e+03 -3.22965309e-01  1.28553464e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  1.68775687e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  1.54766024e+04 -3.22965309e-01  1.28553464e+03]
Reward: -1  Episode Reward:  15
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[49340.33226972   725.70910698 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  208
Old Q Values:  [49340.33226972   725.70910698 -4584.50430574 -1713.91177491]
New Q values:  [24378.51363644   725.70910698 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.54766024e+04 -3.22965309e-01  1.28553464e+03]
------
Step:7, Action:South
State  136
Old Q Values:  [-7432.41476814  1855.43823249   660.86649319 -1273.93981167]
New Q values:  [-7432.41476814  8055.12938393   660.86649319 -1273.93981167]
Reward: -1  Episode Reward:  13
xxxxx
x g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24378.51363644   725.70910698 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  208
Old Q Values:  [24378.51363644   725.70910698 -4584.50430574 -1713.91177491]
New Q values:  [14393.78618313   725.70910698 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  12
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.54766024e+04 -3.22965309e-01  1.28553464e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  1.54766024e+04 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  4.50817683e+03 -3.22965309e-01  1.28553464e+03]
Reward: -10001  Episode Reward:  -9989
xxxxx
x . x
x..gx
x.. x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14545.10767777 -6396.61506955 -8192.20126966 -1426.02190036]
------
Step:1, Action:North
State  288
Old Q Values:  [14545.10767777 -6396.61506955 -8192.20126966 -1426.02190036]
New Q values:  [10141.57892605 -6396.61506955 -8192.20126966 -1426.02190036]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14393.78618313   725.70910698 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [14393.78618313   725.70910698 -4584.50430574 -1713.91177491]
New Q values:  [ 8179.45328843   725.70910698 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  8055.12938393   660.86649319 -1273.93981167]
------
Step:3, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  4.50817683e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  4.25650672e+03 -3.22965309e-01  1.28553464e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8179.45328843   725.70910698 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  208
Old Q Values:  [ 8179.45328843   725.70910698 -4584.50430574 -1713.91177491]
New Q values:  [ 4548.13333049   725.70910698 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  4.25650672e+03 -3.22965309e-01  1.28553464e+03]
------
Step:5, Action:South
State  136
Old Q Values:  [-7432.41476814  8055.12938393   660.86649319 -1273.93981167]
New Q values:  [-7432.41476814  4585.89175272   660.86649319 -1273.93981167]
Reward: -1  Episode Reward:  15
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4548.13333049   725.70910698 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  208
Old Q Values:  [ 4548.13333049   725.70910698 -4584.50430574 -1713.91177491]
New Q values:  [ 3095.60534732   725.70910698 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  4.25650672e+03 -3.22965309e-01  1.28553464e+03]
------
Step:7, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  4.25650672e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  2.63068429e+03 -3.22965309e-01  1.28553464e+03]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3095.60534732   725.70910698 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  210
Old Q Values:  [11986.80960899   112.7607609    790.72804752  1050.85266124]
New Q values:  [5583.3291309   112.7607609   790.72804752 1050.85266124]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  2.63068429e+03 -3.22965309e-01  1.28553464e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  2.63068429e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.28553464e+03]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5583.3291309   112.7607609   790.72804752 1050.85266124]
------
Step:10, Action:North
State  208
Old Q Values:  [ 3095.60534732   725.70910698 -4584.50430574 -1713.91177491]
New Q values:  [ 2055.64387563   725.70910698 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  10
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.28553464e+03]
------
Step:11, Action:West
State  138
Old Q Values:  [ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.28553464e+03]
New Q values:  [ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.07963171e+03]
Reward: -1  Episode Reward:  9
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1886.72617765   795.30880442]
------
Step:12, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  6281.61167075   861.04204957]
New Q values:  [ -253.44886264 -1902.20915811  3330.046405     861.04204957]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.07963171e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.07963171e+03]
New Q values:  [ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  9.97270536e+02]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1886.72617765   795.30880442]
------
Step:14, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1406.69464736 -4884.64294338]
New Q values:  [-9594.56523706 -8069.05606225  1937.84538476 -4884.64294338]
Reward: -1  Episode Reward:  6
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  4585.89175272   660.86649319 -1273.93981167]
------
Step:15, Action:South
State  136
Old Q Values:  [-7432.41476814  4585.89175272   660.86649319 -1273.93981167]
New Q values:  [-7432.41476814  2450.44986378   660.86649319 -1273.93981167]
Reward: -1  Episode Reward:  5
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2055.64387563   725.70910698 -4584.50430574 -1713.91177491]
------
Step:16, Action:South
State  208
Old Q Values:  [ 2055.64387563   725.70910698 -4584.50430574 -1713.91177491]
New Q values:  [ 2055.64387563  3332.15732061 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  4
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10141.57892605 -6396.61506955 -8192.20126966 -1426.02190036]
------
Step:17, Action:North
State  288
Old Q Values:  [10141.57892605 -6396.61506955 -8192.20126966 -1426.02190036]
New Q values:  [ 5055.6787666  -6396.61506955 -8192.20126966 -1426.02190036]
Reward: -1  Episode Reward:  3
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2055.64387563  3332.15732061 -4584.50430574 -1713.91177491]
------
Step:18, Action:South
State  208
Old Q Values:  [ 2055.64387563  3332.15732061 -4584.50430574 -1713.91177491]
New Q values:  [ 2055.64387563  2848.96655822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  2
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5055.6787666  -6396.61506955 -8192.20126966 -1426.02190036]
------
Step:19, Action:West
State  288
Old Q Values:  [ 5055.6787666  -6396.61506955 -8192.20126966 -1426.02190036]
New Q values:  [ 5055.6787666  -6396.61506955 -8192.20126966  2113.178061  ]
Reward: 9  Episode Reward:  11
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[8927.28940382 -168.92307549 4721.37642138 7283.5411195 ]
------
Step:20, Action:North
State  273
Old Q Values:  [8927.28940382 -168.92307549 4721.37642138 7283.5411195 ]
New Q values:  [7550.40891976 -168.92307549 4721.37642138 7283.5411195 ]
Reward: 9  Episode Reward:  20
xxxxx
x.  x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 13246.97719412  9681.98001768  1460.9765133 ]
------
Step:21, Action:South
State  195
Old Q Values:  [   38.85388605  6551.87675244 18032.97542278  1169.39963074]
New Q values:  [   38.85388605  4885.27337691 18032.97542278  1169.39963074]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[7550.40891976 -168.92307549 4721.37642138 7283.5411195 ]
------
Step:22, Action:North
State  272
Old Q Values:  [ 3515.22741472 -8521.23367799  4508.78720436  7622.04361988]
New Q values:  [ 6408.1176963  -8521.23367799  4508.78720436  7622.04361988]
Reward: -1  Episode Reward:  18
xxxxx
x.  x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.66754224e+04  1.03161518e+03]
------
Step:23, Action:East
State  195
Old Q Values:  [   38.85388605  4885.27337691 18032.97542278  1169.39963074]
New Q values:  [  38.85388605 4885.27337691 8887.58890838 1169.39963074]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5583.3291309   112.7607609   790.72804752 1050.85266124]
------
Step:24, Action:North
State  208
Old Q Values:  [ 2055.64387563  2848.96655822 -4584.50430574 -1713.91177491]
New Q values:  [ 1639.65928696  2848.96655822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  9.97270536e+02]
------
Step:25, Action:West
State  138
Old Q Values:  [ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  9.97270536e+02]
New Q values:  [ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.39732214e+03]
Reward: -1  Episode Reward:  15
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3330.046405     861.04204957]
------
Step:26, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1886.72617765   795.30880442]
New Q values:  [ -281.736      -1150.91067548  1572.09220776   795.30880442]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.39732214e+03]
------
Step:27, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  2.72667246e+03 -3.22965309e-01  1.39732214e+03]
New Q values:  [ 9.10656300e+02  1.94475895e+03 -3.22965309e-01  1.39732214e+03]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1639.65928696  2848.96655822 -4584.50430574 -1713.91177491]
------
Step:28, Action:South
State  210
Old Q Values:  [5583.3291309   112.7607609   790.72804752 1050.85266124]
New Q values:  [5583.3291309  1561.20793434  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5055.6787666  -6396.61506955 -8192.20126966  2113.178061  ]
------
Step:29, Action:North
State  288
Old Q Values:  [ 5055.6787666  -6396.61506955 -8192.20126966  2113.178061  ]
New Q values:  [ 3696.67024591 -6396.61506955 -8192.20126966  2113.178061  ]
Reward: -1  Episode Reward:  11
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[5583.3291309  1561.20793434  790.72804752 1050.85266124]
------
Step:30, Action:North
State  210
Old Q Values:  [5583.3291309  1561.20793434  790.72804752 1050.85266124]
New Q values:  [2816.15933728 1561.20793434  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  10
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.94475895e+03 -3.22965309e-01  1.39732214e+03]
------
Step:31, Action:South
State  130
Old Q Values:  [33256.45600344  5661.05765619  -180.00807518 92592.47026867]
New Q values:  [33256.45600344  3108.67086366  -180.00807518 92592.47026867]
Reward: -1  Episode Reward:  9
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2816.15933728 1561.20793434  790.72804752 1050.85266124]
------
Step:32, Action:North
State  208
Old Q Values:  [ 1639.65928696  2848.96655822 -4584.50430574 -1713.91177491]
New Q values:  [28433.00479538  2848.96655822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  3108.67086366  -180.00807518 92592.47026867]
------
Step:33, Action:West
State  130
Old Q Values:  [33256.45600344  3108.67086366  -180.00807518 92592.47026867]
New Q values:  [33256.45600344  3108.67086366  -180.00807518 74759.97662934]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   30798.36257985 125745.29507292]
------
Step:34, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1572.09220776   795.30880442]
New Q values:  [ -281.736      -1150.91067548  1572.09220776   850.51060596]
Reward: 9  Episode Reward:  16
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494  1756.62361399  -180.6       ]
------
Step:35, Action:East
State  107
Old Q Values:  [-252.35169558  311.89931548 3589.08818473 -252.78192178]
New Q values:  [-252.35169558  311.89931548 1906.66293622 -252.78192178]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1572.09220776   850.51060596]
------
Step:36, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1572.09220776   850.51060596]
New Q values:  [ -281.736      -1150.91067548  1211.66456803   850.51060596]
Reward: -1  Episode Reward:  14
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.94475895e+03 -3.22965309e-01  1.39732214e+03]
------
Step:37, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  1.94475895e+03 -3.22965309e-01  1.39732214e+03]
New Q values:  [ 9.10656300e+02  1.62215138e+03 -3.22965309e-01  1.39732214e+03]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2816.15933728 1561.20793434  790.72804752 1050.85266124]
------
Step:38, Action:North
State  208
Old Q Values:  [28433.00479538  2848.96655822 -4584.50430574 -1713.91177491]
New Q values:  [11859.24733248  2848.96655822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.62215138e+03 -3.22965309e-01  1.39732214e+03]
------
Step:39, Action:West
State  136
Old Q Values:  [-7432.41476814  2450.44986378   660.86649319 -1273.93981167]
New Q values:  [-7432.41476814  2450.44986378   660.86649319  -400.12913802]
Reward: -1  Episode Reward:  11
xxxxx
x agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   366.82262216]
------
Step:40, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3330.046405     861.04204957]
New Q values:  [ -253.44886264 -1902.20915811  3330.046405     915.81570069]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548 1906.66293622 -252.78192178]
------
Step:41, Action:East
State  105
Old Q Values:  [ -180.6          446.06592384 -5738.48414911     0.        ]
New Q values:  [ -180.6          446.06592384 -2185.94687299     0.        ]
Reward: -1  Episode Reward:  9
xxxxx
x agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   366.82262216]
------
Step:42, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3330.046405     915.81570069]
New Q values:  [ -253.44886264 -1902.20915811  3330.046405     937.72516114]
Reward: -1  Episode Reward:  8
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  311.89931548 1906.66293622 -252.78192178]
------
Step:43, Action:East
State  105
Old Q Values:  [ -180.6          446.06592384 -2185.94687299     0.        ]
New Q values:  [-180.6         446.06592384 -764.93196255    0.        ]
Reward: -1  Episode Reward:  7
xxxxx
x agx
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   366.82262216]
------
Step:44, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1937.84538476 -4884.64294338]
New Q values:  [-9594.56523706 -8069.05606225  1937.84538476 -1820.6374002 ]
Reward: -1  Episode Reward:  6
xxxxx
xag x
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         446.06592384 -764.93196255    0.        ]
------
Step:45, Action:South
State  104
Old Q Values:  [-8652.84        2246.80188749  2353.85801711 -8652.84      ]
New Q values:  [-8652.84        3598.88070822  2353.85801711 -8652.84      ]
Reward: 9  Episode Reward:  15
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[ 127.54442502    0.         8982.5331774     0.        ]
------
Step:46, Action:East
State  185
Old Q Values:  [ 145.72609444    0.          442.67034013 -178.98      ]
New Q values:  [ 145.72609444    0.         4149.1493692  -178.98      ]
Reward: -1  Episode Reward:  14
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  13242.27077715  1022.75074485   568.38654082]
------
Step:47, Action:South
State  200
Old Q Values:  [  169.9257398  13242.27077715  1022.75074485   568.38654082]
New Q values:  [ 169.9257398  7582.92139682 1022.75074485  568.38654082]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6408.1176963  -8521.23367799  4508.78720436  7622.04361988]
------
Step:48, Action:West
State  272
Old Q Values:  [ 6408.1176963  -8521.23367799  4508.78720436  7622.04361988]
New Q values:  [ 6408.1176963  -8521.23367799  4508.78720436 77159.29584117]
Reward: 100009  Episode Reward:  100022
xxxxx
x   x
xg  x
xa  x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3706.72917349 -5704.51612281  3955.05057335 -5679.36893145]
------
Step:1, Action:East
State  260
Old Q Values:  [ 3706.72917349 -5704.51612281  3955.05057335 -5679.36893145]
New Q values:  [ 3706.72917349 -5704.51612281 24735.20898169 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6408.1176963  -8521.23367799  4508.78720436 77159.29584117]
------
Step:2, Action:West
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197 24702.30720332  3250.56114665]
New Q values:  [ 7058.83631802 -5807.06396197 24702.30720332  1564.23976583]
Reward: -1  Episode Reward:  8
xxxxx
x g.x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[804.51684994  26.73544252 882.05102389 123.6214372 ]
------
Step:3, Action:East
State  261
Old Q Values:  [804.51684994  26.73544252 882.05102389 123.6214372 ]
New Q values:  [ 804.51684994   26.73544252 7762.91257055  123.6214372 ]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7058.83631802 -5807.06396197 24702.30720332  1564.23976583]
------
Step:4, Action:East
State  272
Old Q Values:  [ 6408.1176963  -8521.23367799  4508.78720436 77159.29584117]
New Q values:  [ 6408.1176963  -8521.23367799  2917.91595552 77159.29584117]
Reward: 9  Episode Reward:  16
xxxxx
x g.x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3696.67024591 -6396.61506955 -8192.20126966  2113.178061  ]
------
Step:5, Action:North
State  288
Old Q Values:  [ 3696.67024591 -6396.61506955 -8192.20126966  2113.178061  ]
New Q values:  [ 5041.84229811 -6396.61506955 -8192.20126966  2113.178061  ]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11859.24733248  2848.96655822 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  208
Old Q Values:  [11859.24733248  2848.96655822 -4584.50430574 -1713.91177491]
New Q values:  [ 5484.23389212  2848.96655822 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  34
xxxxx
x gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  2450.44986378   660.86649319  -400.12913802]
------
Step:7, Action:South
State  130
Old Q Values:  [33256.45600344  3108.67086366  -180.00807518 74759.97662934]
New Q values:  [33256.45600344  2888.1385131   -180.00807518 74759.97662934]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5484.23389212  2848.96655822 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  208
Old Q Values:  [ 5484.23389212  2848.96655822 -4584.50430574 -1713.91177491]
New Q values:  [24621.08654565  2848.96655822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  32
xxxxx
x .ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  2888.1385131   -180.00807518 74759.97662934]
------
Step:9, Action:West
State  136
Old Q Values:  [-7432.41476814  2450.44986378   660.86649319  -400.12913802]
New Q values:  [-7432.41476814  2450.44986378   660.86649319   426.70196022]
Reward: 9  Episode Reward:  41
xxxxx
xga x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1937.84538476 -1820.6374002 ]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1211.66456803   850.51060596]
New Q values:  [ -281.736      -1150.91067548   970.71124153   850.51060596]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.62215138e+03 -3.22965309e-01  1.39732214e+03]
------
Step:11, Action:South
State  136
Old Q Values:  [-7432.41476814  2450.44986378   660.86649319   426.70196022]
New Q values:  [-7432.41476814  8365.90590921   660.86649319   426.70196022]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24621.08654565  2848.96655822 -4584.50430574 -1713.91177491]
------
Step:12, Action:North
State  208
Old Q Values:  [24621.08654565  2848.96655822 -4584.50430574 -1713.91177491]
New Q values:  [10334.48003259  2848.96655822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.62215138e+03 -3.22965309e-01  1.39732214e+03]
------
Step:13, Action:South
State  136
Old Q Values:  [-7432.41476814  8365.90590921   660.86649319   426.70196022]
New Q values:  [-7432.41476814  6446.10637346   660.86649319   426.70196022]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10334.48003259  2848.96655822 -4584.50430574 -1713.91177491]
------
Step:14, Action:North
State  208
Old Q Values:  [10334.48003259  2848.96655822 -4584.50430574 -1713.91177491]
New Q values:  [ 6067.02392507  2848.96655822 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  36
xxxxx
x gax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  6446.10637346   660.86649319   426.70196022]
------
Step:15, Action:South
State  136
Old Q Values:  [-7432.41476814  6446.10637346   660.86649319   426.70196022]
New Q values:  [-7432.41476814  4397.9497269    660.86649319   426.70196022]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6067.02392507  2848.96655822 -4584.50430574 -1713.91177491]
------
Step:16, Action:South
State  208
Old Q Values:  [ 6067.02392507  2848.96655822 -4584.50430574 -1713.91177491]
New Q values:  [ 6067.02392507  2651.53931272 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5041.84229811 -6396.61506955 -8192.20126966  2113.178061  ]
------
Step:17, Action:West
State  288
Old Q Values:  [ 5041.84229811 -6396.61506955 -8192.20126966  2113.178061  ]
New Q values:  [ 5041.84229811 -6396.61506955 -8192.20126966  3109.79390033]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x.. x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[7550.40891976 -168.92307549 4721.37642138 7283.5411195 ]
------
Step:18, Action:North
State  272
Old Q Values:  [ 6408.1176963  -8521.23367799  2917.91595552 77159.29584117]
New Q values:  [ 7571.27380893 -8521.23367799  2917.91595552 77159.29584117]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x.a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.66754224e+04  1.03161518e+03]
------
Step:19, Action:East
State  195
Old Q Values:  [  38.85388605 4885.27337691 8887.58890838 1169.39963074]
New Q values:  [  38.85388605 4885.27337691 4399.28336454 1169.39963074]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x. ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[2816.15933728 1561.20793434  790.72804752 1050.85266124]
------
Step:20, Action:North
State  210
Old Q Values:  [2816.15933728 1561.20793434  790.72804752 1050.85266124]
New Q values:  [1612.50914924 1561.20793434  790.72804752 1050.85266124]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  1.62215138e+03 -3.22965309e-01  1.39732214e+03]
------
Step:21, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  1.62215138e+03 -3.22965309e-01  1.39732214e+03]
New Q values:  [ 9.10656300e+02  2.46836773e+03 -3.22965309e-01  1.39732214e+03]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6067.02392507  2651.53931272 -4584.50430574 -1713.91177491]
------
Step:22, Action:North
State  208
Old Q Values:  [ 6067.02392507  2651.53931272 -4584.50430574 -1713.91177491]
New Q values:  [ 3166.71988901  2651.53931272 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x. gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  2.46836773e+03 -3.22965309e-01  1.39732214e+03]
------
Step:23, Action:West
State  136
Old Q Values:  [-7432.41476814  4397.9497269    660.86649319   426.70196022]
New Q values:  [-7432.41476814  4397.9497269    660.86649319   280.12757074]
Reward: -1  Episode Reward:  37
xxxxx
x agx
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   366.82262216]
------
Step:24, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1937.84538476 -1820.6374002 ]
New Q values:  [-9594.56523706 -8069.05606225  1937.84538476  -595.03518293]
Reward: -1  Episode Reward:  36
xxxxx
xag x
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         446.06592384 -764.93196255    0.        ]
------
Step:25, Action:South
State  97
Old Q Values:  [    0.         32496.19690331     0.             0.        ]
New Q values:  [    0.         83057.04593133     0.             0.        ]
Reward: 100009  Episode Reward:  100045
xxxxx
x  gx
xa  x
x   x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 804.51684994   26.73544252 7762.91257055  123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [ 804.51684994   26.73544252 7762.91257055  123.6214372 ]
New Q values:  [2734.21027774   26.73544252 7762.91257055  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 5245.81661103 8023.34512589 1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [2174.36956339 5245.81661103 8023.34512589 1554.80203889]
New Q values:  [2174.36956339 5245.81661103 8217.36478077 1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.66754224e+04  1.03161518e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.71684510e+03 1.96496563e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 6.71684510e+03 8.81527848e+03 7.32028793e+03]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3166.71988901  2651.53931272 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  210
Old Q Values:  [1612.50914924 1561.20793434  790.72804752 1050.85266124]
New Q values:  [23078.3966485   1561.20793434   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  2888.1385131   -180.00807518 74759.97662934]
------
Step:5, Action:West
State  130
Old Q Values:  [33256.45600344  2888.1385131   -180.00807518 74759.97662934]
New Q values:  [33256.45600344  2888.1385131   -180.00807518 67632.97917361]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   30798.36257985 125745.29507292]
------
Step:6, Action:West
State  126
Old Q Values:  [   0.          331.64678262 7128.26683804 1511.43880655]
New Q values:  [   0.          331.64678262 7128.26683804 1337.22554042]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2424.16672598  751.03868094 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 2424.16672598  751.03868094 -120.29354603]
New Q values:  [-177.44732869 3632.93673138  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1826.83669998 8879.56680328  154.04646645]
------
Step:8, Action:South
State  189
Old Q Values:  [ 275.08817949 1826.83669998 8879.56680328  154.04646645]
New Q values:  [ 275.08817949 3059.00845116 8879.56680328  154.04646645]
Reward: -1  Episode Reward:  52
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2734.21027774   26.73544252 7762.91257055  123.6214372 ]
------
Step:9, Action:East
State  261
Old Q Values:  [2734.21027774   26.73544252 7762.91257055  123.6214372 ]
New Q values:  [2734.21027774   26.73544252 3586.58107829  123.6214372 ]
Reward: -1  Episode Reward:  51
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1606.7201669  1468.83994914]
------
Step:10, Action:East
State  273
Old Q Values:  [7550.40891976 -168.92307549 4721.37642138 7283.5411195 ]
New Q values:  [ 7550.40891976  -168.92307549 63406.50325798  7283.5411195 ]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 8.81527848e+03 7.32028793e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [ 169.9257398  7582.92139682 1022.75074485  568.38654082]
New Q values:  [ 169.9257398  7582.92139682 1196.17605834  568.38654082]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  2605.58586798 -8220.10378799   911.8961227 ]
------
Step:2, Action:South
State  208
Old Q Values:  [ 3166.71988901  2651.53931272 -4584.50430574 -1713.91177491]
New Q values:  [ 3166.71988901  2578.56841452 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5041.84229811 -6396.61506955 -8192.20126966  3109.79390033]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5041.84229811 -6396.61506955 -8192.20126966  3109.79390033]
New Q values:  [ 8939.65591379 -6396.61506955 -8192.20126966  3109.79390033]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[23078.3966485   1561.20793434   790.72804752  1050.85266124]
------
Step:4, Action:North
State  210
Old Q Values:  [23078.3966485   1561.20793434   790.72804752  1050.85266124]
New Q values:  [9977.26897839 1561.20793434  790.72804752 1050.85266124]
Reward: 9  Episode Reward:  26
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  2.46836773e+03 -3.22965309e-01  1.39732214e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  2.46836773e+03 -3.22965309e-01  1.39732214e+03]
New Q values:  [ 9.10656300e+02  3.97992779e+03 -3.22965309e-01  1.39732214e+03]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9977.26897839 1561.20793434  790.72804752 1050.85266124]
------
Step:6, Action:North
State  208
Old Q Values:  [ 3166.71988901  2578.56841452 -4584.50430574 -1713.91177491]
New Q values:  [ 2460.06629125  2578.56841452 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  3.97992779e+03 -3.22965309e-01  1.39732214e+03]
------
Step:7, Action:West
State  136
Old Q Values:  [-7432.41476814  4397.9497269    660.86649319   280.12757074]
New Q values:  [-7432.41476814  4397.9497269    660.86649319   221.49781494]
Reward: -1  Episode Reward:  23
xxxxx
x.agx
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   366.82262216]
------
Step:8, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1937.84538476  -595.03518293]
New Q values:  [-9594.56523706 -8069.05606225  1937.84538476   -98.79429602]
Reward: 9  Episode Reward:  32
xxxxx
xag x
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         446.06592384 -764.93196255    0.        ]
------
Step:9, Action:South
State  107
Old Q Values:  [-252.35169558  311.89931548 1906.66293622 -252.78192178]
New Q values:  [-252.35169558 1374.90453695 1906.66293622 -252.78192178]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 145.72609444    0.         4149.1493692  -178.98      ]
------
Step:10, Action:North
State  184
Old Q Values:  [ 127.54442502    0.         8982.5331774     0.        ]
New Q values:  [ 577.4048542    0.        8982.5331774    0.       ]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494  1756.62361399  -180.6       ]
------
Step:11, Action:East
State  110
Old Q Values:  [ -239.29051573 -3902.35021231   265.50261253  -180.6       ]
New Q values:  [ -239.29051573 -3902.35021231   396.81441747  -180.6       ]
Reward: -1  Episode Reward:  39
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   970.71124153   850.51060596]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   970.71124153   850.51060596]
New Q values:  [ -281.736      -1150.91067548  1581.66283226   850.51060596]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  3.97992779e+03 -3.22965309e-01  1.39732214e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  3.97992779e+03 -3.22965309e-01  1.39732214e+03]
New Q values:  [ 9.10656300e+02  2.37304687e+03 -3.22965309e-01  1.39732214e+03]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  2605.58586798 -8220.10378799   911.8961227 ]
------
Step:14, Action:South
State  216
Old Q Values:  [ 2195.11823766  2605.58586798 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  3723.53112133 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8939.65591379 -6396.61506955 -8192.20126966  3109.79390033]
------
Step:15, Action:West
State  288
Old Q Values:  [ 8939.65591379 -6396.61506955 -8192.20126966  3109.79390033]
New Q values:  [ 8939.65591379 -6396.61506955 -8192.20126966 20271.26853753]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7550.40891976  -168.92307549 63406.50325798  7283.5411195 ]
------
Step:16, Action:North
State  272
Old Q Values:  [ 7571.27380893 -8521.23367799  2917.91595552 77159.29584117]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 77159.29584117]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  2672.40916596  1278.19575341]
------
Step:17, Action:East
State  200
Old Q Values:  [ 169.9257398  7582.92139682 1196.17605834  568.38654082]
New Q values:  [ 169.9257398  7582.92139682 1594.92975973  568.38654082]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  3723.53112133 -8220.10378799   911.8961227 ]
------
Step:18, Action:South
State  216
Old Q Values:  [ 2195.11823766  3723.53112133 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  7570.19300979 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8939.65591379 -6396.61506955 -8192.20126966 20271.26853753]
------
Step:19, Action:West
State  288
Old Q Values:  [ 8939.65591379 -6396.61506955 -8192.20126966 20271.26853753]
New Q values:  [ 8939.65591379 -6396.61506955 -8192.20126966 31255.69616736]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 77159.29584117]
------
Step:20, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 77159.29584117]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 96360.27699706]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7550.40891976  -168.92307549 63406.50325798  7283.5411195 ]
------
Step:1, Action:East
State  273
Old Q Values:  [ 7550.40891976  -168.92307549 63406.50325798  7283.5411195 ]
New Q values:  [ 7550.40891976  -168.92307549 28744.7101534   7283.5411195 ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x.. x
x. gx
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8939.65591379 -6396.61506955 -8192.20126966 31255.69616736]
------
Step:1, Action:North
State  288
Old Q Values:  [ 8939.65591379 -6396.61506955 -8192.20126966 31255.69616736]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966 31255.69616736]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9977.26897839 1561.20793434  790.72804752 1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [9977.26897839 1561.20793434  790.72804752 1050.85266124]
New Q values:  [24286.20134344  1561.20793434   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  2888.1385131   -180.00807518 67632.97917361]
------
Step:3, Action:West
State  138
Old Q Values:  [ 9.10656300e+02  2.37304687e+03 -3.22965309e-01  1.39732214e+03]
New Q values:  [ 9.10656300e+02  2.37304687e+03 -3.22965309e-01  1.03882770e+03]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1581.66283226   850.51060596]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3330.046405     937.72516114]
New Q values:  [ -253.44886264 -1902.20915811  2043.33262438   937.72516114]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02  2.37304687e+03 -3.22965309e-01  1.03882770e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 9.10656300e+02  2.37304687e+03 -3.22965309e-01  1.03882770e+03]
New Q values:  [ 9.10656300e+02 -4.27781073e+03 -3.22965309e-01  1.03882770e+03]
Reward: -10001  Episode Reward:  -9975
xxxxx
x.  x
x..gx
x.  x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 5245.81661103 8217.36478077 1554.80203889]
------
Step:1, Action:East
State  183
Old Q Values:  [2174.36956339 5245.81661103 8217.36478077 1554.80203889]
New Q values:  [2174.36956339 5245.81661103 8294.97264272 1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.66754224e+04  1.03161518e+03]
------
Step:2, Action:East
State  195
Old Q Values:  [  38.85388605 4885.27337691 4399.28336454 1169.39963074]
New Q values:  [  38.85388605 4885.27337691 9050.97374885 1169.39963074]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[24286.20134344  1561.20793434   790.72804752  1050.85266124]
------
Step:3, Action:North
State  210
Old Q Values:  [24286.20134344  1561.20793434   790.72804752  1050.85266124]
New Q values:  [30009.77428946  1561.20793434   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  2888.1385131   -180.00807518 67632.97917361]
------
Step:4, Action:West
State  138
Old Q Values:  [ 9.10656300e+02 -4.27781073e+03 -3.22965309e-01  1.03882770e+03]
New Q values:  [ 9.10656300e+02 -4.27781073e+03 -3.22965309e-01  1.03393087e+03]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2043.33262438   937.72516114]
------
Step:5, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   30798.36257985 125745.29507292]
New Q values:  [  -180.6          3557.6642036   32608.63878402 125745.29507292]
Reward: -1  Episode Reward:  35
xxxxx
x. ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  2888.1385131   -180.00807518 67632.97917361]
------
Step:6, Action:West
State  130
Old Q Values:  [33256.45600344  2888.1385131   -180.00807518 67632.97917361]
New Q values:  [33256.45600344  2888.1385131   -180.00807518 64776.18019132]
Reward: -1  Episode Reward:  34
xxxxx
x.a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1581.66283226   850.51060596]
New Q values:  [ -281.736      -1150.91067548  1581.66283226   917.60312325]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1374.90453695 1906.66293622 -252.78192178]
------
Step:8, Action:East
State  107
Old Q Values:  [-252.35169558 1374.90453695 1906.66293622 -252.78192178]
New Q values:  [-252.35169558 1374.90453695 1236.56402417 -252.78192178]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1581.66283226   917.60312325]
------
Step:9, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1581.66283226   917.60312325]
New Q values:  [ -281.736      -1150.91067548   942.24439359   917.60312325]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02 -4.27781073e+03 -3.22965309e-01  1.03393087e+03]
------
Step:10, Action:West
State  138
Old Q Values:  [ 9.10656300e+02 -4.27781073e+03 -3.22965309e-01  1.03393087e+03]
New Q values:  [ 9.10656300e+02 -4.27781073e+03 -3.22965309e-01  6.95645666e+02]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   942.24439359   917.60312325]
------
Step:11, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   942.24439359   917.60312325]
New Q values:  [ -281.736      -1150.91067548   649.49464746   917.60312325]
Reward: -1  Episode Reward:  39
xxxxx
x  ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 9.10656300e+02 -4.27781073e+03 -3.22965309e-01  6.95645666e+02]
------
Step:12, Action:North
State  138
Old Q Values:  [ 9.10656300e+02 -4.27781073e+03 -3.22965309e-01  6.95645666e+02]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  6.95645666e+02]
Reward: -301  Episode Reward:  -262
xxxxx
x  ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  6.95645666e+02]
------
Step:13, Action:West
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  6.95645666e+02]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  5.52939203e+02]
Reward: -1  Episode Reward:  -263
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   649.49464746   917.60312325]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2043.33262438   937.72516114]
New Q values:  [ -253.44886264 -1902.20915811  2043.33262438   786.96142554]
Reward: -1  Episode Reward:  -264
xxxxx
xa  x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1374.90453695 1236.56402417 -252.78192178]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558 1374.90453695 1236.56402417 -252.78192178]
New Q values:  [-252.35169558 1828.15756819 1236.56402417 -252.78192178]
Reward: -1  Episode Reward:  -265
xxxxx
x   x
xa  x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         4262.65251137    0.        ]
------
Step:16, Action:East
State  185
Old Q Values:  [ 145.72609444    0.         4149.1493692  -178.98      ]
New Q values:  [  145.72609444     0.         -2066.06383327  -178.98      ]
Reward: -10001  Episode Reward:  -10266
xxxxx
x   x
x g x
x.. x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2460.06629125  2578.56841452 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [ 2460.06629125  2578.56841452 -4584.50430574 -1713.91177491]
New Q values:  [ 2460.06629125 10413.53621602 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966 31255.69616736]
------
Step:2, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966 31255.69616736]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966 41415.76156606]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x . x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 96360.27699706]
------
Step:3, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 96360.27699706]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 45970.07349333]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3706.72917349 -5704.51612281 24735.20898169 -5679.36893145]
------
Step:4, Action:East
State  261
Old Q Values:  [2734.21027774   26.73544252 3586.58107829  123.6214372 ]
New Q values:  [ 2734.21027774    26.73544252 15225.05447932   123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 45970.07349333]
------
Step:5, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 45970.07349333]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 22954.94574113]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2734.21027774    26.73544252 15225.05447932   123.6214372 ]
------
Step:6, Action:East
State  261
Old Q Values:  [ 2734.21027774    26.73544252 15225.05447932   123.6214372 ]
New Q values:  [2734.21027774   26.73544252 6975.90551406  123.6214372 ]
Reward: -10001  Episode Reward:  -9976
xxxxx
x...x
x . x
x g x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  4397.9497269    660.86649319   221.49781494]
------
Step:1, Action:South
State  136
Old Q Values:  [-7432.41476814  4397.9497269    660.86649319   221.49781494]
New Q values:  [-7432.41476814  4035.6377937    660.86649319   221.49781494]
Reward: 9  Episode Reward:  9
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  7570.19300979 -8220.10378799   911.8961227 ]
------
Step:2, Action:South
State  208
Old Q Values:  [ 2460.06629125 10413.53621602 -4584.50430574 -1713.91177491]
New Q values:  [ 2460.06629125 16595.54295623 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966 41415.76156606]
------
Step:3, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966 41415.76156606]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966 25195.11767245]
Reward: 9  Episode Reward:  27
xxxxx
x . x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7550.40891976  -168.92307549 28744.7101534   7283.5411195 ]
------
Step:4, Action:East
State  273
Old Q Values:  [ 7550.40891976  -168.92307549 28744.7101534   7283.5411195 ]
New Q values:  [ 7550.40891976  -168.92307549 19055.81936309  7283.5411195 ]
Reward: -1  Episode Reward:  26
xxxxx
x .gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966 25195.11767245]
------
Step:5, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966 25195.11767245]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966 16963.93079132]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 22954.94574113]
------
Step:6, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 22954.94574113]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 16607.94099096]
Reward: 9  Episode Reward:  34
xxxxx
xg. x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3706.72917349 -5704.51612281 24735.20898169 -5679.36893145]
------
Step:7, Action:East
State  257
Old Q Values:  [18303.86220197 12764.58618105  4727.09508304  1875.31501677]
New Q values:  [18303.86220197 12764.58618105  6872.6203305   1875.31501677]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 16607.94099096]
------
Step:8, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 16607.94099096]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 12133.73505697]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18303.86220197 12764.58618105  6872.6203305   1875.31501677]
------
Step:9, Action:North
State  261
Old Q Values:  [2734.21027774   26.73544252 6975.90551406  123.6214372 ]
New Q values:  [3587.57590391   26.73544252 6975.90551406  123.6214372 ]
Reward: 9  Episode Reward:  41
xxxxx
x . x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 5245.81661103 8294.97264272 1554.80203889]
------
Step:10, Action:East
State  181
Old Q Values:  [ 1077.24729837  1279.45451029 -1869.69345581   262.76946019]
New Q values:  [ 1077.24729837  1279.45451029 -4097.89383915   262.76946019]
Reward: -9991  Episode Reward:  -9950
xxxxx
x . x
x g x
x   x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   649.49464746   917.60312325]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   649.49464746   917.60312325]
New Q values:  [ -281.736      -1150.91067548   649.49464746   491.48557454]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3902.35021231   396.81441747  -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134  1968.68675445  1774.31279594     0.        ]
New Q values:  [-8463.16477134  1968.68675445  1290.4787338      0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xga.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1937.84538476   -98.79429602]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1937.84538476   -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  1991.22949201   -98.79429602]
Reward: 9  Episode Reward:  17
xxxxx
x gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  4035.6377937    660.86649319   221.49781494]
------
Step:4, Action:South
State  136
Old Q Values:  [-7432.41476814  4035.6377937    660.86649319   221.49781494]
New Q values:  [-7432.41476814  6598.31800435   660.86649319   221.49781494]
Reward: 9  Episode Reward:  26
xxxxx
x  gx
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2460.06629125 16595.54295623 -4584.50430574 -1713.91177491]
------
Step:5, Action:South
State  208
Old Q Values:  [ 2460.06629125 16595.54295623 -4584.50430574 -1713.91177491]
New Q values:  [ 2460.06629125 11732.79641988 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  35
xxxxx
x g x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966 16963.93079132]
------
Step:6, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966 16963.93079132]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966 10431.09283362]
Reward: 9  Episode Reward:  44
xxxxx
xg  x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 12133.73505697]
------
Step:7, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 12133.73505697]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 12273.4567173 ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3706.72917349 -5704.51612281 24735.20898169 -5679.36893145]
------
Step:8, Action:East
State  257
Old Q Values:  [18303.86220197 12764.58618105  6872.6203305   1875.31501677]
New Q values:  [18303.86220197 12764.58618105  6430.48514739  1875.31501677]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 12273.4567173 ]
------
Step:9, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 12273.4567173 ]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 12329.34538143]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3706.72917349 -5704.51612281 24735.20898169 -5679.36893145]
------
Step:10, Action:East
State  256
Old Q Values:  [47016.92797738 15850.58020692  7407.2956525    644.94785455]
New Q values:  [47016.92797738 15850.58020692  6661.12187543   644.94785455]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 12329.34538143]
------
Step:11, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 12329.34538143]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 10422.29681316]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18303.86220197 12764.58618105  6430.48514739  1875.31501677]
------
Step:12, Action:North
State  260
Old Q Values:  [ 3706.72917349 -5704.51612281 24735.20898169 -5679.36893145]
New Q values:  [ 4183.29492107 -5704.51612281 24735.20898169 -5679.36893145]
Reward: 9  Episode Reward:  48
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  4015.39186925  8984.01083892 -4966.32149798]
------
Step:13, Action:East
State  176
Old Q Values:  [103770.24881615   1621.55095326  26933.90471518      0.        ]
New Q values:  [103770.24881615   1621.55095326  73423.54542925      0.        ]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3632.93673138  751.03868094 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3885.39458374  238.35800069    0.        ]
New Q values:  [ 221.30610858 4048.04962631  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 5245.81661103 8294.97264272 1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [2174.36956339 5245.81661103 8294.97264272 1554.80203889]
New Q values:  [2174.36956339 5245.81661103 5641.83063345 1554.80203889]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194 1482.85185902 7748.13858789 1915.70494401]
------
Step:3, Action:East
State  201
Old Q Values:  [   2.33354578 2191.91549263 -501.63979658  529.38906334]
New Q values:  [ 2.33354578e+00  2.19191549e+03 -3.92419802e+03  5.29389063e+02]
Reward: -9991  Episode Reward:  -9983
xxxxx
x ..x
x  gx
x...x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 8.81527848e+03 7.32028793e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.66754224e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.56785013e+04  1.03161518e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[30009.77428946  1561.20793434   790.72804752  1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [30009.77428946  1561.20793434   790.72804752  1050.85266124]
New Q values:  [31442.16377318  1561.20793434   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  2888.1385131   -180.00807518 64776.18019132]
------
Step:3, Action:West
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  5.52939203e+02]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  3.79501642e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   649.49464746   491.48557454]
New Q values:  [ -281.736      -1150.91067548   649.49464746   321.03855506]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3902.35021231   396.81441747  -180.6       ]
------
Step:5, Action:East
State  110
Old Q Values:  [ -239.29051573 -3902.35021231   396.81441747  -180.6       ]
New Q values:  [ -239.29051573 -3902.35021231   352.97416123  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   649.49464746   321.03855506]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   649.49464746   321.03855506]
New Q values:  [ -281.736      -1150.91067548 11644.24711993   321.03855506]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  3.79501642e+04]
------
Step:7, Action:West
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  3.79501642e+04]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  1.86727398e+04]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 11644.24711993   321.03855506]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 11644.24711993   321.03855506]
New Q values:  [ -281.736      -1150.91067548 10258.92079315   321.03855506]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  1.86727398e+04]
------
Step:9, Action:West
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  1.86727398e+04]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  1.05461722e+04]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 10258.92079315   321.03855506]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 10258.92079315   321.03855506]
New Q values:  [ -281.736      -1150.91067548  7266.81996671   321.03855506]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  1.05461722e+04]
------
Step:11, Action:West
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  1.05461722e+04]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  6.39791486e+03]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  7266.81996671   321.03855506]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  7266.81996671   321.03855506]
New Q values:  [ -281.736      -1150.91067548  4825.50244347   321.03855506]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  6.39791486e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  6.39791486e+03]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  4.00621668e+03]
Reward: -1  Episode Reward:  27
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4825.50244347   321.03855506]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4825.50244347   321.03855506]
New Q values:  [ -281.736      -1150.91067548  3131.46598001   321.03855506]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  4.00621668e+03]
------
Step:15, Action:West
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  4.00621668e+03]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  2.54132646e+03]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3131.46598001   321.03855506]
------
Step:16, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3131.46598001   321.03855506]
New Q values:  [ -281.736      -1150.91067548  2014.38433126   321.03855506]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  2.54132646e+03]
------
Step:17, Action:West
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  2.54132646e+03]
New Q values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  1.62024589e+03]
Reward: -1  Episode Reward:  23
xxxxx
x a x
xg  x
x...x
xxxxx
Step:18, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1991.22949201   -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  2775.38719811   -98.79429602]
Reward: -1  Episode Reward:  22
xxxxx
xg ax
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  6598.31800435   660.86649319   221.49781494]
------
Step:19, Action:South
State  138
Old Q Values:  [ 4.56859410e+02 -4.27781073e+03 -3.22965309e-01  1.62024589e+03]
New Q values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  1.62024589e+03]
Reward: -1  Episode Reward:  21
xxxxx
x   x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  7570.19300979 -8220.10378799   911.8961227 ]
------
Step:20, Action:South
State  216
Old Q Values:  [ 2195.11823766  7570.19300979 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  6162.805054   -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  30
xxxxx
xg  x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966 10431.09283362]
------
Step:21, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966 10431.09283362]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966  7304.5261774 ]
Reward: 9  Episode Reward:  39
xxxxx
x   x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 10422.29681316]
------
Step:22, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 10422.29681316]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 78279.39711848]
Reward: 100009  Episode Reward:  100048
xxxxx
xg  x
x   x
xa  x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4183.29492107 -5704.51612281 24735.20898169 -5679.36893145]
------
Step:1, Action:East
State  260
Old Q Values:  [ 4183.29492107 -5704.51612281 24735.20898169 -5679.36893145]
New Q values:  [ 4183.29492107 -5704.51612281 33383.30272822 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 78279.39711848]
------
Step:2, Action:North
State  276
Old Q Values:  [ 7058.83631802 -5807.06396197 24702.30720332  1564.23976583]
New Q values:  [ 5473.51807038 -5807.06396197 24702.30720332  1564.23976583]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 8.81527848e+03 7.32028793e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.71684510e+03 8.81527848e+03 7.32028793e+03]
New Q values:  [3.89777037e-01 6.71684510e+03 7.05135032e+03 7.32028793e+03]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2460.06629125 11732.79641988 -4584.50430574 -1713.91177491]
------
Step:4, Action:South
State  208
Old Q Values:  [ 2460.06629125 11732.79641988 -4584.50430574 -1713.91177491]
New Q values:  [ 2460.06629125  6889.87642117 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  36
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966  7304.5261774 ]
------
Step:5, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966  7304.5261774 ]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966 26405.0296065 ]
Reward: -1  Episode Reward:  35
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 78279.39711848]
------
Step:6, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 78279.39711848]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 41326.14966586]
Reward: -1  Episode Reward:  34
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4183.29492107 -5704.51612281 33383.30272822 -5679.36893145]
------
Step:7, Action:East
State  260
Old Q Values:  [ 4183.29492107 -5704.51612281 33383.30272822 -5679.36893145]
New Q values:  [ 4183.29492107 -5704.51612281 25750.56599104 -5679.36893145]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 41326.14966586]
------
Step:8, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 41326.14966586]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 22021.01852693]
Reward: -1  Episode Reward:  32
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[18303.86220197 12764.58618105  6430.48514739  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [18303.86220197 12764.58618105  6430.48514739  1875.31501677]
New Q values:  [17374.11205079 12764.58618105  6430.48514739  1875.31501677]
Reward: -1  Episode Reward:  31
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  20101.94546536     0.        ]
------
Step:10, Action:North
State  181
Old Q Values:  [ 1077.24729837  1279.45451029 -4097.89383915   262.76946019]
New Q values:  [ 1650.71380724  1279.45451029 -4097.89383915   262.76946019]
Reward: 9  Episode Reward:  40
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 4048.04962631  238.35800069    0.        ]
------
Step:11, Action:South
State  101
Old Q Values:  [   0.         3773.96791785    0.            0.        ]
New Q values:  [   0.         2004.20130931    0.            0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1650.71380724  1279.45451029 -4097.89383915   262.76946019]
------
Step:12, Action:North
State  181
Old Q Values:  [ 1650.71380724  1279.45451029 -4097.89383915   262.76946019]
New Q values:  [ 1260.94591569  1279.45451029 -4097.89383915   262.76946019]
Reward: -1  Episode Reward:  38
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         2004.20130931    0.            0.        ]
------
Step:13, Action:South
State  100
Old Q Values:  [   0.         9571.10116341  443.65288404    0.        ]
New Q values:  [   0.         6523.04371704  443.65288404    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  4015.39186925  8984.01083892 -4966.32149798]
------
Step:14, Action:East
State  180
Old Q Values:  [-2840.23360603  4015.39186925  8984.01083892 -4966.32149798]
New Q values:  [-2840.23360603  4015.39186925  5789.09071482 -4966.32149798]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 7.05135032e+03 7.32028793e+03]
------
Step:15, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.56785013e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.57034496e+04  1.03161518e+03]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[31442.16377318  1561.20793434   790.72804752  1050.85266124]
------
Step:16, Action:North
State  210
Old Q Values:  [31442.16377318  1561.20793434   790.72804752  1050.85266124]
New Q values:  [32015.11956667  1561.20793434   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  44
xxxxx
x .ax
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  2888.1385131   -180.00807518 64776.18019132]
------
Step:17, Action:West
State  130
Old Q Values:  [33256.45600344  2888.1385131   -180.00807518 64776.18019132]
New Q values:  [ 33256.45600344   2888.1385131    -180.00807518 123639.4605984 ]
Reward: 100009  Episode Reward:  100053
xxxxx
x a x
x   x
x g x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2460.06629125  6889.87642117 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [ 2460.06629125  6889.87642117 -4584.50430574 -1713.91177491]
New Q values:  [ 2460.06629125 10682.85945042 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966 26405.0296065 ]
------
Step:2, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966 26405.0296065 ]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966 17173.71740068]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 22021.01852693]
------
Step:3, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 22021.01852693]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 16538.97720809]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4183.29492107 -5704.51612281 25750.56599104 -5679.36893145]
------
Step:4, Action:East
State  261
Old Q Values:  [3587.57590391   26.73544252 6975.90551406  123.6214372 ]
New Q values:  [3587.57590391   26.73544252 7751.45536805  123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 16538.97720809]
------
Step:5, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 16538.97720809]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 14340.16068055]
Reward: -1  Episode Reward:  25
xxxxx
xg..x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4183.29492107 -5704.51612281 25750.56599104 -5679.36893145]
------
Step:6, Action:East
State  261
Old Q Values:  [3587.57590391   26.73544252 7751.45536805  123.6214372 ]
New Q values:  [3587.57590391   26.73544252 7402.03035139  123.6214372 ]
Reward: -1  Episode Reward:  24
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 14340.16068055]
------
Step:7, Action:West
State  273
Old Q Values:  [ 7550.40891976  -168.92307549 19055.81936309  7283.5411195 ]
New Q values:  [ 7550.40891976  -168.92307549 19055.81936309  5133.42555321]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3587.57590391   26.73544252 7402.03035139  123.6214372 ]
------
Step:8, Action:East
State  261
Old Q Values:  [3587.57590391   26.73544252 7402.03035139  123.6214372 ]
New Q values:  [3587.57590391   26.73544252 7262.26034472  123.6214372 ]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 14340.16068055]
------
Step:9, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 14340.16068055]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552 13460.63406953]
Reward: -1  Episode Reward:  21
xxxxx
xg..x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4183.29492107 -5704.51612281 25750.56599104 -5679.36893145]
------
Step:10, Action:East
State  260
Old Q Values:  [ 4183.29492107 -5704.51612281 25750.56599104 -5679.36893145]
New Q values:  [ 4183.29492107 -5704.51612281 14337.81661728 -5679.36893145]
Reward: -1  Episode Reward:  20
xxxxx
x...x
xg. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552 13460.63406953]
------
Step:11, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552 13460.63406953]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552  7562.33173123]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3587.57590391   26.73544252 7262.26034472  123.6214372 ]
------
Step:12, Action:East
State  261
Old Q Values:  [3587.57590391   26.73544252 7262.26034472  123.6214372 ]
New Q values:  [3587.57590391   26.73544252 8621.04994682  123.6214372 ]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7550.40891976  -168.92307549 19055.81936309  5133.42555321]
------
Step:13, Action:East
State  273
Old Q Values:  [ 7550.40891976  -168.92307549 19055.81936309  5133.42555321]
New Q values:  [ 7550.40891976  -168.92307549 12773.84296544  5133.42555321]
Reward: -1  Episode Reward:  17
xxxxx
x..gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966 17173.71740068]
------
Step:14, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966 17173.71740068]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966  9137.58647964]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552  7562.33173123]
------
Step:15, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552  7562.33173123]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552  5610.64767654]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3587.57590391   26.73544252 8621.04994682  123.6214372 ]
------
Step:16, Action:East
State  261
Old Q Values:  [3587.57590391   26.73544252 8621.04994682  123.6214372 ]
New Q values:  [3587.57590391   26.73544252 5131.01428169  123.6214372 ]
Reward: -1  Episode Reward:  14
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552  5610.64767654]
------
Step:17, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552  5610.64767654]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552  6545.0040558 ]
Reward: -1  Episode Reward:  13
xxxxx
xg..x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4183.29492107 -5704.51612281 14337.81661728 -5679.36893145]
------
Step:18, Action:East
State  261
Old Q Values:  [3587.57590391   26.73544252 5131.01428169  123.6214372 ]
New Q values:  [3587.57590391   26.73544252 4015.30692941  123.6214372 ]
Reward: -1  Episode Reward:  12
xxxxx
x.g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  2917.91595552  6545.0040558 ]
------
Step:19, Action:West
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552  6545.0040558 ]
New Q values:  [ 3829.63227336 -8521.23367799  2917.91595552  3821.99370114]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3587.57590391   26.73544252 4015.30692941  123.6214372 ]
------
Step:20, Action:East
State  261
Old Q Values:  [3587.57590391   26.73544252 4015.30692941  123.6214372 ]
New Q values:  [ 3587.57590391    26.73544252 -3245.58754623   123.6214372 ]
Reward: -10001  Episode Reward:  -9990
xxxxx
x...x
x . x
x g x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 7.05135032e+03 7.32028793e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [ 169.9257398  7582.92139682 1594.92975973  568.38654082]
New Q values:  [ 169.9257398  7582.92139682 2492.21342009  568.38654082]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  6162.805054   -8220.10378799   911.8961227 ]
------
Step:2, Action:South
State  208
Old Q Values:  [ 2460.06629125 10682.85945042 -4584.50430574 -1713.91177491]
New Q values:  [ 2460.06629125  7019.81972406 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966  9137.58647964]
------
Step:3, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966  9137.58647964]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966  7492.58748149]
Reward: 9  Episode Reward:  27
xxxxx
x..gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7550.40891976  -168.92307549 12773.84296544  5133.42555321]
------
Step:4, Action:East
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  2917.91595552  3821.99370114]
New Q values:  [ 3829.63227336 -8521.23367799  3414.34262665  3821.99370114]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966  7492.58748149]
------
Step:5, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966  7492.58748149]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966  6828.58788223]
Reward: -1  Episode Reward:  25
xxxxx
x..gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7550.40891976  -168.92307549 12773.84296544  5133.42555321]
------
Step:6, Action:East
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  3414.34262665  3821.99370114]
New Q values:  [ 3829.63227336 -8521.23367799  3413.71341533  3821.99370114]
Reward: -1  Episode Reward:  24
xxxxx
x.g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966  6828.58788223]
------
Step:7, Action:West
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966  6828.58788223]
New Q values:  [ 6574.44305903 -6396.61506955 -8192.20126966  6562.98804252]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7550.40891976  -168.92307549 12773.84296544  5133.42555321]
------
Step:8, Action:East
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  3413.71341533  3821.99370114]
New Q values:  [ 3829.63227336 -8521.23367799  3337.21828384  3821.99370114]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6574.44305903 -6396.61506955 -8192.20126966  6562.98804252]
------
Step:9, Action:North
State  288
Old Q Values:  [ 6574.44305903 -6396.61506955 -8192.20126966  6562.98804252]
New Q values:  [ 4735.12314083 -6396.61506955 -8192.20126966  6562.98804252]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2460.06629125  7019.81972406 -4584.50430574 -1713.91177491]
------
Step:10, Action:South
State  210
Old Q Values:  [32015.11956667  1561.20793434   790.72804752  1050.85266124]
New Q values:  [32015.11956667  2592.77958649   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4735.12314083 -6396.61506955 -8192.20126966  6562.98804252]
------
Step:11, Action:North
State  288
Old Q Values:  [ 4735.12314083 -6396.61506955 -8192.20126966  6562.98804252]
New Q values:  [11497.98512633 -6396.61506955 -8192.20126966  6562.98804252]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32015.11956667  2592.77958649   790.72804752  1050.85266124]
------
Step:12, Action:North
State  208
Old Q Values:  [ 2460.06629125  7019.81972406 -4584.50430574 -1713.91177491]
New Q values:  [38081.26469602  7019.81972406 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  28
xxxxx
x..ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 33256.45600344   2888.1385131    -180.00807518 123639.4605984 ]
------
Step:13, Action:West
State  128
Old Q Values:  [ 27558.53877122   8286.98116673  -8652.84       120244.63633507]
New Q values:  [27558.53877122  8286.98116673 -8652.84       69406.76602238]
Reward: 9  Episode Reward:  37
xxxxx
x.agx
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NE
[    0.          2847.11106414 -5999.38454759 71011.70496116]
------
Step:14, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2043.33262438   786.96142554]
New Q values:  [ -253.44886264 -1902.20915811  2043.33262438   868.63184067]
Reward: 9  Episode Reward:  46
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1828.15756819 1236.56402417 -252.78192178]
------
Step:15, Action:South
State  105
Old Q Values:  [-180.6         446.06592384 -764.93196255    0.        ]
New Q values:  [-180.6         221.54419787 -764.93196255    0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  145.72609444     0.         -2066.06383327  -178.98      ]
------
Step:16, Action:North
State  185
Old Q Values:  [  145.72609444     0.         -2066.06383327  -178.98      ]
New Q values:  [  606.13770823     0.         -2066.06383327  -178.98      ]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1828.15756819 1236.56402417 -252.78192178]
------
Step:17, Action:South
State  107
Old Q Values:  [-252.35169558 1828.15756819 1236.56402417 -252.78192178]
New Q values:  [-252.35169558 2009.45878069 1236.56402417 -252.78192178]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         4262.65251137    0.        ]
------
Step:18, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         4262.65251137    0.        ]
New Q values:  [ 320.07341842    0.         2506.18375434    0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  2672.40916596  1278.19575341]
------
Step:19, Action:East
State  203
Old Q Values:  [3.60604218e+00 3.97827546e+03 3.50642584e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 3.97827546e+03 2.05741271e+03 9.06816004e+03]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[1790.86908461 2184.8079221     0.         1847.21017375]
------
Step:20, Action:West
State  218
Old Q Values:  [1790.86908461 2184.8079221     0.         1847.21017375]
New Q values:  [1790.86908461 2184.8079221     0.         1540.00681929]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  2672.40916596  1278.19575341]
------
Step:21, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.57034496e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.58853157e+04  1.03161518e+03]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32015.11956667  2592.77958649   790.72804752  1050.85266124]
------
Step:22, Action:North
State  218
Old Q Values:  [1790.86908461 2184.8079221     0.         1540.00681929]
New Q values:  [1201.82139936 2184.8079221     0.         1540.00681929]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  1.62024589e+03]
------
Step:23, Action:West
State  138
Old Q Values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  1.62024589e+03]
New Q values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  1.26049814e+03]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2043.33262438   868.63184067]
------
Step:24, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2014.38433126   321.03855506]
New Q values:  [ -281.736      -1150.91067548  1183.3031749    321.03855506]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  1.26049814e+03]
------
Step:25, Action:West
State  130
Old Q Values:  [ 33256.45600344   2888.1385131    -180.00807518 123639.4605984 ]
New Q values:  [33256.45600344  2888.1385131   -180.00807518 87178.77276124]
Reward: -1  Episode Reward:  35
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:26, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1183.3031749    321.03855506]
New Q values:  [ -281.736      -1150.91067548  1183.3031749    730.65305623]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2009.45878069 1236.56402417 -252.78192178]
------
Step:27, Action:South
State  107
Old Q Values:  [-252.35169558 2009.45878069 1236.56402417 -252.78192178]
New Q values:  [-252.35169558  985.02482475 1236.56402417 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  606.13770823     0.         -2066.06383327  -178.98      ]
------
Step:28, Action:North
State  185
Old Q Values:  [  606.13770823     0.         -2066.06383327  -178.98      ]
New Q values:  [  308.31834265     0.         -2066.06383327  -178.98      ]
Reward: -1  Episode Reward:  32
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         221.54419787 -764.93196255    0.        ]
------
Step:29, Action:South
State  107
Old Q Values:  [-252.35169558  985.02482475 1236.56402417 -252.78192178]
New Q values:  [-252.35169558  485.90543269 1236.56402417 -252.78192178]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[  308.31834265     0.         -2066.06383327  -178.98      ]
------
Step:30, Action:North
State  185
Old Q Values:  [  308.31834265     0.         -2066.06383327  -178.98      ]
New Q values:  [  189.19059642     0.         -2066.06383327  -178.98      ]
Reward: -1  Episode Reward:  30
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         221.54419787 -764.93196255    0.        ]
------
Step:31, Action:South
State  104
Old Q Values:  [-8652.84        3598.88070822  2353.85801711 -8652.84      ]
New Q values:  [-8652.84        4133.7122365   2353.85801711 -8652.84      ]
Reward: -1  Episode Reward:  29
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[ 577.4048542    0.        8982.5331774    0.       ]
------
Step:32, Action:East
State  185
Old Q Values:  [  189.19059642     0.         -2066.06383327  -178.98      ]
New Q values:  [ 189.19059642    0.         1447.85088574 -178.98      ]
Reward: -1  Episode Reward:  28
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  7582.92139682 2492.21342009  568.38654082]
------
Step:33, Action:South
State  201
Old Q Values:  [ 2.33354578e+00  2.19191549e+03 -3.92419802e+03  5.29389063e+02]
New Q values:  [ 2.33354578e+00  4.70831909e+03 -3.92419802e+03  5.29389063e+02]
Reward: -1  Episode Reward:  27
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 7550.40891976  -168.92307549 12773.84296544  5133.42555321]
------
Step:34, Action:East
State  273
Old Q Values:  [ 7550.40891976  -168.92307549 12773.84296544  5133.42555321]
New Q values:  [7550.40891976 -168.92307549 8558.33272408 5133.42555321]
Reward: -1  Episode Reward:  26
xxxxx
x  gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11497.98512633 -6396.61506955 -8192.20126966  6562.98804252]
------
Step:35, Action:North
State  288
Old Q Values:  [11497.98512633 -6396.61506955 -8192.20126966  6562.98804252]
New Q values:  [ 6447.43556673 -6396.61506955 -8192.20126966  6562.98804252]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  6162.805054   -8220.10378799   911.8961227 ]
------
Step:36, Action:South
State  216
Old Q Values:  [ 2195.11823766  6162.805054   -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  4433.41843436 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6447.43556673 -6396.61506955 -8192.20126966  6562.98804252]
------
Step:37, Action:West
State  288
Old Q Values:  [ 6447.43556673 -6396.61506955 -8192.20126966  6562.98804252]
New Q values:  [ 6447.43556673 -6396.61506955 -8192.20126966  5192.09503423]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[7550.40891976 -168.92307549 8558.33272408 5133.42555321]
------
Step:38, Action:East
State  273
Old Q Values:  [7550.40891976 -168.92307549 8558.33272408 5133.42555321]
New Q values:  [7550.40891976 -168.92307549 -643.03624035 5133.42555321]
Reward: -10001  Episode Reward:  -9978
xxxxx
x   x
x   x
x. gx
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  1.26049814e+03]
------
Step:1, Action:West
State  138
Old Q Values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  1.26049814e+03]
New Q values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  8.64590209e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1183.3031749    730.65305623]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1183.3031749    730.65305623]
New Q values:  [ -281.736      -1150.91067548   732.09833266   730.65305623]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  8.64590209e+02]
------
Step:3, Action:West
State  138
Old Q Values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  8.64590209e+02]
New Q values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  5.64865583e+02]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   732.09833266   730.65305623]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   732.09833266   730.65305623]
New Q values:  [ -281.736      -1150.91067548   461.69900808   730.65305623]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
xg..x
x. .x
xxxxx
Step:5, Action:West
State  138
Old Q Values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  5.64865583e+02]
New Q values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  4.44542150e+02]
Reward: -1  Episode Reward:  5
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   461.69900808   730.65305623]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   461.69900808   730.65305623]
New Q values:  [ -281.736      -1150.91067548   461.69900808  1387.5422419 ]
Reward: 9  Episode Reward:  14
xxxxx
xa  x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3632.93673138  751.03868094 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 3632.93673138  751.03868094 -120.29354603]
New Q values:  [-177.44732869 3151.12388259  751.03868094 -120.29354603]
Reward: 9  Episode Reward:  23
xxxxx
x   x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 5245.81661103 5641.83063345 1554.80203889]
------
Step:8, Action:East
State  185
Old Q Values:  [ 189.19059642    0.         1447.85088574 -178.98      ]
New Q values:  [ 189.19059642    0.         1997.0360803  -178.98      ]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 2.33354578e+00  4.70831909e+03 -3.92419802e+03  5.29389063e+02]
------
Step:9, Action:South
State  196
Old Q Values:  [-2469.90645144  4027.93373228 11989.01001575  3512.17261526]
New Q values:  [-2469.90645144  9021.26565391 11989.01001575  3512.17261526]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 5473.51807038 -5807.06396197 24702.30720332  1564.23976583]
------
Step:10, Action:East
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  3337.21828384  3821.99370114]
New Q values:  [ 3829.63227336 -8521.23367799  3274.51798356  3821.99370114]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x  .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6447.43556673 -6396.61506955 -8192.20126966  5192.09503423]
------
Step:11, Action:North
State  288
Old Q Values:  [ 6447.43556673 -6396.61506955 -8192.20126966  5192.09503423]
New Q values:  [12188.91009669 -6396.61506955 -8192.20126966  5192.09503423]
Reward: 9  Episode Reward:  49
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32015.11956667  2592.77958649   790.72804752  1050.85266124]
------
Step:12, Action:North
State  218
Old Q Values:  [1201.82139936 2184.8079221     0.         1540.00681929]
New Q values:  [ 647.92864353 2184.8079221     0.         1540.00681929]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  4.44542150e+02]
------
Step:13, Action:South
State  130
Old Q Values:  [33256.45600344  2888.1385131   -180.00807518 87178.77276124]
New Q values:  [33256.45600344 10759.19127524  -180.00807518 87178.77276124]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32015.11956667  2592.77958649   790.72804752  1050.85266124]
------
Step:14, Action:North
State  218
Old Q Values:  [ 647.92864353 2184.8079221     0.         1540.00681929]
New Q values:  [ 426.3715412  2184.8079221     0.         1540.00681929]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  4.44542150e+02]
------
Step:15, Action:South
State  138
Old Q Values:  [ 4.56859410e+02  5.59333613e+02 -3.22965309e-01  4.44542150e+02]
New Q values:  [ 4.56859410e+02  1.55315898e+03 -3.22965309e-01  4.44542150e+02]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  4433.41843436 -8220.10378799   911.8961227 ]
------
Step:16, Action:South
State  216
Old Q Values:  [ 2195.11823766  4433.41843436 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  5429.44040275 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12188.91009669 -6396.61506955 -8192.20126966  5192.09503423]
------
Step:17, Action:North
State  288
Old Q Values:  [12188.91009669 -6396.61506955 -8192.20126966  5192.09503423]
New Q values:  [ 6503.7961595  -6396.61506955 -8192.20126966  5192.09503423]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  5429.44040275 -8220.10378799   911.8961227 ]
------
Step:18, Action:South
State  216
Old Q Values:  [ 2195.11823766  5429.44040275 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  4122.31500895 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6503.7961595  -6396.61506955 -8192.20126966  5192.09503423]
------
Step:19, Action:North
State  288
Old Q Values:  [ 6503.7961595  -6396.61506955 -8192.20126966  5192.09503423]
New Q values:  [ 3837.61296649 -6396.61506955 -8192.20126966  5192.09503423]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  4122.31500895 -8220.10378799   911.8961227 ]
------
Step:20, Action:South
State  216
Old Q Values:  [ 2195.11823766  4122.31500895 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  3205.95451385 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x   x
x. ax
xxxxx
Step:21, Action:East
State  288
Old Q Values:  [ 3837.61296649 -6396.61506955 -8192.20126966  5192.09503423]
New Q values:  [ 3837.61296649 -6396.61506955 -1899.85199759  5192.09503423]
Reward: -301  Episode Reward:  -261
xxxxx
x  gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3837.61296649 -6396.61506955 -1899.85199759  5192.09503423]
------
Step:22, Action:West
State  288
Old Q Values:  [ 3837.61296649 -6396.61506955 -1899.85199759  5192.09503423]
New Q values:  [ 3837.61296649 -6396.61506955 -1899.85199759  4341.36068962]
Reward: -1  Episode Reward:  -262
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[7550.40891976 -168.92307549 -643.03624035 5133.42555321]
------
Step:23, Action:North
State  273
Old Q Values:  [7550.40891976 -168.92307549 -643.03624035 5133.42555321]
New Q values:  [4432.05929391 -168.92307549 -643.03624035 5133.42555321]
Reward: -1  Episode Reward:  -263
xxxxx
x  gx
x a x
x.  x
xxxxx
Step:24, Action:North
State  201
Old Q Values:  [ 2.33354578e+00  4.70831909e+03 -3.92419802e+03  5.29389063e+02]
New Q values:  [  613.33320563  4708.31908668 -3924.19801569   529.38906334]
Reward: -1  Episode Reward:  -264
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2043.33262438   868.63184067]
------
Step:25, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   461.69900808  1387.5422419 ]
New Q values:  [ -281.736      -1150.91067548   650.02729584  1387.5422419 ]
Reward: -1  Episode Reward:  -265
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  1.55315898e+03 -3.22965309e-01  4.44542150e+02]
------
Step:26, Action:South
State  138
Old Q Values:  [ 4.56859410e+02  1.55315898e+03 -3.22965309e-01  4.44542150e+02]
New Q values:  [ 4.56859410e+02  1.58244994e+03 -3.22965309e-01  4.44542150e+02]
Reward: -1  Episode Reward:  -266
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  3205.95451385 -8220.10378799   911.8961227 ]
------
Step:27, Action:South
State  216
Old Q Values:  [ 2195.11823766  3205.95451385 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  2584.19001243 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  -267
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3837.61296649 -6396.61506955 -1899.85199759  4341.36068962]
------
Step:28, Action:West
State  288
Old Q Values:  [ 3837.61296649 -6396.61506955 -1899.85199759  4341.36068962]
New Q values:  [ 3837.61296649 -6396.61506955 -1899.85199759 -3115.16604214]
Reward: -10001  Episode Reward:  -10268
xxxxx
x   x
x   x
x.g x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3837.61296649 -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:1, Action:North
State  288
Old Q Values:  [ 3837.61296649 -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [12964.8245954  -6396.61506955 -1899.85199759 -3115.16604214]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[38081.26469602  7019.81972406 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [38081.26469602  7019.81972406 -4584.50430574 -1713.91177491]
New Q values:  [17217.40127971  7019.81972406 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
xg.ax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  6598.31800435   660.86649319   221.49781494]
------
Step:3, Action:South
State  130
Old Q Values:  [33256.45600344 10759.19127524  -180.00807518 87178.77276124]
New Q values:  [33256.45600344  9468.29689401  -180.00807518 87178.77276124]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17217.40127971  7019.81972406 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  210
Old Q Values:  [32015.11956667  2592.77958649   790.72804752  1050.85266124]
New Q values:  [13280.18280996  2592.77958649   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  1.58244994e+03 -3.22965309e-01  4.44542150e+02]
------
Step:5, Action:South
State  130
Old Q Values:  [33256.45600344  9468.29689401  -180.00807518 87178.77276124]
New Q values:  [33256.45600344  7770.77360059  -180.00807518 87178.77276124]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13280.18280996  2592.77958649   790.72804752  1050.85266124]
------
Step:6, Action:North
State  208
Old Q Values:  [17217.40127971  7019.81972406 -4584.50430574 -1713.91177491]
New Q values:  [33039.99234026  7019.81972406 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
x..ax
x.g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  7770.77360059  -180.00807518 87178.77276124]
------
Step:7, Action:West
State  136
Old Q Values:  [-7432.41476814  6598.31800435   660.86649319   221.49781494]
New Q values:  [-7432.41476814  6598.31800435   660.86649319 -5073.38471459]
Reward: -9991  Episode Reward:  -9977
xxxxx
x.g x
x.. x
x . x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  3274.51798356  3821.99370114]
------
Step:1, Action:West
State  276
Old Q Values:  [ 5473.51807038 -5807.06396197 24702.30720332  1564.23976583]
New Q values:  [ 5473.51807038 -5807.06396197 24702.30720332  4932.44089151]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4183.29492107 -5704.51612281 14337.81661728 -5679.36893145]
------
Step:2, Action:East
State  261
Old Q Values:  [ 3587.57590391    26.73544252 -3245.58754623   123.6214372 ]
New Q values:  [3587.57590391   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  3274.51798356  3821.99370114]
------
Step:3, Action:West
State  276
Old Q Values:  [ 5473.51807038 -5807.06396197 24702.30720332  4932.44089151]
New Q values:  [ 5473.51807038 -5807.06396197 24702.30720332  3048.64912778]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3587.57590391   26.73544252 -149.94533648  123.6214372 ]
------
Step:4, Action:North
State  260
Old Q Values:  [ 4183.29492107 -5704.51612281 14337.81661728 -5679.36893145]
New Q values:  [ 3415.44518288 -5704.51612281 14337.81661728 -5679.36893145]
Reward: 9  Episode Reward:  16
xxxxx
xg..x
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  4015.39186925  5789.09071482 -4966.32149798]
------
Step:5, Action:East
State  180
Old Q Values:  [-2840.23360603  4015.39186925  5789.09071482 -4966.32149798]
New Q values:  [-2840.23360603  4015.39186925  4511.12266518 -4966.32149798]
Reward: -1  Episode Reward:  15
xxxxx
x...x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 7.05135032e+03 7.32028793e+03]
------
Step:6, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.58853157e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.03435811e+04  1.03161518e+03]
Reward: 9  Episode Reward:  24
xxxxx
x...x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13280.18280996  2592.77958649   790.72804752  1050.85266124]
------
Step:7, Action:North
State  208
Old Q Values:  [33039.99234026  7019.81972406 -4584.50430574 -1713.91177491]
New Q values:  [39375.02876447  7019.81972406 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  33
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  7770.77360059  -180.00807518 87178.77276124]
------
Step:8, Action:West
State  130
Old Q Values:  [33256.45600344  7770.77360059  -180.00807518 87178.77276124]
New Q values:  [33256.45600344  7770.77360059  -180.00807518 72600.49762637]
Reward: 9  Episode Reward:  42
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:9, Action:West
State  126
Old Q Values:  [   0.          331.64678262 7128.26683804 1337.22554042]
New Q values:  [   0.          331.64678262 7128.26683804  646.18246453]
Reward: 9  Episode Reward:  51
xxxxx
xa  x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3902.35021231   352.97416123  -180.6       ]
------
Step:10, Action:East
State  108
Old Q Values:  [-8463.16477134  1968.68675445  1290.4787338      0.        ]
New Q values:  [-8463.16477134  1968.68675445  1703.17762825     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 3958.62044909 1772.94838375]
------
Step:11, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 3958.62044909 1772.94838375]
New Q values:  [   0.         1166.51141701 3562.34358094 1772.94838375]
Reward: -1  Episode Reward:  49
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-7432.41476814  6598.31800435   660.86649319 -5073.38471459]
------
Step:12, Action:South
State  136
Old Q Values:  [-7432.41476814  6598.31800435   660.86649319 -5073.38471459]
New Q values:  [-7432.41476814  3413.98420547   660.86649319 -5073.38471459]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  2584.19001243 -8220.10378799   911.8961227 ]
------
Step:13, Action:South
State  208
Old Q Values:  [39375.02876447  7019.81972406 -4584.50430574 -1713.91177491]
New Q values:  [39375.02876447 66702.77526824 -4584.50430574 -1713.91177491]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
xg..x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  4015.39186925  4511.12266518 -4966.32149798]
------
Step:1, Action:East
State  181
Old Q Values:  [ 1260.94591569  1279.45451029 -4097.89383915   262.76946019]
New Q values:  [1260.94591569 1279.45451029 1962.94546907  262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  9021.26565391 11989.01001575  3512.17261526]
------
Step:2, Action:East
State  201
Old Q Values:  [  613.33320563  4708.31908668 -3924.19801569   529.38906334]
New Q values:  [ 613.33320563 4708.31908668 -789.02220255  529.38906334]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  2584.19001243 -8220.10378799   911.8961227 ]
------
Step:3, Action:South
State  216
Old Q Values:  [ 2195.11823766  2584.19001243 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  4928.52338359 -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  27
xxxxx
x .gx
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12964.8245954  -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:4, Action:North
State  288
Old Q Values:  [12964.8245954  -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [ 6663.88685324 -6396.61506955 -1899.85199759 -3115.16604214]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2195.11823766  4928.52338359 -8220.10378799   911.8961227 ]
------
Step:5, Action:South
State  216
Old Q Values:  [ 2195.11823766  4928.52338359 -8220.10378799   911.8961227 ]
New Q values:  [ 2195.11823766  3969.97540941 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  25
xxxxx
x .gx
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6663.88685324 -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:6, Action:North
State  288
Old Q Values:  [ 6663.88685324 -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [16675.78732177 -6396.61506955 -1899.85199759 -3115.16604214]
Reward: -10001  Episode Reward:  -9976
xxxxx
x ..x
x  gx
x.. x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  1.58244994e+03 -3.22965309e-01  4.44542150e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 4.56859410e+02  1.58244994e+03 -3.22965309e-01  4.44542150e+02]
New Q values:  [ 4.56859410e+02  2.06492126e+04 -3.22965309e-01  4.44542150e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xg.ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[39375.02876447 66702.77526824 -4584.50430574 -1713.91177491]
------
Step:2, Action:South
State  208
Old Q Values:  [39375.02876447 66702.77526824 -4584.50430574 -1713.91177491]
New Q values:  [39375.02876447 31689.24630383 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16675.78732177 -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:3, Action:North
State  288
Old Q Values:  [16675.78732177 -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [18482.22355805 -6396.61506955 -1899.85199759 -3115.16604214]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[39375.02876447 31689.24630383 -4584.50430574 -1713.91177491]
------
Step:4, Action:North
State  208
Old Q Values:  [39375.02876447 31689.24630383 -4584.50430574 -1713.91177491]
New Q values:  [10773.60676743 31689.24630383 -4584.50430574 -1713.91177491]
Reward: -10001  Episode Reward:  -9984
xxxxx
x..gx
x.. x
x . x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  1.03435811e+04  1.03161518e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.71684510e+03 7.05135032e+03 7.32028793e+03]
New Q values:  [3.89777037e-01 6.71684510e+03 1.23327140e+04 7.32028793e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10773.60676743 31689.24630383 -4584.50430574 -1713.91177491]
------
Step:2, Action:South
State  208
Old Q Values:  [10773.60676743 31689.24630383 -4584.50430574 -1713.91177491]
New Q values:  [10773.60676743 18225.76558895 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x.  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18482.22355805 -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:3, Action:North
State  288
Old Q Values:  [18482.22355805 -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [12860.0190999  -6396.61506955 -1899.85199759 -3115.16604214]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x. ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10773.60676743 18225.76558895 -4584.50430574 -1713.91177491]
------
Step:4, Action:South
State  208
Old Q Values:  [10773.60676743 18225.76558895 -4584.50430574 -1713.91177491]
New Q values:  [10773.60676743 11147.71196555 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  16
xxxxx
xg..x
x.  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12860.0190999  -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:5, Action:North
State  288
Old Q Values:  [12860.0190999  -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [ 8487.72122963 -6396.61506955 -1899.85199759 -3115.16604214]
Reward: -1  Episode Reward:  15
xxxxx
x.g.x
x. ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10773.60676743 11147.71196555 -4584.50430574 -1713.91177491]
------
Step:6, Action:South
State  208
Old Q Values:  [10773.60676743 11147.71196555 -4584.50430574 -1713.91177491]
New Q values:  [10773.60676743  7004.80115511 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
x.  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8487.72122963 -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:7, Action:North
State  288
Old Q Values:  [ 8487.72122963 -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [ 6626.57052208 -6396.61506955 -1899.85199759 -3115.16604214]
Reward: -1  Episode Reward:  13
xxxxx
x.g.x
x. ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10773.60676743  7004.80115511 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  208
Old Q Values:  [10773.60676743  7004.80115511 -4584.50430574 -1713.91177491]
New Q values:  [19136.87251368  7004.80115511 -4584.50430574 -1713.91177491]
Reward: -9991  Episode Reward:  -9978
xxxxx
x..gx
x.  x
x . x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[4432.05929391 -168.92307549 -643.03624035 5133.42555321]
------
Step:1, Action:West
State  273
Old Q Values:  [4432.05929391 -168.92307549 -643.03624035 5133.42555321]
New Q values:  [4432.05929391 -168.92307549 -643.03624035 3135.04299246]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3587.57590391   26.73544252 -149.94533648  123.6214372 ]
------
Step:2, Action:North
State  261
Old Q Values:  [3587.57590391   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [2029.31400228   26.73544252 -149.94533648  123.6214372 ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1260.94591569 1279.45451029 1962.94546907  262.76946019]
------
Step:3, Action:South
State  180
Old Q Values:  [-2840.23360603  4015.39186925  4511.12266518 -4966.32149798]
New Q values:  [-2840.23360603  5906.90173288  4511.12266518 -4966.32149798]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3415.44518288 -5704.51612281 14337.81661728 -5679.36893145]
------
Step:4, Action:East
State  260
Old Q Values:  [ 3415.44518288 -5704.51612281 14337.81661728 -5679.36893145]
New Q values:  [ 3415.44518288 -5704.51612281  6883.41632892 -5679.36893145]
Reward: -1  Episode Reward:  16
xxxxx
xg. x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3829.63227336 -8521.23367799  3274.51798356  3821.99370114]
------
Step:5, Action:North
State  272
Old Q Values:  [ 3829.63227336 -8521.23367799  3274.51798356  3821.99370114]
New Q values:  [ 5237.06711471 -8521.23367799  3274.51798356  3821.99370114]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 1.23327140e+04 7.32028793e+03]
------
Step:6, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.03435811e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  8.12688730e+03  1.03161518e+03]
Reward: 9  Episode Reward:  34
xxxxx
x.. x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13280.18280996  2592.77958649   790.72804752  1050.85266124]
------
Step:7, Action:North
State  208
Old Q Values:  [19136.87251368  7004.80115511 -4584.50430574 -1713.91177491]
New Q values:  [29434.29829338  7004.80115511 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  33
xxxxx
x..ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  7770.77360059  -180.00807518 72600.49762637]
------
Step:8, Action:West
State  136
Old Q Values:  [-7432.41476814  3413.98420547   660.86649319 -5073.38471459]
New Q values:  [-7432.41476814  3413.98420547   660.86649319  -955.25081155]
Reward: 9  Episode Reward:  42
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[   0.         1166.51141701 3562.34358094 1772.94838375]
------
Step:9, Action:East
State  112
Old Q Values:  [     0.           3629.92591876  33495.48974052 110949.76306292]
New Q values:  [     0.           3629.92591876  34219.62570292 110949.76306292]
Reward: -1  Episode Reward:  41
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[27558.53877122  8286.98116673 -8652.84       69406.76602238]
------
Step:10, Action:North
State  136
Old Q Values:  [-7432.41476814  3413.98420547   660.86649319  -955.25081155]
New Q values:  [-2129.37064562  3413.98420547   660.86649319  -955.25081155]
Reward: -301  Episode Reward:  -260
xxxxx
xg ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3413.98420547   660.86649319  -955.25081155]
------
Step:11, Action:South
State  128
Old Q Values:  [27558.53877122  8286.98116673 -8652.84       69406.76602238]
New Q values:  [27558.53877122 12144.48195471 -8652.84       69406.76602238]
Reward: -1  Episode Reward:  -261
xxxxx
x.g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29434.29829338  7004.80115511 -4584.50430574 -1713.91177491]
------
Step:12, Action:North
State  208
Old Q Values:  [29434.29829338  7004.80115511 -4584.50430574 -1713.91177491]
New Q values:  [26595.14912407  7004.80115511 -4584.50430574 -1713.91177491]
Reward: -10001  Episode Reward:  -10262
xxxxx
x. gx
x   x
x  .x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   650.02729584  1387.5422419 ]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   650.02729584  1387.5422419 ]
New Q values:  [ -281.736      -1150.91067548   650.02729584   666.30914513]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3902.35021231   352.97416123  -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134  1968.68675445  1703.17762825     0.        ]
New Q values:  [-8463.16477134  1968.68675445  1513.28721073     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xga.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2775.38719811   -98.79429602]
------
Step:3, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2775.38719811   -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  2139.75014088   -98.79429602]
Reward: 9  Episode Reward:  17
xxxxx
x gax
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3413.98420547   660.86649319  -955.25081155]
------
Step:4, Action:South
State  136
Old Q Values:  [-2129.37064562  3413.98420547   660.86649319  -955.25081155]
New Q values:  [-2129.37064562  9349.53841941   660.86649319  -955.25081155]
Reward: 9  Episode Reward:  26
xxxxx
x  gx
x..ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26595.14912407  7004.80115511 -4584.50430574 -1713.91177491]
------
Step:5, Action:South
State  208
Old Q Values:  [26595.14912407  7004.80115511 -4584.50430574 -1713.91177491]
New Q values:  [26595.14912407  4795.29161867 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  35
xxxxx
x g x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6626.57052208 -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:6, Action:North
State  288
Old Q Values:  [ 6626.57052208 -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [10628.57294605 -6396.61506955 -1899.85199759 -3115.16604214]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26595.14912407  4795.29161867 -4584.50430574 -1713.91177491]
------
Step:7, Action:North
State  208
Old Q Values:  [26595.14912407  4795.29161867 -4584.50430574 -1713.91177491]
New Q values:  [13442.32117545  4795.29161867 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  33
xxxxx
x gax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  9349.53841941   660.86649319  -955.25081155]
------
Step:8, Action:South
State  136
Old Q Values:  [-2129.37064562  9349.53841941   660.86649319  -955.25081155]
New Q values:  [-2129.37064562  7771.9117204    660.86649319  -955.25081155]
Reward: -1  Episode Reward:  32
xxxxx
xg  x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13442.32117545  4795.29161867 -4584.50430574 -1713.91177491]
------
Step:9, Action:North
State  208
Old Q Values:  [13442.32117545  4795.29161867 -4584.50430574 -1713.91177491]
New Q values:  [ 7707.9019863   4795.29161867 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  31
xxxxx
x gax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  7771.9117204    660.86649319  -955.25081155]
------
Step:10, Action:South
State  136
Old Q Values:  [-2129.37064562  7771.9117204    660.86649319  -955.25081155]
New Q values:  [-2129.37064562  5420.53528405   660.86649319  -955.25081155]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7707.9019863   4795.29161867 -4584.50430574 -1713.91177491]
------
Step:11, Action:North
State  208
Old Q Values:  [ 7707.9019863   4795.29161867 -4584.50430574 -1713.91177491]
New Q values:  [ 4708.72137973  4795.29161867 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  29
xxxxx
x gax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  5420.53528405   660.86649319  -955.25081155]
------
Step:12, Action:South
State  136
Old Q Values:  [-2129.37064562  5420.53528405   660.86649319  -955.25081155]
New Q values:  [-2129.37064562  3606.20159922   660.86649319  -955.25081155]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4708.72137973  4795.29161867 -4584.50430574 -1713.91177491]
------
Step:13, Action:South
State  208
Old Q Values:  [ 4708.72137973  4795.29161867 -4584.50430574 -1713.91177491]
New Q values:  [ 4708.72137973  5106.08853128 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  27
xxxxx
x   x
x..gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10628.57294605 -6396.61506955 -1899.85199759 -3115.16604214]
------
Step:14, Action:East
State  288
Old Q Values:  [10628.57294605 -6396.61506955 -1899.85199759 -3115.16604214]
New Q values:  [10628.57294605 -6396.61506955  2248.03108478 -3115.16604214]
Reward: -301  Episode Reward:  -274
xxxxx
x   x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10628.57294605 -6396.61506955  2248.03108478 -3115.16604214]
------
Step:15, Action:North
State  288
Old Q Values:  [10628.57294605 -6396.61506955  2248.03108478 -3115.16604214]
New Q values:  [ 5782.65573781 -6396.61506955  2248.03108478 -3115.16604214]
Reward: -1  Episode Reward:  -275
xxxxx
x   x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4708.72137973  5106.08853128 -4584.50430574 -1713.91177491]
------
Step:16, Action:South
State  208
Old Q Values:  [ 4708.72137973  5106.08853128 -4584.50430574 -1713.91177491]
New Q values:  [ 4708.72137973  3776.63213385 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -276
xxxxx
x   x
x.g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5782.65573781 -6396.61506955  2248.03108478 -3115.16604214]
------
Step:17, Action:North
State  288
Old Q Values:  [ 5782.65573781 -6396.61506955  2248.03108478 -3115.16604214]
New Q values:  [ 3725.07870904 -6396.61506955  2248.03108478 -3115.16604214]
Reward: -1  Episode Reward:  -277
xxxxx
x   x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4708.72137973  3776.63213385 -4584.50430574 -1713.91177491]
------
Step:18, Action:North
State  210
Old Q Values:  [13280.18280996  2592.77958649   790.72804752  1050.85266124]
New Q values:  [11506.23689144  2592.77958649   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  -278
xxxxx
x  ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  2.06492126e+04 -3.22965309e-01  4.44542150e+02]
------
Step:19, Action:South
State  138
Old Q Values:  [ 4.56859410e+02  2.06492126e+04 -3.22965309e-01  4.44542150e+02]
New Q values:  [ 4.56859410e+02  9.67170144e+03 -3.22965309e-01  4.44542150e+02]
Reward: -1  Episode Reward:  -279
xxxxx
x   x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4708.72137973  3776.63213385 -4584.50430574 -1713.91177491]
------
Step:20, Action:North
State  208
Old Q Values:  [ 4708.72137973  3776.63213385 -4584.50430574 -1713.91177491]
New Q values:  [ 4784.39898305  3776.63213385 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -280
xxxxx
x  ax
x.g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02  9.67170144e+03 -3.22965309e-01  4.44542150e+02]
------
Step:21, Action:South
State  138
Old Q Values:  [ 4.56859410e+02  9.67170144e+03 -3.22965309e-01  4.44542150e+02]
New Q values:  [ 4.56859410e+02 -6.96599730e+02 -3.22965309e-01  4.44542150e+02]
Reward: -10001  Episode Reward:  -10281
xxxxx
x   x
x..gx
x . x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   366.82262216]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2043.33262438   868.63184067]
New Q values:  [ -253.44886264 -1902.20915811  2043.33262438  1298.18990105]
Reward: 9  Episode Reward:  9
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3151.12388259  751.03868094 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 3151.12388259  751.03868094 -120.29354603]
New Q values:  [-177.44732869 2958.39874307  751.03868094 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 5245.81661103 5641.83063345 1554.80203889]
------
Step:3, Action:East
State  189
Old Q Values:  [ 275.08817949 3059.00845116 8879.56680328  154.04646645]
New Q values:  [ 275.08817949 3059.00845116 4969.72244732  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4708.31908668 -789.02220255  529.38906334]
------
Step:4, Action:South
State  196
Old Q Values:  [-2469.90645144  9021.26565391 11989.01001575  3512.17261526]
New Q values:  [-2469.90645144 11024.59842256 11989.01001575  3512.17261526]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 5473.51807038 -5807.06396197 24702.30720332  3048.64912778]
------
Step:5, Action:East
State  273
Old Q Values:  [4432.05929391 -168.92307549 -643.03624035 3135.04299246]
New Q values:  [4432.05929391 -168.92307549  865.70911657 3135.04299246]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3725.07870904 -6396.61506955  2248.03108478 -3115.16604214]
------
Step:6, Action:East
State  288
Old Q Values:  [ 3725.07870904 -6396.61506955  2248.03108478 -3115.16604214]
New Q values:  [ 3725.07870904 -6396.61506955  1836.13604662 -3115.16604214]
Reward: -301  Episode Reward:  -256
xxxxx
x  gx
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3725.07870904 -6396.61506955  1836.13604662 -3115.16604214]
------
Step:7, Action:North
State  288
Old Q Values:  [ 3725.07870904 -6396.61506955  1836.13604662 -3115.16604214]
New Q values:  [-3313.57589356 -6396.61506955  1836.13604662 -3115.16604214]
Reward: -9991  Episode Reward:  -10247
xxxxx
x   x
x  gx
x.  x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1260.94591569 1279.45451029 1962.94546907  262.76946019]
------
Step:1, Action:South
State  181
Old Q Values:  [1260.94591569 1279.45451029 1962.94546907  262.76946019]
New Q values:  [1260.94591569 1125.9760048  1962.94546907  262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2029.31400228   26.73544252 -149.94533648  123.6214372 ]
------
Step:2, Action:North
State  260
Old Q Values:  [ 3415.44518288 -5704.51612281  6883.41632892 -5679.36893145]
New Q values:  [ 3137.64859301 -5704.51612281  6883.41632892 -5679.36893145]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  5906.90173288  4511.12266518 -4966.32149798]
------
Step:3, Action:South
State  181
Old Q Values:  [1260.94591569 1125.9760048  1962.94546907  262.76946019]
New Q values:  [1260.94591569 1058.58460261 1962.94546907  262.76946019]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2029.31400228   26.73544252 -149.94533648  123.6214372 ]
------
Step:4, Action:North
State  261
Old Q Values:  [2029.31400228   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [1400.00924163   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1260.94591569 1058.58460261 1962.94546907  262.76946019]
------
Step:5, Action:North
State  181
Old Q Values:  [1260.94591569 1058.58460261 1962.94546907  262.76946019]
New Q values:  [ 854.4436561  1058.58460261 1962.94546907  262.76946019]
Reward: 9  Episode Reward:  15
xxxxx
xag.x
x  .x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1148.88429941 -2165.66138672  -180.6       ]
------
Step:6, Action:South
State  109
Old Q Values:  [ -241.10880094  1148.88429941 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1047.83736048 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  14
xxxxx
x .gx
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1058.58460261 1962.94546907  262.76946019]
------
Step:7, Action:East
State  181
Old Q Values:  [ 854.4436561  1058.58460261 1962.94546907  262.76946019]
New Q values:  [ 854.4436561  1058.58460261 4381.28119235  262.76946019]
Reward: -1  Episode Reward:  13
xxxxx
x g.x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144 11024.59842256 11989.01001575  3512.17261526]
------
Step:8, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.71684510e+03 1.23327140e+04 7.32028793e+03]
New Q values:  [3.89777037e-01 6.71684510e+03 6.37380530e+03 7.32028793e+03]
Reward: 9  Episode Reward:  22
xxxxx
xg..x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4784.39898305  3776.63213385 -4584.50430574 -1713.91177491]
------
Step:9, Action:North
State  216
Old Q Values:  [ 2195.11823766  3969.97540941 -8220.10378799   911.8961227 ]
New Q values:  [ 1020.50511808  3969.97540941 -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  31
xxxxx
x .ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.56859410e+02 -6.96599730e+02 -3.22965309e-01  4.44542150e+02]
------
Step:10, Action:North
State  138
Old Q Values:  [ 4.56859410e+02 -6.96599730e+02 -3.22965309e-01  4.44542150e+02]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  4.44542150e+02]
Reward: -301  Episode Reward:  -270
xxxxx
x .ax
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  4.44542150e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  4.44542150e+02]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.32169691e+03]
Reward: 9  Episode Reward:  -261
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 7128.26683804  646.18246453]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   650.02729584   666.30914513]
New Q values:  [ -281.736      -1150.91067548   955.91999179   666.30914513]
Reward: -1  Episode Reward:  -262
xxxxx
x  ax
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.32169691e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.32169691e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  3.06655882e+03]
Reward: -1  Episode Reward:  -263
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 7128.26683804  646.18246453]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   955.91999179   666.30914513]
New Q values:  [ -281.736      -1150.91067548  1301.73564152   666.30914513]
Reward: -1  Episode Reward:  -264
xxxxx
x  ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  3.06655882e+03]
------
Step:15, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  3.06655882e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.61654422e+03]
Reward: -1  Episode Reward:  -265
xxxxx
x a x
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1301.73564152   666.30914513]
------
Step:16, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1301.73564152   666.30914513]
New Q values:  [ -281.736      -1150.91067548  1005.05752227   666.30914513]
Reward: -1  Episode Reward:  -266
xxxxx
x  ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.61654422e+03]
------
Step:17, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.61654422e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.78449774e+03]
Reward: -1  Episode Reward:  -267
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 7128.26683804  646.18246453]
------
Step:18, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2043.33262438  1298.18990105]
New Q values:  [ -253.44886264 -1902.20915811  1652.08237144  1298.18990105]
Reward: -1  Episode Reward:  -268
xxxxx
x  ax
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.78449774e+03]
------
Step:19, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.78449774e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.60882381e+03]
Reward: -1  Episode Reward:  -269
xxxxx
x a x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1652.08237144  1298.18990105]
------
Step:20, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1005.05752227   666.30914513]
New Q values:  [ -281.736      -1150.91067548   884.07015101   666.30914513]
Reward: -1  Episode Reward:  -270
xxxxx
x  ax
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.60882381e+03]
------
Step:21, Action:West
State  136
Old Q Values:  [-2129.37064562  3606.20159922   660.86649319  -955.25081155]
New Q values:  [-2129.37064562  3606.20159922   660.86649319 -5740.77528236]
Reward: -10001  Episode Reward:  -10271
xxxxx
x g x
x   x
x ..x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  8.12688730e+03  1.03161518e+03]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.71684510e+03 6.37380530e+03 7.32028793e+03]
New Q values:  [3.89777037e-01 6.71684510e+03 3.99024182e+03 7.32028793e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4784.39898305  3776.63213385 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [ 4784.39898305  3776.63213385 -4584.50430574 -1713.91177491]
New Q values:  [ 2401.80673533  3776.63213385 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.60882381e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.60882381e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  3.83725180e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   884.07015101   666.30914513]
New Q values:  [ -281.736      -1150.91067548   884.07015101   377.81590642]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -3902.35021231   352.97416123  -180.6       ]
------
Step:5, Action:East
State  110
Old Q Values:  [ -239.29051573 -3902.35021231   352.97416123  -180.6       ]
New Q values:  [ -239.29051573 -3902.35021231   405.81070979  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x a x
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   884.07015101   377.81590642]
------
Step:6, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2139.75014088   -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  1937.16053612   -98.79429602]
Reward: -1  Episode Reward:  34
xxxxx
xg ax
x.  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3606.20159922   660.86649319 -5740.77528236]
------
Step:7, Action:South
State  136
Old Q Values:  [-2129.37064562  3606.20159922   660.86649319 -5740.77528236]
New Q values:  [-2129.37064562  2632.87326251   660.86649319 -5740.77528236]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x. ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1020.50511808  3969.97540941 -8220.10378799   911.8961227 ]
------
Step:8, Action:South
State  208
Old Q Values:  [ 2401.80673533  3776.63213385 -4584.50430574 -1713.91177491]
New Q values:  [ 2401.80673533  2066.89366753 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x.g x
x .ax
xxxxx
Step:9, Action:West
State  288
Old Q Values:  [-3313.57589356 -6396.61506955  1836.13604662 -3115.16604214]
New Q values:  [-3313.57589356 -6396.61506955  1836.13604662    88.95137132]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x. gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[4432.05929391 -168.92307549  865.70911657 3135.04299246]
------
Step:10, Action:North
State  273
Old Q Values:  [4432.05929391 -168.92307549  865.70911657 3135.04299246]
New Q values:  [4487.51584222 -168.92307549  865.70911657 3135.04299246]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x.a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 4885.27337691 9050.97374885 1169.39963074]
------
Step:11, Action:East
State  193
Old Q Values:  [-5922.26708831 13246.97719412  9681.98001768  1460.9765133 ]
New Q values:  [-5922.26708831 13246.97719412 -1407.26597233  1460.9765133 ]
Reward: -10001  Episode Reward:  -9951
xxxxx
x   x
x. gx
x   x
xxxxx
Episode # 1200
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955  1836.13604662    88.95137132]
------
Step:1, Action:East
State  288
Old Q Values:  [-3313.57589356 -6396.61506955  1836.13604662    88.95137132]
New Q values:  [-3313.57589356 -6396.61506955  1104.69523264    88.95137132]
Reward: -301  Episode Reward:  -301
xxxxx
xg..x
x ..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955  1104.69523264    88.95137132]
------
Step:2, Action:East
State  288
Old Q Values:  [-3313.57589356 -6396.61506955  1104.69523264    88.95137132]
New Q values:  [-3313.57589356 -6396.61506955   592.68666285    88.95137132]
Reward: -301  Episode Reward:  -602
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955   592.68666285    88.95137132]
------
Step:3, Action:East
State  288
Old Q Values:  [-3313.57589356 -6396.61506955   592.68666285    88.95137132]
New Q values:  [-3313.57589356 -6396.61506955   234.28066399    88.95137132]
Reward: -301  Episode Reward:  -903
xxxxx
xg..x
x ..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955   234.28066399    88.95137132]
------
Step:4, Action:East
State  288
Old Q Values:  [-3313.57589356 -6396.61506955   234.28066399    88.95137132]
New Q values:  [-3313.57589356 -6396.61506955   -16.60353521    88.95137132]
Reward: -301  Episode Reward:  -1204
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955   -16.60353521    88.95137132]
------
Step:5, Action:West
State  288
Old Q Values:  [-3313.57589356 -6396.61506955   -16.60353521    88.95137132]
New Q values:  [-3313.57589356 -6396.61506955   -16.60353521  1612.10068294]
Reward: 9  Episode Reward:  -1195
xxxxx
x...x
x g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5237.06711471 -8521.23367799  3274.51798356  3821.99370114]
------
Step:6, Action:West
State  272
Old Q Values:  [ 5237.06711471 -8521.23367799  3274.51798356  3821.99370114]
New Q values:  [ 5237.06711471 -8521.23367799  3274.51798356  1954.20025295]
Reward: 9  Episode Reward:  -1186
xxxxx
x...x
x ..x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1400.00924163   26.73544252 -149.94533648  123.6214372 ]
------
Step:7, Action:North
State  261
Old Q Values:  [1400.00924163   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [1873.78805436   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  -1187
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1058.58460261 4381.28119235  262.76946019]
------
Step:8, Action:South
State  181
Old Q Values:  [ 854.4436561  1058.58460261 4381.28119235  262.76946019]
New Q values:  [ 854.4436561   984.97025735 4381.28119235  262.76946019]
Reward: -1  Episode Reward:  -1188
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1873.78805436   26.73544252 -149.94533648  123.6214372 ]
------
Step:9, Action:North
State  261
Old Q Values:  [1873.78805436   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [2063.29957945   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  -1189
xxxxx
x..gx
xa..x
x   x
xxxxx
Step:10, Action:South
State  181
Old Q Values:  [ 854.4436561   984.97025735 4381.28119235  262.76946019]
New Q values:  [ 854.4436561  1012.37797677 4381.28119235  262.76946019]
Reward: -1  Episode Reward:  -1190
xxxxx
x..gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2063.29957945   26.73544252 -149.94533648  123.6214372 ]
------
Step:11, Action:North
State  261
Old Q Values:  [2063.29957945   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [2139.10418949   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  -1191
xxxxx
x...x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1012.37797677 4381.28119235  262.76946019]
------
Step:12, Action:East
State  181
Old Q Values:  [ 854.4436561  1012.37797677 4381.28119235  262.76946019]
New Q values:  [ 854.4436561  1012.37797677 5732.00563518  262.76946019]
Reward: 9  Episode Reward:  -1182
xxxxx
x..gx
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 13246.97719412 -1407.26597233  1460.9765133 ]
------
Step:13, Action:South
State  193
Old Q Values:  [-5922.26708831 13246.97719412 -1407.26597233  1460.9765133 ]
New Q values:  [-5922.26708831  6644.44563031 -1407.26597233  1460.9765133 ]
Reward: -1  Episode Reward:  -1183
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[4487.51584222 -168.92307549  865.70911657 3135.04299246]
------
Step:14, Action:North
State  273
Old Q Values:  [4487.51584222 -168.92307549  865.70911657 3135.04299246]
New Q values:  [4509.69846154 -168.92307549  865.70911657 3135.04299246]
Reward: -1  Episode Reward:  -1184
xxxxx
x...x
x a.x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 4885.27337691 9050.97374885 1169.39963074]
------
Step:15, Action:East
State  193
Old Q Values:  [-5922.26708831  6644.44563031 -1407.26597233  1460.9765133 ]
New Q values:  [-5922.26708831  6644.44563031 -5836.96436833  1460.9765133 ]
Reward: -9991  Episode Reward:  -11175
xxxxx
x...x
x  gx
x   x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.71684510e+03 3.99024182e+03 7.32028793e+03]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.71684510e+03 3.99024182e+03 7.32028793e+03]
New Q values:  [3.89777037e-01 4.26325818e+03 3.99024182e+03 7.32028793e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5237.06711471 -8521.23367799  3274.51798356  1954.20025295]
------
Step:2, Action:East
State  273
Old Q Values:  [4509.69846154 -168.92307549  865.70911657 3135.04299246]
New Q values:  [4509.69846154 -168.92307549  835.31385151 3135.04299246]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955   -16.60353521  1612.10068294]
------
Step:3, Action:West
State  288
Old Q Values:  [-3313.57589356 -6396.61506955   -16.60353521  1612.10068294]
New Q values:  [-3313.57589356 -6396.61506955   -16.60353521  1997.14981164]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[4509.69846154 -168.92307549  835.31385151 3135.04299246]
------
Step:4, Action:North
State  273
Old Q Values:  [4509.69846154 -168.92307549  835.31385151 3135.04299246]
New Q values:  [3796.61307371 -168.92307549  835.31385151 3135.04299246]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  6644.44563031 -5836.96436833  1460.9765133 ]
------
Step:5, Action:South
State  193
Old Q Values:  [-5922.26708831  6644.44563031 -5836.96436833  1460.9765133 ]
New Q values:  [-5922.26708831  3796.16217424 -5836.96436833  1460.9765133 ]
Reward: -1  Episode Reward:  15
xxxxx
x..gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3796.61307371 -168.92307549  835.31385151 3135.04299246]
------
Step:6, Action:North
State  273
Old Q Values:  [3796.61307371 -168.92307549  835.31385151 3135.04299246]
New Q values:  [2656.89388176 -168.92307549  835.31385151 3135.04299246]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3796.16217424 -5836.96436833  1460.9765133 ]
------
Step:7, Action:South
State  193
Old Q Values:  [-5922.26708831  3796.16217424 -5836.96436833  1460.9765133 ]
New Q values:  [-5922.26708831  2458.37776743 -5836.96436833  1460.9765133 ]
Reward: -1  Episode Reward:  13
xxxxx
x..gx
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2656.89388176 -168.92307549  835.31385151 3135.04299246]
------
Step:8, Action:West
State  276
Old Q Values:  [ 5473.51807038 -5807.06396197 24702.30720332  3048.64912778]
New Q values:  [ 5473.51807038 -5807.06396197 24702.30720332  1866.59090796]
Reward: 9  Episode Reward:  22
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2139.10418949   26.73544252 -149.94533648  123.6214372 ]
------
Step:9, Action:North
State  261
Old Q Values:  [2139.10418949   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [2574.64336635   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  21
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1012.37797677 5732.00563518  262.76946019]
------
Step:10, Action:South
State  181
Old Q Values:  [ 854.4436561  1012.37797677 5732.00563518  262.76946019]
New Q values:  [ 854.4436561  1176.74420061 5732.00563518  262.76946019]
Reward: -1  Episode Reward:  20
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2574.64336635   26.73544252 -149.94533648  123.6214372 ]
------
Step:11, Action:North
State  261
Old Q Values:  [2574.64336635   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [2748.85903709   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  19
xxxxx
x..gx
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1176.74420061 5732.00563518  262.76946019]
------
Step:12, Action:East
State  181
Old Q Values:  [ 854.4436561  1176.74420061 5732.00563518  262.76946019]
New Q values:  [ 854.4436561  1176.74420061 3029.7155843   262.76946019]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2458.37776743 -5836.96436833  1460.9765133 ]
------
Step:13, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.26325818e+03 3.99024182e+03 7.32028793e+03]
New Q values:  [3.89777037e-01 3.27582340e+03 3.99024182e+03 7.32028793e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5237.06711471 -8521.23367799  3274.51798356  1954.20025295]
------
Step:14, Action:East
State  272
Old Q Values:  [ 5237.06711471 -8521.23367799  3274.51798356  1954.20025295]
New Q values:  [ 5237.06711471 -8521.23367799  1908.35213691  1954.20025295]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955   -16.60353521  1997.14981164]
------
Step:15, Action:East
State  288
Old Q Values:  [-3313.57589356 -6396.61506955   -16.60353521  1997.14981164]
New Q values:  [-3313.57589356 -6396.61506955 -5588.09647059  1997.14981164]
Reward: -10301  Episode Reward:  -10285
xxxxx
x...x
x  .x
x  gx
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2401.80673533  2066.89366753 -4584.50430574 -1713.91177491]
------
Step:1, Action:North
State  208
Old Q Values:  [ 2401.80673533  2066.89366753 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712  2066.89366753 -4584.50430574 -1713.91177491]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. gx
x.. x
x...x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.27582340e+03 3.99024182e+03 7.32028793e+03]
------
Step:1, Action:West
State  196
Old Q Values:  [-2469.90645144 11024.59842256 11989.01001575  3512.17261526]
New Q values:  [-2469.90645144 11024.59842256 11989.01001575 -2817.66043403]
Reward: -9991  Episode Reward:  -9991
xxxxx
x ..x
xg .x
x...x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955 -5588.09647059  1997.14981164]
------
Step:1, Action:West
State  288
Old Q Values:  [-3313.57589356 -6396.61506955 -5588.09647059  1997.14981164]
New Q values:  [-3313.57589356 -6396.61506955 -5588.09647059  2375.38005907]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5237.06711471 -8521.23367799  1908.35213691  1954.20025295]
------
Step:2, Action:North
State  272
Old Q Values:  [ 5237.06711471 -8521.23367799  1908.35213691  1954.20025295]
New Q values:  [ 5696.92985061 -8521.23367799  1908.35213691  1954.20025295]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x.a.x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144 11024.59842256 11989.01001575 -2817.66043403]
------
Step:3, Action:East
State  193
Old Q Values:  [-5922.26708831  2458.37776743 -5836.96436833  1460.9765133 ]
New Q values:  [-5922.26708831  2458.37776743 -1709.31764708  1460.9765133 ]
Reward: 9  Episode Reward:  27
xxxxx
x .gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712  2066.89366753 -4584.50430574 -1713.91177491]
------
Step:4, Action:South
State  208
Old Q Values:  [-4244.01532712  2066.89366753 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712  1538.77148473 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955 -5588.09647059  2375.38005907]
------
Step:5, Action:West
State  288
Old Q Values:  [-3313.57589356 -6396.61506955 -5588.09647059  2375.38005907]
New Q values:  [-3313.57589356 -6396.61506955 -5588.09647059  2658.63097881]
Reward: -1  Episode Reward:  25
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5696.92985061 -8521.23367799  1908.35213691  1954.20025295]
------
Step:6, Action:North
State  272
Old Q Values:  [ 5696.92985061 -8521.23367799  1908.35213691  1954.20025295]
New Q values:  [ 4474.2583195  -8521.23367799  1908.35213691  1954.20025295]
Reward: -1  Episode Reward:  24
xxxxx
x g.x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.27582340e+03 3.99024182e+03 7.32028793e+03]
------
Step:7, Action:West
State  192
Old Q Values:  [3.89777037e-01 3.27582340e+03 3.99024182e+03 7.32028793e+03]
New Q values:  [3.89777037e-01 3.27582340e+03 3.99024182e+03 4.70558569e+03]
Reward: 9  Episode Reward:  33
xxxxx
xg..x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  5906.90173288  4511.12266518 -4966.32149798]
------
Step:8, Action:South
State  181
Old Q Values:  [ 854.4436561  1176.74420061 3029.7155843   262.76946019]
New Q values:  [ 854.4436561  1300.75539137 3029.7155843   262.76946019]
Reward: 9  Episode Reward:  42
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2748.85903709   26.73544252 -149.94533648  123.6214372 ]
------
Step:9, Action:North
State  260
Old Q Values:  [ 3137.64859301 -5704.51612281  6883.41632892 -5679.36893145]
New Q values:  [ 3026.52995707 -5704.51612281  6883.41632892 -5679.36893145]
Reward: -1  Episode Reward:  41
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  5906.90173288  4511.12266518 -4966.32149798]
------
Step:10, Action:South
State  181
Old Q Values:  [ 854.4436561  1300.75539137 3029.7155843   262.76946019]
New Q values:  [ 854.4436561  1344.35986768 3029.7155843   262.76946019]
Reward: -1  Episode Reward:  40
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2748.85903709   26.73544252 -149.94533648  123.6214372 ]
------
Step:11, Action:North
State  261
Old Q Values:  [2748.85903709   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [2007.85829013   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1344.35986768 3029.7155843   262.76946019]
------
Step:12, Action:South
State  181
Old Q Values:  [ 854.4436561  1344.35986768 3029.7155843   262.76946019]
New Q values:  [ 854.4436561  1139.50143411 3029.7155843   262.76946019]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
x  gx
xa  x
xxxxx
Step:13, Action:North
State  261
Old Q Values:  [2007.85829013   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [2495.09250609   26.73544252 -149.94533648  123.6214372 ]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2174.36956339 5245.81661103 5641.83063345 1554.80203889]
------
Step:14, Action:East
State  181
Old Q Values:  [ 854.4436561  1139.50143411 3029.7155843   262.76946019]
New Q values:  [ 854.4436561  1139.50143411 1948.79956395  262.76946019]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  2458.37776743 -1709.31764708  1460.9765133 ]
------
Step:15, Action:South
State  193
Old Q Values:  [-5922.26708831  2458.37776743 -1709.31764708  1460.9765133 ]
New Q values:  [-5922.26708831  1923.26400471 -1709.31764708  1460.9765133 ]
Reward: -1  Episode Reward:  35
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2656.89388176 -168.92307549  835.31385151 3135.04299246]
------
Step:16, Action:West
State  276
Old Q Values:  [ 5473.51807038 -5807.06396197 24702.30720332  1866.59090796]
New Q values:  [ 5473.51807038 -5807.06396197 24702.30720332  1494.56411501]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2495.09250609   26.73544252 -149.94533648  123.6214372 ]
------
Step:17, Action:North
State  260
Old Q Values:  [ 3026.52995707 -5704.51612281  6883.41632892 -5679.36893145]
New Q values:  [ 2982.08250269 -5704.51612281  6883.41632892 -5679.36893145]
Reward: -1  Episode Reward:  33
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-2840.23360603  5906.90173288  4511.12266518 -4966.32149798]
------
Step:18, Action:South
State  180
Old Q Values:  [-2840.23360603  5906.90173288  4511.12266518 -4966.32149798]
New Q values:  [-2840.23360603  4427.18559183  4511.12266518 -4966.32149798]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2982.08250269 -5704.51612281  6883.41632892 -5679.36893145]
------
Step:19, Action:East
State  261
Old Q Values:  [2495.09250609   26.73544252 -149.94533648  123.6214372 ]
New Q values:  [2495.09250609   26.73544252 1281.69936126  123.6214372 ]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4474.2583195  -8521.23367799  1908.35213691  1954.20025295]
------
Step:20, Action:West
State  273
Old Q Values:  [2656.89388176 -168.92307549  835.31385151 3135.04299246]
New Q values:  [2656.89388176 -168.92307549  835.31385151 2001.94494881]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2495.09250609   26.73544252 1281.69936126  123.6214372 ]
------
Step:21, Action:North
State  261
Old Q Values:  [2495.09250609   26.73544252 1281.69936126  123.6214372 ]
New Q values:  [1582.07687162   26.73544252 1281.69936126  123.6214372 ]
Reward: -1  Episode Reward:  29
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1139.50143411 1948.79956395  262.76946019]
------
Step:22, Action:East
State  181
Old Q Values:  [ 854.4436561  1139.50143411 1948.79956395  262.76946019]
New Q values:  [ 854.4436561  1139.50143411 4375.62283031  262.76946019]
Reward: -1  Episode Reward:  28
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144 11024.59842256 11989.01001575 -2817.66043403]
------
Step:23, Action:East
State  193
Old Q Values:  [-5922.26708831  1923.26400471 -1709.31764708  1460.9765133 ]
New Q values:  [-5922.26708831  1923.26400471  -222.69561341  1460.9765133 ]
Reward: -1  Episode Reward:  27
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712  1538.77148473 -4584.50430574 -1713.91177491]
------
Step:24, Action:South
State  208
Old Q Values:  [-4244.01532712  1538.77148473 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712  1412.49788754 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955 -5588.09647059  2658.63097881]
------
Step:25, Action:West
State  288
Old Q Values:  [-3313.57589356 -6396.61506955 -5588.09647059  2658.63097881]
New Q values:  [-3313.57589356 -6396.61506955 -5588.09647059  2405.12988737]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4474.2583195  -8521.23367799  1908.35213691  1954.20025295]
------
Step:26, Action:West
State  276
Old Q Values:  [ 5473.51807038 -5807.06396197 24702.30720332  1494.56411501]
New Q values:  [ 5473.51807038 -5807.06396197 24702.30720332  1071.84870749]
Reward: -1  Episode Reward:  24
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1582.07687162   26.73544252 1281.69936126  123.6214372 ]
------
Step:27, Action:North
State  261
Old Q Values:  [1582.07687162   26.73544252 1281.69936126  123.6214372 ]
New Q values:  [1944.91759774   26.73544252 1281.69936126  123.6214372 ]
Reward: -1  Episode Reward:  23
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1139.50143411 4375.62283031  262.76946019]
------
Step:28, Action:East
State  181
Old Q Values:  [ 854.4436561  1139.50143411 4375.62283031  262.76946019]
New Q values:  [ 854.4436561  1139.50143411 2326.62833354  262.76946019]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1923.26400471  -222.69561341  1460.9765133 ]
------
Step:29, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.27582340e+03 3.99024182e+03 4.70558569e+03]
New Q values:  [3.89777037e-01 2.65200686e+03 3.99024182e+03 4.70558569e+03]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4474.2583195  -8521.23367799  1908.35213691  1954.20025295]
------
Step:30, Action:West
State  276
Old Q Values:  [ 5473.51807038 -5807.06396197 24702.30720332  1071.84870749]
New Q values:  [ 5473.51807038 -5807.06396197 24702.30720332  1011.61476232]
Reward: -1  Episode Reward:  20
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1944.91759774   26.73544252 1281.69936126  123.6214372 ]
------
Step:31, Action:North
State  261
Old Q Values:  [1944.91759774   26.73544252 1281.69936126  123.6214372 ]
New Q values:  [1475.35553916   26.73544252 1281.69936126  123.6214372 ]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1139.50143411 2326.62833354  262.76946019]
------
Step:32, Action:South
State  183
Old Q Values:  [2174.36956339 5245.81661103 5641.83063345 1554.80203889]
New Q values:  [2174.36956339 2540.33330616 5641.83063345 1554.80203889]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1475.35553916   26.73544252 1281.69936126  123.6214372 ]
------
Step:33, Action:North
State  261
Old Q Values:  [1475.35553916   26.73544252 1281.69936126  123.6214372 ]
New Q values:  [1287.53071572   26.73544252 1281.69936126  123.6214372 ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 854.4436561  1139.50143411 2326.62833354  262.76946019]
------
Step:34, Action:South
State  180
Old Q Values:  [-2840.23360603  4427.18559183  4511.12266518 -4966.32149798]
New Q values:  [-2840.23360603  3835.29913541  4511.12266518 -4966.32149798]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2982.08250269 -5704.51612281  6883.41632892 -5679.36893145]
------
Step:35, Action:East
State  260
Old Q Values:  [ 2982.08250269 -5704.51612281  6883.41632892 -5679.36893145]
New Q values:  [ 2982.08250269 -5704.51612281  4095.04402742 -5679.36893145]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4474.2583195  -8521.23367799  1908.35213691  1954.20025295]
------
Step:36, Action:North
State  272
Old Q Values:  [ 4474.2583195  -8521.23367799  1908.35213691  1954.20025295]
New Q values:  [ 3200.77903546 -8521.23367799  1908.35213691  1954.20025295]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.65200686e+03 3.99024182e+03 4.70558569e+03]
------
Step:37, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.65200686e+03 3.99024182e+03 4.70558569e+03]
New Q values:  [3.89777037e-01 2.65200686e+03 2.01924609e+03 4.70558569e+03]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712  1412.49788754 -4584.50430574 -1713.91177491]
------
Step:38, Action:South
State  208
Old Q Values:  [-4244.01532712  1412.49788754 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712  1285.93812123 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-3313.57589356 -6396.61506955 -5588.09647059  2405.12988737]
------
Step:39, Action:West
State  288
Old Q Values:  [-3313.57589356 -6396.61506955 -5588.09647059  2405.12988737]
New Q values:  [-3313.57589356 -6396.61506955 -5588.09647059  1921.68566559]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3200.77903546 -8521.23367799  1908.35213691  1954.20025295]
------
Step:40, Action:West
State  272
Old Q Values:  [ 3200.77903546 -8521.23367799  1908.35213691  1954.20025295]
New Q values:  [ 3200.77903546 -8521.23367799  1908.35213691  2009.5933094 ]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2982.08250269 -5704.51612281  4095.04402742 -5679.36893145]
------
Step:41, Action:East
State  261
Old Q Values:  [1287.53071572   26.73544252 1281.69936126  123.6214372 ]
New Q values:  [1287.53071572   26.73544252 1472.31345514  123.6214372 ]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3200.77903546 -8521.23367799  1908.35213691  2009.5933094 ]
------
Step:42, Action:West
State  272
Old Q Values:  [ 3200.77903546 -8521.23367799  1908.35213691  2009.5933094 ]
New Q values:  [ 3200.77903546 -8521.23367799  1908.35213691  2031.75053199]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2982.08250269 -5704.51612281  4095.04402742 -5679.36893145]
------
Step:43, Action:East
State  260
Old Q Values:  [ 2982.08250269 -5704.51612281  4095.04402742 -5679.36893145]
New Q values:  [ 2982.08250269 -5704.51612281  2597.6513216  -5679.36893145]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3200.77903546 -8521.23367799  1908.35213691  2031.75053199]
------
Step:44, Action:North
State  276
Old Q Values:  [ 5473.51807038 -5807.06396197 24702.30720332  1011.61476232]
New Q values:  [ 5785.51023288 -5807.06396197 24702.30720332  1011.61476232]
Reward: -1  Episode Reward:  6
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144 11024.59842256 11989.01001575 -2817.66043403]
------
Step:45, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.65200686e+03 2.01924609e+03 4.70558569e+03]
New Q values:  [3.89777037e-01 2.65200686e+03 1.19287987e+03 4.70558569e+03]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712  1285.93812123 -4584.50430574 -1713.91177491]
------
Step:46, Action:South
State  210
Old Q Values:  [11506.23689144  2592.77958649   790.72804752  1050.85266124]
New Q values:  [11506.23689144  1613.01753427   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x   x
x gax
xxxxx
Step:47, Action:North
State  288
Old Q Values:  [-3313.57589356 -6396.61506955 -5588.09647059  1921.68566559]
New Q values:  [ 2125.84071001 -6396.61506955 -5588.09647059  1921.68566559]
Reward: -1  Episode Reward:  3
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11506.23689144  1613.01753427   790.72804752  1050.85266124]
------
Step:48, Action:North
State  210
Old Q Values:  [11506.23689144  1613.01753427   790.72804752  1050.85266124]
New Q values:  [26388.04404449  1613.01753427   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  12
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344  7770.77360059  -180.00807518 72600.49762637]
------
Step:49, Action:West
State  130
Old Q Values:  [33256.45600344  7770.77360059  -180.00807518 72600.49762637]
New Q values:  [ 33256.45600344   7770.77360059   -180.00807518 126769.18757242]
Reward: 100009  Episode Reward:  100021
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3200.77903546 -8521.23367799  1908.35213691  2031.75053199]
------
Step:1, Action:North
State  276
Old Q Values:  [ 5785.51023288 -5807.06396197 24702.30720332  1011.61476232]
New Q values:  [ 4757.67028198 -5807.06396197 24702.30720332  1011.61476232]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  8.12688730e+03  1.03161518e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.65200686e+03 1.19287987e+03 4.70558569e+03]
New Q values:  [3.89777037e-01 2.65200686e+03 8.68333386e+02 4.70558569e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712  1285.93812123 -4584.50430574 -1713.91177491]
------
Step:3, Action:South
State  208
Old Q Values:  [-4244.01532712  1285.93812123 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712  1157.52746149 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2125.84071001 -6396.61506955 -5588.09647059  1921.68566559]
------
Step:4, Action:North
State  288
Old Q Values:  [ 2125.84071001 -6396.61506955 -5588.09647059  1921.68566559]
New Q values:  [ 1196.99452245 -6396.61506955 -5588.09647059  1921.68566559]
Reward: -1  Episode Reward:  26
xxxxx
x...x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712  1157.52746149 -4584.50430574 -1713.91177491]
------
Step:5, Action:South
State  210
Old Q Values:  [26388.04404449  1613.01753427   790.72804752  1050.85266124]
New Q values:  [26388.04404449  1221.11271339   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1196.99452245 -6396.61506955 -5588.09647059  1921.68566559]
------
Step:6, Action:West
State  288
Old Q Values:  [ 1196.99452245 -6396.61506955 -5588.09647059  1921.68566559]
New Q values:  [ 1196.99452245 -6396.61506955 -5588.09647059  1728.30797687]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3200.77903546 -8521.23367799  1908.35213691  2031.75053199]
------
Step:7, Action:North
State  272
Old Q Values:  [ 3200.77903546 -8521.23367799  1908.35213691  2031.75053199]
New Q values:  [ 2691.38732184 -8521.23367799  1908.35213691  2031.75053199]
Reward: -1  Episode Reward:  23
xxxxx
x...x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.65200686e+03 8.68333386e+02 4.70558569e+03]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.65200686e+03 8.68333386e+02 4.70558569e+03]
New Q values:  [3.89777037e-01 1.86761894e+03 8.68333386e+02 4.70558569e+03]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2691.38732184 -8521.23367799  1908.35213691  2031.75053199]
------
Step:9, Action:West
State  272
Old Q Values:  [ 2691.38732184 -8521.23367799  1908.35213691  2031.75053199]
New Q values:  [ 2691.38732184 -8521.23367799  1908.35213691  6030.33382803]
Reward: 9  Episode Reward:  31
xxxxx
x.g.x
x   x
xa  x
xxxxx
Step:10, Action:North
State  257
Old Q Values:  [17374.11205079 12764.58618105  6430.48514739  1875.31501677]
New Q values:  [17002.21199032 12764.58618105  6430.48514739  1875.31501677]
Reward: -1  Episode Reward:  30
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  20101.94546536     0.        ]
------
Step:11, Action:North
State  181
Old Q Values:  [ 854.4436561  1139.50143411 2326.62833354  262.76946019]
New Q values:  [1561.59235033 1139.50143411 2326.62833354  262.76946019]
Reward: 9  Episode Reward:  39
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 4048.04962631  238.35800069    0.        ]
------
Step:12, Action:South
State  103
Old Q Values:  [ 221.30610858 4048.04962631  238.35800069    0.        ]
New Q values:  [ 221.30610858 2316.60835059  238.35800069    0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1561.59235033 1139.50143411 2326.62833354  262.76946019]
------
Step:13, Action:North
State  181
Old Q Values:  [1561.59235033 1139.50143411 2326.62833354  262.76946019]
New Q values:  [1225.29733293 1139.50143411 2326.62833354  262.76946019]
Reward: -1  Episode Reward:  37
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         2004.20130931    0.            0.        ]
------
Step:14, Action:South
State  101
Old Q Values:  [   0.         2004.20130931    0.            0.        ]
New Q values:  [   0.         1499.06902379    0.            0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1225.29733293 1139.50143411 2326.62833354  262.76946019]
------
Step:15, Action:East
State  181
Old Q Values:  [1225.29733293 1139.50143411 2326.62833354  262.76946019]
New Q values:  [1225.29733293 1139.50143411 4526.75433814  262.76946019]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144 11024.59842256 11989.01001575 -2817.66043403]
------
Step:16, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.86761894e+03 8.68333386e+02 4.70558569e+03]
New Q values:  [3.89777037e-01 1.86761894e+03 6.93991593e+02 4.70558569e+03]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712  1157.52746149 -4584.50430574 -1713.91177491]
------
Step:17, Action:South
State  208
Old Q Values:  [-4244.01532712  1157.52746149 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712   980.90337766 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1196.99452245 -6396.61506955 -5588.09647059  1728.30797687]
------
Step:18, Action:West
State  288
Old Q Values:  [ 1196.99452245 -6396.61506955 -5588.09647059  1728.30797687]
New Q values:  [ 1196.99452245 -6396.61506955 -5588.09647059  2499.82333916]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2691.38732184 -8521.23367799  1908.35213691  6030.33382803]
------
Step:19, Action:North
State  272
Old Q Values:  [ 2691.38732184 -8521.23367799  1908.35213691  6030.33382803]
New Q values:  [ 3514.02111757 -8521.23367799  1908.35213691  6030.33382803]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  8.12688730e+03  1.03161518e+03]
------
Step:20, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.86761894e+03 6.93991593e+02 4.70558569e+03]
New Q values:  [3.89777037e-01 1.86761894e+03 5.71267650e+02 4.70558569e+03]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712   980.90337766 -4584.50430574 -1713.91177491]
------
Step:21, Action:South
State  210
Old Q Values:  [26388.04404449  1221.11271339   790.72804752  1050.85266124]
New Q values:  [26388.04404449  1237.7920871    790.72804752  1050.85266124]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1196.99452245 -6396.61506955 -5588.09647059  2499.82333916]
------
Step:22, Action:North
State  288
Old Q Values:  [ 1196.99452245 -6396.61506955 -5588.09647059  2499.82333916]
New Q values:  [  772.46882228 -6396.61506955 -5588.09647059  2499.82333916]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712   980.90337766 -4584.50430574 -1713.91177491]
------
Step:23, Action:South
State  208
Old Q Values:  [-4244.01532712   980.90337766 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712  1141.70835281 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  772.46882228 -6396.61506955 -5588.09647059  2499.82333916]
------
Step:24, Action:West
State  288
Old Q Values:  [  772.46882228 -6396.61506955 -5588.09647059  2499.82333916]
New Q values:  [  772.46882228 -6396.61506955 -5588.09647059  1796.39750019]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2656.89388176 -168.92307549  835.31385151 2001.94494881]
------
Step:25, Action:North
State  273
Old Q Values:  [2656.89388176 -168.92307549  835.31385151 2001.94494881]
New Q values:  [1639.13675412 -168.92307549  835.31385151 2001.94494881]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1923.26400471  -222.69561341  1460.9765133 ]
------
Step:26, Action:South
State  195
Old Q Values:  [  38.85388605 4885.27337691 9050.97374885 1169.39963074]
New Q values:  [  38.85388605 2554.09283541 9050.97374885 1169.39963074]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1639.13675412 -168.92307549  835.31385151 2001.94494881]
------
Step:27, Action:West
State  273
Old Q Values:  [1639.13675412 -168.92307549  835.31385151 2001.94494881]
New Q values:  [1639.13675412 -168.92307549  835.31385151 1241.87201607]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1287.53071572   26.73544252 1472.31345514  123.6214372 ]
------
Step:28, Action:East
State  261
Old Q Values:  [1287.53071572   26.73544252 1472.31345514  123.6214372 ]
New Q values:  [1287.53071572   26.73544252 1080.06640829  123.6214372 ]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1639.13675412 -168.92307549  835.31385151 1241.87201607]
------
Step:29, Action:North
State  273
Old Q Values:  [1639.13675412 -168.92307549  835.31385151 1241.87201607]
New Q values:  [1232.03390306 -168.92307549  835.31385151 1241.87201607]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1923.26400471  -222.69561341  1460.9765133 ]
------
Step:30, Action:South
State  193
Old Q Values:  [-5922.26708831  1923.26400471  -222.69561341  1460.9765133 ]
New Q values:  [-5922.26708831  1141.2672067   -222.69561341  1460.9765133 ]
Reward: -1  Episode Reward:  20
xxxxx
x .gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1232.03390306 -168.92307549  835.31385151 1241.87201607]
------
Step:31, Action:West
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 24702.30720332  1011.61476232]
New Q values:  [ 4757.67028198 -5807.06396197 24702.30720332   790.30511964]
Reward: -1  Episode Reward:  19
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1287.53071572   26.73544252 1080.06640829  123.6214372 ]
------
Step:32, Action:North
State  261
Old Q Values:  [1287.53071572   26.73544252 1080.06640829  123.6214372 ]
New Q values:  [1872.43858773   26.73544252 1080.06640829  123.6214372 ]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1225.29733293 1139.50143411 4526.75433814  262.76946019]
------
Step:33, Action:North
State  183
Old Q Values:  [2174.36956339 2540.33330616 5641.83063345 1554.80203889]
New Q values:  [1564.13033053 2540.33330616 5641.83063345 1554.80203889]
Reward: -1  Episode Reward:  17
xxxxx
xa..x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2316.60835059  238.35800069    0.        ]
------
Step:34, Action:South
State  103
Old Q Values:  [ 221.30610858 2316.60835059  238.35800069    0.        ]
New Q values:  [ 221.30610858 2284.06964168  238.35800069    0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1225.29733293 1139.50143411 4526.75433814  262.76946019]
------
Step:35, Action:North
State  181
Old Q Values:  [1225.29733293 1139.50143411 4526.75433814  262.76946019]
New Q values:  [1174.73982567 1139.50143411 4526.75433814  262.76946019]
Reward: -1  Episode Reward:  15
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2284.06964168  238.35800069    0.        ]
------
Step:36, Action:South
State  103
Old Q Values:  [ 221.30610858 2284.06964168  238.35800069    0.        ]
New Q values:  [ 221.30610858 2271.05415811  238.35800069    0.        ]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1174.73982567 1139.50143411 4526.75433814  262.76946019]
------
Step:37, Action:North
State  180
Old Q Values:  [-2840.23360603  3835.29913541  4511.12266518 -4966.32149798]
New Q values:  [ -553.96922156  3835.29913541  4511.12266518 -4966.32149798]
Reward: -1  Episode Reward:  13
xxxxx
xa..x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        1942.41406952  522.17020433 -180.6       ]
------
Step:38, Action:East
State  102
Old Q Values:  [-180.6        1942.41406952  522.17020433 -180.6       ]
New Q values:  [-180.6        1942.41406952 2846.56389321 -180.6       ]
Reward: 9  Episode Reward:  22
xxxxx
x a.x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ 617.06804554 8774.3193716     0.          503.49427758]
------
Step:39, Action:South
State  118
Old Q Values:  [ 617.06804554 8774.3193716     0.          503.49427758]
New Q values:  [ 617.06804554 9204.39250305    0.          503.49427758]
Reward: -1  Episode Reward:  21
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.89842158e+04  0.00000000e+00]
------
Step:40, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  1.89842158e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  1.55094996e+04  0.00000000e+00]
Reward: -1  Episode Reward:  20
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26388.04404449  1237.7920871    790.72804752  1050.85266124]
------
Step:41, Action:North
State  210
Old Q Values:  [26388.04404449  1237.7920871    790.72804752  1050.85266124]
New Q values:  [108591.37388952   1237.7920871     790.72804752   1050.85266124]
Reward: 100009  Episode Reward:  100029
xxxxx
x  ax
x   x
x g x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 2554.09283541 9050.97374885 1169.39963074]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  8.12688730e+03  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  3.58335671e+04  1.03161518e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[108591.37388952   1237.7920871     790.72804752   1050.85266124]
------
Step:2, Action:North
State  210
Old Q Values:  [108591.37388952   1237.7920871     790.72804752   1050.85266124]
New Q values:  [81472.70582754  1237.7920871    790.72804752  1050.85266124]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 33256.45600344   7770.77360059   -180.00807518 126769.18757242]
------
Step:3, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  3.83725180e+04]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.56196283e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   884.07015101   377.81590642]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   884.07015101   377.81590642]
New Q values:  [ -281.736      -1150.91067548  5038.91653936   377.81590642]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.56196283e+04]
------
Step:5, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.56196283e+04]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  6.74287602e+03]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1652.08237144  1298.18990105]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  5038.91653936   377.81590642]
New Q values:  [ -281.736      -1150.91067548  4037.82942075   377.81590642]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  6.74287602e+03]
------
Step:7, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  6.74287602e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  3.90789923e+03]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4037.82942075   377.81590642]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4037.82942075   377.81590642]
New Q values:  [ -281.736      -1150.91067548  2786.90153817   377.81590642]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  3.90789923e+03]
------
Step:9, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  3.90789923e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.05818440e+03]
Reward: -1  Episode Reward:  21
xxxxx
x.a x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1652.08237144  1298.18990105]
------
Step:10, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1652.08237144  1298.18990105]
New Q values:  [ -253.44886264 -1902.20915811  1277.68826995  1298.18990105]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.05818440e+03]
------
Step:11, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  2.05818440e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.65874422e+03]
Reward: -1  Episode Reward:  19
xxxxx
x.a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2786.90153817   377.81590642]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2786.90153817   377.81590642]
New Q values:  [ -281.736      -1150.91067548  1611.78388226   377.81590642]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.65874422e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.65874422e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.14643285e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1611.78388226   377.81590642]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1611.78388226   377.81590642]
New Q values:  [ -281.736      -1150.91067548   988.0434091    377.81590642]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.14643285e+03]
------
Step:15, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  1.14643285e+03]
New Q values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  7.54386164e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   988.0434091    377.81590642]
------
Step:16, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1937.16053612   -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  1564.1261932    -98.79429602]
Reward: -1  Episode Reward:  14
xxxxx
xg ax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2632.87326251   660.86649319 -5740.77528236]
------
Step:17, Action:South
State  138
Old Q Values:  [ 1.39201587e+02 -6.96599730e+02 -3.22965309e-01  7.54386164e+02]
New Q values:  [ 1.39201587e+02  9.11752731e+02 -3.22965309e-01  7.54386164e+02]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1020.50511808  3969.97540941 -8220.10378799   911.8961227 ]
------
Step:18, Action:South
State  210
Old Q Values:  [81472.70582754  1237.7920871    790.72804752  1050.85266124]
New Q values:  [81472.70582754  1033.4360849    790.72804752  1050.85266124]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
x.  x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  772.46882228 -6396.61506955 -5588.09647059  1796.39750019]
------
Step:19, Action:West
State  288
Old Q Values:  [  772.46882228 -6396.61506955 -5588.09647059  1796.39750019]
New Q values:  [  772.46882228 -6396.61506955 -5588.09647059 -3466.94085151]
Reward: -9991  Episode Reward:  -9979
xxxxx
x.  x
x.  x
x.g x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1872.43858773   26.73544252 1080.06640829  123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [1872.43858773   26.73544252 1080.06640829  123.6214372 ]
New Q values:  [2112.40173653   26.73544252 1080.06640829  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1174.73982567 1139.50143411 4526.75433814  262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [1174.73982567 1139.50143411 4526.75433814  262.76946019]
New Q values:  [ 1174.73982567  1139.50143411 -2772.22255708   262.76946019]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.. x
x g.x
x ..x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712  1141.70835281 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [-4244.01532712  1141.70835281 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712   693.82398781 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  772.46882228 -6396.61506955 -5588.09647059 -3466.94085151]
------
Step:2, Action:North
State  288
Old Q Values:  [  772.46882228 -6396.61506955 -5588.09647059 -3466.94085151]
New Q values:  [  516.53472525 -6396.61506955 -5588.09647059 -3466.94085151]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[-4244.01532712   693.82398781 -4584.50430574 -1713.91177491]
------
Step:3, Action:South
State  208
Old Q Values:  [-4244.01532712   693.82398781 -4584.50430574 -1713.91177491]
New Q values:  [-4244.01532712   431.8900127  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  516.53472525 -6396.61506955 -5588.09647059 -3466.94085151]
------
Step:4, Action:West
State  288
Old Q Values:  [  516.53472525 -6396.61506955 -5588.09647059 -3466.94085151]
New Q values:  [  516.53472525 -6396.61506955 -5588.09647059   427.7238078 ]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  1908.35213691  6030.33382803]
------
Step:5, Action:West
State  273
Old Q Values:  [1232.03390306 -168.92307549  835.31385151 1241.87201607]
New Q values:  [1232.03390306 -168.92307549  835.31385151 5602.81240352]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[17002.21199032 12764.58618105  6430.48514739  1875.31501677]
------
Step:6, Action:North
State  261
Old Q Values:  [2112.40173653   26.73544252 1080.06640829  123.6214372 ]
New Q values:  [1202.78264232   26.73544252 1080.06640829  123.6214372 ]
Reward: 9  Episode Reward:  34
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1174.73982567  1139.50143411 -2772.22255708   262.76946019]
------
Step:7, Action:North
State  181
Old Q Values:  [ 1174.73982567  1139.50143411 -2772.22255708   262.76946019]
New Q values:  [  925.0166374   1139.50143411 -2772.22255708   262.76946019]
Reward: 9  Episode Reward:  43
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         1499.06902379    0.            0.        ]
------
Step:8, Action:South
State  101
Old Q Values:  [   0.         1499.06902379    0.            0.        ]
New Q values:  [  0.         940.87803975   0.           0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  925.0166374   1139.50143411 -2772.22255708   262.76946019]
------
Step:9, Action:South
State  181
Old Q Values:  [  925.0166374   1139.50143411 -2772.22255708   262.76946019]
New Q values:  [  925.0166374    816.03536634 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  41
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1202.78264232   26.73544252 1080.06640829  123.6214372 ]
------
Step:10, Action:North
State  261
Old Q Values:  [1202.78264232   26.73544252 1080.06640829  123.6214372 ]
New Q values:  [ 758.01804815   26.73544252 1080.06640829  123.6214372 ]
Reward: -1  Episode Reward:  40
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  925.0166374    816.03536634 -2772.22255708   262.76946019]
------
Step:11, Action:North
State  181
Old Q Values:  [  925.0166374    816.03536634 -2772.22255708   262.76946019]
New Q values:  [ 1050.7229024    816.03536634 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  39
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2271.05415811  238.35800069    0.        ]
------
Step:12, Action:South
State  103
Old Q Values:  [ 221.30610858 2271.05415811  238.35800069    0.        ]
New Q values:  [ 221.30610858 2600.37085328  238.35800069    0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1564.13033053 2540.33330616 5641.83063345 1554.80203889]
------
Step:13, Action:East
State  183
Old Q Values:  [1564.13033053 2540.33330616 5641.83063345 1554.80203889]
New Q values:  [ 1564.13033053  2540.33330616 13006.20237897  1554.80203889]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  3.58335671e+04  1.03161518e+03]
------
Step:14, Action:East
State  195
Old Q Values:  [  38.85388605 2554.09283541 9050.97374885 1169.39963074]
New Q values:  [   38.85388605  2554.09283541 28061.6012478   1169.39963074]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[81472.70582754  1033.4360849    790.72804752  1050.85266124]
------
Step:15, Action:North
State  208
Old Q Values:  [-4244.01532712   431.8900127  -4584.50430574 -1713.91177491]
New Q values:  [36338.55014088   431.8900127  -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  45
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 33256.45600344   7770.77360059   -180.00807518 126769.18757242]
------
Step:16, Action:West
State  130
Old Q Values:  [ 33256.45600344   7770.77360059   -180.00807518 126769.18757242]
New Q values:  [ 33256.45600344   7770.77360059   -180.00807518 150966.89771801]
Reward: 100009  Episode Reward:  100054
xxxxx
x a x
x   x
x  gx
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1047.83736048 -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094  1047.83736048 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   739.75181491 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1050.7229024    816.03536634 -2772.22255708   262.76946019]
------
Step:2, Action:North
State  181
Old Q Values:  [ 1050.7229024    816.03536634 -2772.22255708   262.76946019]
New Q values:  [ 1307.20878388   816.03536634 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
xa. x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2958.39874307  751.03868094 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 2958.39874307  751.03868094 -120.29354603]
New Q values:  [-177.44732869 1574.92213239  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  7
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1307.20878388   816.03536634 -2772.22255708   262.76946019]
------
Step:4, Action:North
State  183
Old Q Values:  [ 1564.13033053  2540.33330616 13006.20237897  1554.80203889]
New Q values:  [ 1097.52877193  2540.33330616 13006.20237897  1554.80203889]
Reward: -1  Episode Reward:  6
xxxxx
xa. x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1574.92213239  751.03868094 -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 1574.92213239  751.03868094 -120.29354603]
New Q values:  [-177.44732869 4531.22956665  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  5
xxxxx
x . x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1097.52877193  2540.33330616 13006.20237897  1554.80203889]
------
Step:6, Action:East
State  183
Old Q Values:  [ 1097.52877193  2540.33330616 13006.20237897  1554.80203889]
New Q values:  [1097.52877193 2540.33330616 7928.328963   1554.80203889]
Reward: 9  Episode Reward:  14
xxxxx
x . x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 3.97827546e+03 2.05741271e+03 9.06816004e+03]
------
Step:7, Action:West
State  201
Old Q Values:  [ 613.33320563 4708.31908668 -789.02220255  529.38906334]
New Q values:  [ 613.33320563 4708.31908668 -789.02220255 1702.07235953]
Reward: -1  Episode Reward:  13
xxxxx
x . x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 3059.00845116 4969.72244732  154.04646645]
------
Step:8, Action:East
State  183
Old Q Values:  [1097.52877193 2540.33330616 7928.328963   1554.80203889]
New Q values:  [1097.52877193 2540.33330616 5891.17959661 1554.80203889]
Reward: -1  Episode Reward:  12
xxxxx
x . x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 3.97827546e+03 2.05741271e+03 9.06816004e+03]
------
Step:9, Action:West
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  3.58335671e+04  1.03161518e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  3.58335671e+04  2.17939995e+03]
Reward: -1  Episode Reward:  11
xxxxx
x . x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1097.52877193 2540.33330616 5891.17959661 1554.80203889]
------
Step:10, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243 14893.01024342     0.        ]
New Q values:  [    0.         -5536.05678243 16706.67422296     0.        ]
Reward: -1  Episode Reward:  10
xxxxx
x . x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  3.58335671e+04  2.17939995e+03]
------
Step:11, Action:East
State  200
Old Q Values:  [ 169.9257398  7582.92139682 2492.21342009  568.38654082]
New Q values:  [ 169.9257398  7582.92139682 2193.27799086  568.38654082]
Reward: 9  Episode Reward:  19
xxxxx
x . x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1020.50511808  3969.97540941 -8220.10378799   911.8961227 ]
------
Step:12, Action:South
State  216
Old Q Values:  [ 1020.50511808  3969.97540941 -8220.10378799   911.8961227 ]
New Q values:  [ 1020.50511808  1748.35058134 -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  28
xxxxx
x . x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  516.53472525 -6396.61506955 -5588.09647059   427.7238078 ]
------
Step:13, Action:North
State  288
Old Q Values:  [  516.53472525 -6396.61506955 -5588.09647059   427.7238078 ]
New Q values:  [24647.82563836 -6396.61506955 -5588.09647059   427.7238078 ]
Reward: -1  Episode Reward:  27
xxxxx
x . x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[81472.70582754  1033.4360849    790.72804752  1050.85266124]
------
Step:14, Action:North
State  218
Old Q Values:  [ 426.3715412  2184.8079221     0.         1540.00681929]
New Q values:  [ 443.4744357  2184.8079221     0.         1540.00681929]
Reward: -1  Episode Reward:  26
xxxxx
x .ax
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02  9.11752731e+02 -3.22965309e-01  7.54386164e+02]
------
Step:15, Action:South
State  130
Old Q Values:  [ 33256.45600344   7770.77360059   -180.00807518 150966.89771801]
New Q values:  [ 33256.45600344  27549.5211885    -180.00807518 150966.89771801]
Reward: -1  Episode Reward:  25
xxxxx
x . x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[81472.70582754  1033.4360849    790.72804752  1050.85266124]
------
Step:16, Action:North
State  216
Old Q Values:  [ 1020.50511808  1748.35058134 -8220.10378799   911.8961227 ]
New Q values:  [  681.12786645  1748.35058134 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  24
xxxxx
x .ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02  9.11752731e+02 -3.22965309e-01  7.54386164e+02]
------
Step:17, Action:South
State  138
Old Q Values:  [ 1.39201587e+02  9.11752731e+02 -3.22965309e-01  7.54386164e+02]
New Q values:  [ 1.39201587e+02 -5.11139373e+03 -3.22965309e-01  7.54386164e+02]
Reward: -10001  Episode Reward:  -9977
xxxxx
x . x
x  gx
x.. x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36338.55014088   431.8900127  -4584.50430574 -1713.91177491]
------
Step:1, Action:North
State  216
Old Q Values:  [  681.12786645  1748.35058134 -8220.10378799   911.8961227 ]
New Q values:  [ 1067.71312533  1748.35058134 -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2632.87326251   660.86649319 -5740.77528236]
------
Step:2, Action:South
State  138
Old Q Values:  [ 1.39201587e+02 -5.11139373e+03 -3.22965309e-01  7.54386164e+02]
New Q values:  [ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  7.54386164e+02]
Reward: -1  Episode Reward:  8
xxxxx
x . x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1748.35058134 -8220.10378799   911.8961227 ]
------
Step:3, Action:South
State  208
Old Q Values:  [36338.55014088   431.8900127  -4584.50430574 -1713.91177491]
New Q values:  [36338.55014088  7572.50369659 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  17
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[24647.82563836 -6396.61506955 -5588.09647059   427.7238078 ]
------
Step:4, Action:North
State  288
Old Q Values:  [24647.82563836 -6396.61506955 -5588.09647059   427.7238078 ]
New Q values:  [20760.09529761 -6396.61506955 -5588.09647059   427.7238078 ]
Reward: -1  Episode Reward:  16
xxxxx
x .gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36338.55014088  7572.50369659 -4584.50430574 -1713.91177491]
------
Step:5, Action:South
State  208
Old Q Values:  [36338.55014088  7572.50369659 -4584.50430574 -1713.91177491]
New Q values:  [36338.55014088  9256.43006792 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20760.09529761 -6396.61506955 -5588.09647059   427.7238078 ]
------
Step:6, Action:North
State  288
Old Q Values:  [20760.09529761 -6396.61506955 -5588.09647059   427.7238078 ]
New Q values:  [19205.00316131 -6396.61506955 -5588.09647059   427.7238078 ]
Reward: -1  Episode Reward:  14
xxxxx
x .gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36338.55014088  9256.43006792 -4584.50430574 -1713.91177491]
------
Step:7, Action:South
State  208
Old Q Values:  [36338.55014088  9256.43006792 -4584.50430574 -1713.91177491]
New Q values:  [36338.55014088  9463.47297556 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  13
xxxxx
x . x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19205.00316131 -6396.61506955 -5588.09647059   427.7238078 ]
------
Step:8, Action:West
State  288
Old Q Values:  [19205.00316131 -6396.61506955 -5588.09647059   427.7238078 ]
New Q values:  [19205.00316131 -6396.61506955 -5588.09647059  1857.33324418]
Reward: 9  Episode Reward:  22
xxxxx
x .gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1232.03390306 -168.92307549  835.31385151 5602.81240352]
------
Step:9, Action:West
State  273
Old Q Values:  [1232.03390306 -168.92307549  835.31385151 5602.81240352]
New Q values:  [1232.03390306 -168.92307549  835.31385151 2570.5448839 ]
Reward: 9  Episode Reward:  31
xxxxx
x . x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 758.01804815   26.73544252 1080.06640829  123.6214372 ]
------
Step:10, Action:East
State  257
Old Q Values:  [17002.21199032 12764.58618105  6430.48514739  1875.31501677]
New Q values:  [17002.21199032 12764.58618105  4380.69420737  1875.31501677]
Reward: -1  Episode Reward:  30
xxxxx
x . x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  1908.35213691  6030.33382803]
------
Step:11, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  1908.35213691  6030.33382803]
New Q values:  [ 3514.02111757 -8521.23367799  1908.35213691  3306.15828202]
Reward: -1  Episode Reward:  29
xxxxx
x . x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2982.08250269 -5704.51612281  2597.6513216  -5679.36893145]
------
Step:12, Action:East
State  257
Old Q Values:  [17002.21199032 12764.58618105  4380.69420737  1875.31501677]
New Q values:  [17002.21199032 12764.58618105  2805.88401822  1875.31501677]
Reward: -1  Episode Reward:  28
xxxxx
x . x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  1908.35213691  3306.15828202]
------
Step:13, Action:West
State  273
Old Q Values:  [1232.03390306 -168.92307549  835.31385151 2570.5448839 ]
New Q values:  [1232.03390306 -168.92307549  835.31385151 1351.63787605]
Reward: -1  Episode Reward:  27
xxxxx
x . x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 758.01804815   26.73544252 1080.06640829  123.6214372 ]
------
Step:14, Action:East
State  261
Old Q Values:  [ 758.01804815   26.73544252 1080.06640829  123.6214372 ]
New Q values:  [758.01804815  26.73544252 836.91792613 123.6214372 ]
Reward: -1  Episode Reward:  26
xxxxx
x . x
x.. x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1232.03390306 -168.92307549  835.31385151 1351.63787605]
------
Step:15, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  1908.35213691  3306.15828202]
New Q values:  [ 3514.02111757 -8521.23367799  1908.35213691  1572.93869065]
Reward: -1  Episode Reward:  25
xxxxx
x . x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[758.01804815  26.73544252 836.91792613 123.6214372 ]
------
Step:16, Action:North
State  261
Old Q Values:  [758.01804815  26.73544252 836.91792613 123.6214372 ]
New Q values:  [2075.96109824   26.73544252  836.91792613  123.6214372 ]
Reward: 9  Episode Reward:  34
xxxxx
x . x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1097.52877193 2540.33330616 5891.17959661 1554.80203889]
------
Step:17, Action:East
State  183
Old Q Values:  [1097.52877193 2540.33330616 5891.17959661 1554.80203889]
New Q values:  [ 1097.52877193  2540.33330616 13111.94196423  1554.80203889]
Reward: 9  Episode Reward:  43
xxxxx
x . x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  3.58335671e+04  2.17939995e+03]
------
Step:18, Action:East
State  195
Old Q Values:  [   38.85388605  2554.09283541 28061.6012478   1169.39963074]
New Q values:  [   38.85388605  2554.09283541 35665.85224738  1169.39963074]
Reward: -1  Episode Reward:  42
xxxxx
x . x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[81472.70582754  1033.4360849    790.72804752  1050.85266124]
------
Step:19, Action:North
State  208
Old Q Values:  [36338.55014088  9463.47297556 -4584.50430574 -1713.91177491]
New Q values:  [59824.88937175  9463.47297556 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  41
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 33256.45600344  27549.5211885    -180.00807518 150966.89771801]
------
Step:20, Action:West
State  128
Old Q Values:  [27558.53877122 12144.48195471 -8652.84       69406.76602238]
New Q values:  [ 27558.53877122  12144.48195471  -8652.84       109071.6178973 ]
Reward: 100009  Episode Reward:  100050
xxxxx
x agx
x   x
x   x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.86761894e+03 5.71267650e+02 4.70558569e+03]
------
Step:1, Action:West
State  196
Old Q Values:  [-2469.90645144 11024.59842256 11989.01001575 -2817.66043403]
New Q values:  [-2469.90645144 11024.59842256 11989.01001575   231.67262594]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ -553.96922156  3835.29913541  4511.12266518 -4966.32149798]
------
Step:2, Action:East
State  180
Old Q Values:  [ -553.96922156  3835.29913541  4511.12266518 -4966.32149798]
New Q values:  [ -553.96922156  3835.29913541  5400.5520708  -4966.32149798]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144 11024.59842256 11989.01001575   231.67262594]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  3.58335671e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  3.87806386e+04  2.17939995e+03]
Reward: 9  Episode Reward:  17
xxxxx
x. .x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[81472.70582754  1033.4360849    790.72804752  1050.85266124]
------
Step:4, Action:North
State  218
Old Q Values:  [ 443.4744357  2184.8079221     0.         1540.00681929]
New Q values:  [ 409.10562358 2184.8079221     0.         1540.00681929]
Reward: 9  Episode Reward:  26
xxxxx
x. ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  7.54386164e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  7.54386164e+02]
New Q values:  [ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  6.90611436e+02]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1277.68826995  1298.18990105]
------
Step:6, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1277.68826995  1298.18990105]
New Q values:  [ -253.44886264 -1902.20915811  1277.68826995  1884.04483041]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4531.22956665  751.03868094 -120.29354603]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558  485.90543269 1236.56402417 -252.78192178]
New Q values:  [-252.35169558  945.61729938 1236.56402417 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         2506.18375434    0.        ]
------
Step:8, Action:East
State  189
Old Q Values:  [ 275.08817949 3059.00845116 4969.72244732  154.04646645]
New Q values:  [ 275.08817949 3059.00845116 3399.78470493  154.04646645]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4708.31908668 -789.02220255 1702.07235953]
------
Step:9, Action:South
State  200
Old Q Values:  [ 169.9257398  7582.92139682 2193.27799086  568.38654082]
New Q values:  [ 169.9257398  4092.774894   2193.27799086  568.38654082]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  1908.35213691  1572.93869065]
------
Step:10, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  1908.35213691  1572.93869065]
New Q values:  [ 3514.02111757 -8521.23367799  6530.24180316  1572.93869065]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19205.00316131 -6396.61506955 -5588.09647059  1857.33324418]
------
Step:11, Action:North
State  288
Old Q Values:  [19205.00316131 -6396.61506955 -5588.09647059  1857.33324418]
New Q values:  [ 8205.90643892 -6396.61506955 -5588.09647059  1857.33324418]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1748.35058134 -8220.10378799   911.8961227 ]
------
Step:12, Action:South
State  218
Old Q Values:  [ 409.10562358 2184.8079221     0.         1540.00681929]
New Q values:  [ 409.10562358 3335.09510052    0.         1540.00681929]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8205.90643892 -6396.61506955 -5588.09647059  1857.33324418]
------
Step:13, Action:North
State  288
Old Q Values:  [ 8205.90643892 -6396.61506955 -5588.09647059  1857.33324418]
New Q values:  [ 3806.26774997 -6396.61506955 -5588.09647059  1857.33324418]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1748.35058134 -8220.10378799   911.8961227 ]
------
Step:14, Action:South
State  216
Old Q Values:  [ 1067.71312533  1748.35058134 -8220.10378799   911.8961227 ]
New Q values:  [ 1067.71312533  1840.62055753 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  46
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3806.26774997 -6396.61506955 -5588.09647059  1857.33324418]
------
Step:15, Action:North
State  288
Old Q Values:  [ 3806.26774997 -6396.61506955 -5588.09647059  1857.33324418]
New Q values:  [ 2074.09326725 -6396.61506955 -5588.09647059  1857.33324418]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1840.62055753 -8220.10378799   911.8961227 ]
------
Step:16, Action:South
State  216
Old Q Values:  [ 1067.71312533  1840.62055753 -8220.10378799   911.8961227 ]
New Q values:  [ 1067.71312533  1357.87620318 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2074.09326725 -6396.61506955 -5588.09647059  1857.33324418]
------
Step:17, Action:North
State  288
Old Q Values:  [ 2074.09326725 -6396.61506955 -5588.09647059  1857.33324418]
New Q values:  [ 1236.40016785 -6396.61506955 -5588.09647059  1857.33324418]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1357.87620318 -8220.10378799   911.8961227 ]
------
Step:18, Action:South
State  216
Old Q Values:  [ 1067.71312533  1357.87620318 -8220.10378799   911.8961227 ]
New Q values:  [ 1067.71312533  1099.75045453 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1236.40016785 -6396.61506955 -5588.09647059  1857.33324418]
------
Step:19, Action:West
State  288
Old Q Values:  [ 1236.40016785 -6396.61506955 -5588.09647059  1857.33324418]
New Q values:  [ 1236.40016785 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1232.03390306 -168.92307549  835.31385151 1351.63787605]
------
Step:20, Action:West
State  273
Old Q Values:  [1232.03390306 -168.92307549  835.31385151 1351.63787605]
New Q values:  [ 1232.03390306  -168.92307549   835.31385151 65646.71874751]
Reward: 100009  Episode Reward:  100050
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1236.40016785 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:1, Action:North
State  288
Old Q Values:  [ 1236.40016785 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [18447.42687867 -6396.61506955 -5588.09647059  1147.82466048]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[59824.88937175  9463.47297556 -4584.50430574 -1713.91177491]
------
Step:2, Action:North
State  208
Old Q Values:  [59824.88937175  9463.47297556 -4584.50430574 -1713.91177491]
New Q values:  [18725.21772745  9463.47297556 -4584.50430574 -1713.91177491]
Reward: -9991  Episode Reward:  -9982
xxxxx
x..gx
x.  x
x.. x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2075.96109824   26.73544252  836.91792613  123.6214372 ]
------
Step:1, Action:North
State  261
Old Q Values:  [2075.96109824   26.73544252  836.91792613  123.6214372 ]
New Q values:  [1227.94707446   26.73544252  836.91792613  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1307.20878388   816.03536634 -2772.22255708   262.76946019]
------
Step:2, Action:North
State  181
Old Q Values:  [ 1307.20878388   816.03536634 -2772.22255708   262.76946019]
New Q values:  [ 1887.65238355   816.03536634 -2772.22255708   262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
xa..x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4531.22956665  751.03868094 -120.29354603]
------
Step:3, Action:South
State  103
Old Q Values:  [ 221.30610858 2600.37085328  238.35800069    0.        ]
New Q values:  [ 221.30610858 4973.13093058  238.35800069    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1097.52877193  2540.33330616 13111.94196423  1554.80203889]
------
Step:4, Action:East
State  183
Old Q Values:  [ 1097.52877193  2540.33330616 13111.94196423  1554.80203889]
New Q values:  [ 1097.52877193  2540.33330616 16884.36836041  1554.80203889]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  3.87806386e+04  2.17939995e+03]
------
Step:5, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  3.87806386e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  3.99534672e+04  2.17939995e+03]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[81472.70582754  1033.4360849    790.72804752  1050.85266124]
------
Step:6, Action:North
State  210
Old Q Values:  [81472.70582754  1033.4360849    790.72804752  1050.85266124]
New Q values:  [77884.55164642  1033.4360849    790.72804752  1050.85266124]
Reward: 9  Episode Reward:  34
xxxxx
x .ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 33256.45600344  27549.5211885    -180.00807518 150966.89771801]
------
Step:7, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  6.90611436e+02]
New Q values:  [ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  8.46858024e+02]
Reward: 9  Episode Reward:  43
xxxxx
x a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1277.68826995  1884.04483041]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1277.68826995  1884.04483041]
New Q values:  [ -253.44886264 -1902.20915811  1277.68826995  2112.38680216]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4531.22956665  751.03868094 -120.29354603]
------
Step:9, Action:South
State  111
Old Q Values:  [-177.44732869 4531.22956665  751.03868094 -120.29354603]
New Q values:  [-177.44732869 2831.82723814  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 3059.00845116 3399.78470493  154.04646645]
------
Step:10, Action:South
State  189
Old Q Values:  [ 275.08817949 3059.00845116 3399.78470493  154.04646645]
New Q values:  [ 275.08817949 1591.3875028  3399.78470493  154.04646645]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1227.94707446   26.73544252  836.91792613  123.6214372 ]
------
Step:11, Action:North
State  261
Old Q Values:  [1227.94707446   26.73544252  836.91792613  123.6214372 ]
New Q values:  [1510.51424126   26.73544252  836.91792613  123.6214372 ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1591.3875028  3399.78470493  154.04646645]
------
Step:12, Action:South
State  189
Old Q Values:  [ 275.08817949 1591.3875028  3399.78470493  154.04646645]
New Q values:  [ 275.08817949 1089.1092735  3399.78470493  154.04646645]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1510.51424126   26.73544252  836.91792613  123.6214372 ]
------
Step:13, Action:North
State  261
Old Q Values:  [1510.51424126   26.73544252  836.91792613  123.6214372 ]
New Q values:  [1623.54110798   26.73544252  836.91792613  123.6214372 ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949 1089.1092735  3399.78470493  154.04646645]
------
Step:14, Action:South
State  189
Old Q Values:  [ 275.08817949 1089.1092735  3399.78470493  154.04646645]
New Q values:  [ 275.08817949  922.1060418  3399.78470493  154.04646645]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1623.54110798   26.73544252  836.91792613  123.6214372 ]
------
Step:15, Action:North
State  261
Old Q Values:  [1623.54110798   26.73544252  836.91792613  123.6214372 ]
New Q values:  [1668.75185467   26.73544252  836.91792613  123.6214372 ]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 275.08817949  922.1060418  3399.78470493  154.04646645]
------
Step:16, Action:East
State  189
Old Q Values:  [ 275.08817949  922.1060418  3399.78470493  154.04646645]
New Q values:  [ 275.08817949  922.1060418  2587.14635017  154.04646645]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  4092.774894   2193.27799086  568.38654082]
------
Step:17, Action:South
State  204
Old Q Values:  [   0.         2790.09447719 3881.10435421  441.58769553]
New Q values:  [   0.         8532.12995187 3881.10435421  441.58769553]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197 24702.30720332   790.30511964]
------
Step:18, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  6530.24180316  1572.93869065]
New Q values:  [ 3514.02111757 -8521.23367799 68151.72478486  1572.93869065]
Reward: 100009  Episode Reward:  100052
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2982.08250269 -5704.51612281  2597.6513216  -5679.36893145]
------
Step:1, Action:North
State  261
Old Q Values:  [1668.75185467   26.73544252  836.91792613  123.6214372 ]
New Q values:  [1239.19645693   26.73544252  836.91792613  123.6214372 ]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1887.65238355   816.03536634 -2772.22255708   262.76946019]
------
Step:2, Action:North
State  180
Old Q Values:  [ -553.96922156  3835.29913541  5400.5520708  -4966.32149798]
New Q values:  [-5631.58166229  3835.29913541  5400.5520708  -4966.32149798]
Reward: -10001  Episode Reward:  -9992
xxxxx
xg..x
x ..x
x ..x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18725.21772745  9463.47297556 -4584.50430574 -1713.91177491]
------
Step:1, Action:South
State  208
Old Q Values:  [18725.21772745  9463.47297556 -4584.50430574 -1713.91177491]
New Q values:  [18725.21772745  9325.01725382 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18447.42687867 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:2, Action:North
State  288
Old Q Values:  [18447.42687867 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [12995.9360697  -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18725.21772745  9325.01725382 -4584.50430574 -1713.91177491]
------
Step:3, Action:North
State  208
Old Q Values:  [18725.21772745  9325.01725382 -4584.50430574 -1713.91177491]
New Q values:  [ 7743.54449804  9325.01725382 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  8.46858024e+02]
------
Step:4, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  8.46858024e+02]
New Q values:  [ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  9.77859250e+02]
Reward: 9  Episode Reward:  16
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1277.68826995  2112.38680216]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1277.68826995  2112.38680216]
New Q values:  [ -253.44886264 -1902.20915811  1277.68826995  1221.32392811]
Reward: 9  Episode Reward:  25
xxxxx
xa  x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  945.61729938 1236.56402417 -252.78192178]
------
Step:6, Action:East
State  107
Old Q Values:  [-252.35169558  945.61729938 1236.56402417 -252.78192178]
New Q values:  [-252.35169558  945.61729938  790.4386324  -252.78192178]
Reward: -1  Episode Reward:  24
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   988.0434091    377.81590642]
------
Step:7, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1564.1261932    -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  1414.91245603   -98.79429602]
Reward: -1  Episode Reward:  23
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2632.87326251   660.86649319 -5740.77528236]
------
Step:8, Action:South
State  136
Old Q Values:  [-2129.37064562  2632.87326251   660.86649319 -5740.77528236]
New Q values:  [-2129.37064562  3850.05448115   660.86649319 -5740.77528236]
Reward: -1  Episode Reward:  22
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7743.54449804  9325.01725382 -4584.50430574 -1713.91177491]
------
Step:9, Action:South
State  208
Old Q Values:  [ 7743.54449804  9325.01725382 -4584.50430574 -1713.91177491]
New Q values:  [ 7743.54449804  7628.18772244 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  21
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12995.9360697  -6396.61506955 -5588.09647059  1147.82466048]
------
Step:10, Action:North
State  288
Old Q Values:  [12995.9360697  -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [ 7520.83777729 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  20
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7743.54449804  7628.18772244 -4584.50430574 -1713.91177491]
------
Step:11, Action:North
State  208
Old Q Values:  [ 7743.54449804  7628.18772244 -4584.50430574 -1713.91177491]
New Q values:  [ 4251.83414356  7628.18772244 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  19
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3850.05448115   660.86649319 -5740.77528236]
------
Step:12, Action:South
State  138
Old Q Values:  [ 1.39201587e+02 -1.52065232e+03 -3.22965309e-01  9.77859250e+02]
New Q values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  9.77859250e+02]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1099.75045453 -8220.10378799   911.8961227 ]
------
Step:13, Action:South
State  208
Old Q Values:  [ 4251.83414356  7628.18772244 -4584.50430574 -1713.91177491]
New Q values:  [ 4251.83414356  5306.92642216 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  17
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7520.83777729 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:14, Action:North
State  288
Old Q Values:  [ 7520.83777729 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [ 4599.81303757 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  16
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4251.83414356  5306.92642216 -4584.50430574 -1713.91177491]
------
Step:15, Action:South
State  208
Old Q Values:  [ 4251.83414356  5306.92642216 -4584.50430574 -1713.91177491]
New Q values:  [ 4251.83414356  3502.11448014 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4599.81303757 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:16, Action:North
State  288
Old Q Values:  [ 4599.81303757 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [ 2169.25035139 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1099.75045453 -8220.10378799   911.8961227 ]
------
Step:17, Action:South
State  208
Old Q Values:  [ 4251.83414356  3502.11448014 -4584.50430574 -1713.91177491]
New Q values:  [ 4251.83414356  2051.02089747 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  13
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2169.25035139 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:18, Action:North
State  288
Old Q Values:  [ 2169.25035139 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [ 1197.02527691 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1099.75045453 -8220.10378799   911.8961227 ]
------
Step:19, Action:South
State  208
Old Q Values:  [ 4251.83414356  2051.02089747 -4584.50430574 -1713.91177491]
New Q values:  [ 4251.83414356  1178.91594206 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  11
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1197.02527691 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:20, Action:North
State  288
Old Q Values:  [ 1197.02527691 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [ 1753.76035383 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  10
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4251.83414356  1178.91594206 -4584.50430574 -1713.91177491]
------
Step:21, Action:South
State  208
Old Q Values:  [ 4251.83414356  1178.91594206 -4584.50430574 -1713.91177491]
New Q values:  [ 4251.83414356   997.09448297 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  9
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1753.76035383 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:22, Action:North
State  288
Old Q Values:  [ 1753.76035383 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [ 1976.4543846  -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  8
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4251.83414356   997.09448297 -4584.50430574 -1713.91177491]
------
Step:23, Action:South
State  208
Old Q Values:  [ 4251.83414356   997.09448297 -4584.50430574 -1713.91177491]
New Q values:  [ 4251.83414356   991.17410857 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  7
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1976.4543846  -6396.61506955 -5588.09647059  1147.82466048]
------
Step:24, Action:North
State  288
Old Q Values:  [ 1976.4543846  -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [ 2065.53199691 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  6
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4251.83414356   991.17410857 -4584.50430574 -1713.91177491]
------
Step:25, Action:South
State  208
Old Q Values:  [ 4251.83414356   991.17410857 -4584.50430574 -1713.91177491]
New Q values:  [ 4251.83414356  1015.5292425  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  5
xxxxx
x g x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2065.53199691 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:26, Action:North
State  288
Old Q Values:  [ 2065.53199691 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [ 1155.53793512 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  4
xxxxx
x   x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1099.75045453 -8220.10378799   911.8961227 ]
------
Step:27, Action:South
State  210
Old Q Values:  [77884.55164642  1033.4360849    790.72804752  1050.85266124]
New Q values:  [77884.55164642   759.4358145    790.72804752  1050.85266124]
Reward: -1  Episode Reward:  3
xxxxx
x   x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1155.53793512 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:28, Action:North
State  288
Old Q Values:  [ 1155.53793512 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [23826.98066797 -6396.61506955 -5588.09647059  1147.82466048]
Reward: -1  Episode Reward:  2
xxxxx
x   x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[77884.55164642   759.4358145    790.72804752  1050.85266124]
------
Step:29, Action:North
State  210
Old Q Values:  [77884.55164642   759.4358145    790.72804752  1050.85266124]
New Q values:  [31446.57843359   759.4358145    790.72804752  1050.85266124]
Reward: -1  Episode Reward:  1
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  9.77859250e+02]
------
Step:30, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  9.77859250e+02]
New Q values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  6.86956723e+02]
Reward: -1  Episode Reward:  0
xxxxx
x a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   988.0434091    377.81590642]
------
Step:31, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1414.91245603   -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  1720.38132676   -98.79429602]
Reward: -1  Episode Reward:  -1
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3850.05448115   660.86649319 -5740.77528236]
------
Step:32, Action:South
State  136
Old Q Values:  [-2129.37064562  3850.05448115   660.86649319 -5740.77528236]
New Q values:  [-2129.37064562  2814.97203553   660.86649319 -5740.77528236]
Reward: -1  Episode Reward:  -2
xxxxx
xg  x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4251.83414356  1015.5292425  -4584.50430574 -1713.91177491]
------
Step:33, Action:North
State  208
Old Q Values:  [ 4251.83414356  1015.5292425  -4584.50430574 -1713.91177491]
New Q values:  [ 2544.62526808  1015.5292425  -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -3
xxxxx
x gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2814.97203553   660.86649319 -5740.77528236]
------
Step:34, Action:South
State  136
Old Q Values:  [-2129.37064562  2814.97203553   660.86649319 -5740.77528236]
New Q values:  [-2129.37064562  1888.77639464   660.86649319 -5740.77528236]
Reward: -1  Episode Reward:  -4
xxxxx
x  gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2544.62526808  1015.5292425  -4584.50430574 -1713.91177491]
------
Step:35, Action:South
State  208
Old Q Values:  [ 2544.62526808  1015.5292425  -4584.50430574 -1713.91177491]
New Q values:  [ 2544.62526808  7553.70589739 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  -5
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23826.98066797 -6396.61506955 -5588.09647059  1147.82466048]
------
Step:36, Action:West
State  288
Old Q Values:  [23826.98066797 -6396.61506955 -5588.09647059  1147.82466048]
New Q values:  [23826.98066797 -6396.61506955 -5588.09647059 20158.54548845]
Reward: 9  Episode Reward:  4
xxxxx
x   x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549   835.31385151 65646.71874751]
------
Step:37, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 68151.72478486  1572.93869065]
New Q values:  [ 3514.02111757 -8521.23367799 68151.72478486  5735.23907335]
Reward: 9  Episode Reward:  13
xxxxx
x   x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[17002.21199032 12764.58618105  2805.88401822  1875.31501677]
------
Step:38, Action:North
State  260
Old Q Values:  [ 2982.08250269 -5704.51612281  2597.6513216  -5679.36893145]
New Q values:  [ 6210.23526796 -5704.51612281  2597.6513216  -5679.36893145]
Reward: 9  Episode Reward:  22
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 16706.67422296     0.        ]
------
Step:39, Action:East
State  176
Old Q Values:  [103770.24881615   1621.55095326  73423.54542925      0.        ]
New Q values:  [103770.24881615   1621.55095326  90786.49387936      0.        ]
Reward: 100009  Episode Reward:  100031
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1141.2672067   -222.69561341  1460.9765133 ]
------
Step:1, Action:West
State  192
Old Q Values:  [3.89777037e-01 1.86761894e+03 5.71267650e+02 4.70558569e+03]
New Q values:  [3.89777037e-01 1.86761894e+03 5.71267650e+02 2.45392999e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1887.65238355   816.03536634 -2772.22255708   262.76946019]
------
Step:2, Action:North
State  189
Old Q Values:  [ 275.08817949  922.1060418  2587.14635017  154.04646645]
New Q values:  [ 337.36081627  922.1060418  2587.14635017  154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
xag.x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   739.75181491 -2165.66138672  -180.6       ]
------
Step:3, Action:South
State  100
Old Q Values:  [   0.         6523.04371704  443.65288404    0.        ]
New Q values:  [   0.         4228.78310806  443.65288404    0.        ]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5631.58166229  3835.29913541  5400.5520708  -4966.32149798]
------
Step:4, Action:East
State  180
Old Q Values:  [-5631.58166229  3835.29913541  5400.5520708  -4966.32149798]
New Q values:  [-5631.58166229  3835.29913541  3387.45329652 -4966.32149798]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  4092.774894   2193.27799086  568.38654082]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.86761894e+03 5.71267650e+02 2.45392999e+03]
New Q values:  [3.89777037e-01 2.11979650e+04 5.71267650e+02 2.45392999e+03]
Reward: 9  Episode Reward:  25
xxxxx
x ..x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 68151.72478486  5735.23907335]
------
Step:6, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 68151.72478486  5735.23907335]
New Q values:  [ 3514.02111757 -8521.23367799 34414.18411434  5735.23907335]
Reward: 9  Episode Reward:  34
xxxxx
x g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23826.98066797 -6396.61506955 -5588.09647059 20158.54548845]
------
Step:7, Action:North
State  288
Old Q Values:  [23826.98066797 -6396.61506955 -5588.09647059 20158.54548845]
New Q values:  [11796.30403641 -6396.61506955 -5588.09647059 20158.54548845]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2544.62526808  7553.70589739 -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  210
Old Q Values:  [31446.57843359   759.4358145    790.72804752  1050.85266124]
New Q values:  [31446.57843359  6350.73797233   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11796.30403641 -6396.61506955 -5588.09647059 20158.54548845]
------
Step:9, Action:North
State  288
Old Q Values:  [11796.30403641 -6396.61506955 -5588.09647059 20158.54548845]
New Q values:  [14151.89514464 -6396.61506955 -5588.09647059 20158.54548845]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[31446.57843359  6350.73797233   790.72804752  1050.85266124]
------
Step:10, Action:North
State  210
Old Q Values:  [31446.57843359  6350.73797233   790.72804752  1050.85266124]
New Q values:  [57874.10068884  6350.73797233   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  40
xxxxx
x .ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 33256.45600344  27549.5211885    -180.00807518 150966.89771801]
------
Step:11, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  6.86956723e+02]
New Q values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  5.76595712e+02]
Reward: 9  Episode Reward:  49
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   988.0434091    377.81590642]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   988.0434091    377.81590642]
New Q values:  [ -281.736      -1150.91067548   567.59607719   377.81590642]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  5.76595712e+02]
------
Step:13, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  5.76595712e+02]
New Q values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  4.00317108e+02]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   567.59607719   377.81590642]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   567.59607719   377.81590642]
New Q values:  [ -281.736      -1150.91067548   346.53356324   377.81590642]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  4.00317108e+02]
------
Step:15, Action:West
State  130
Old Q Values:  [ 33256.45600344  27549.5211885    -180.00807518 150966.89771801]
New Q values:  [33256.45600344 27549.5211885   -180.00807518 98109.74760908]
Reward: -1  Episode Reward:  45
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:16, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   346.53356324   377.81590642]
New Q values:  [ -281.736      -1150.91067548   346.53356324   434.21155238]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  945.61729938  790.4386324  -252.78192178]
------
Step:17, Action:South
State  107
Old Q Values:  [-252.35169558  945.61729938  790.4386324  -252.78192178]
New Q values:  [-252.35169558 1129.50204605  790.4386324  -252.78192178]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         2506.18375434    0.        ]
------
Step:18, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         2506.18375434    0.        ]
New Q values:  [ 320.07341842    0.         1803.59625152    0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  2672.40916596  1278.19575341]
------
Step:19, Action:East
State  200
Old Q Values:  [ 169.9257398  4092.774894   2193.27799086  568.38654082]
New Q values:  [ 169.9257398  4092.774894   1206.6363327   568.38654082]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  1099.75045453 -8220.10378799   911.8961227 ]
------
Step:20, Action:South
State  216
Old Q Values:  [ 1067.71312533  1099.75045453 -8220.10378799   911.8961227 ]
New Q values:  [ 1067.71312533  6486.86382835 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14151.89514464 -6396.61506955 -5588.09647059 20158.54548845]
------
Step:21, Action:West
State  288
Old Q Values:  [14151.89514464 -6396.61506955 -5588.09647059 20158.54548845]
New Q values:  [14151.89514464 -6396.61506955 -5588.09647059 27756.83381963]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549   835.31385151 65646.71874751]
------
Step:22, Action:West
State  273
Old Q Values:  [ 1232.03390306  -168.92307549   835.31385151 65646.71874751]
New Q values:  [ 1232.03390306  -168.92307549   835.31385151 91364.7510961 ]
Reward: 100009  Episode Reward:  100048
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  4.00317108e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  4.00317108e+02]
New Q values:  [ 139.20158703 -278.93579121   -0.32296531  295.79030887]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   346.53356324   434.21155238]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1277.68826995  1221.32392811]
New Q values:  [ -253.44886264 -1902.20915811  1277.68826995  1343.47774269]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2831.82723814  751.03868094 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 2831.82723814  751.03868094 -120.29354603]
New Q values:  [-177.44732869 1914.27480031  751.03868094 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627  922.1060418  2587.14635017  154.04646645]
------
Step:4, Action:South
State  188
Old Q Values:  [-6523.78898263  4132.20912457  1485.98871489     0.        ]
New Q values:  [-6523.78898263  3521.35423022  1485.98871489     0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xg. x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 6210.23526796 -5704.51612281  2597.6513216  -5679.36893145]
------
Step:5, Action:East
State  260
Old Q Values:  [ 6210.23526796 -5704.51612281  2597.6513216  -5679.36893145]
New Q values:  [ 6210.23526796 -5704.51612281 11368.71576294 -5679.36893145]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x . x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 34414.18411434  5735.23907335]
------
Step:6, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 34414.18411434  5735.23907335]
New Q values:  [ 3514.02111757 -8521.23367799 22098.12379163  5735.23907335]
Reward: 9  Episode Reward:  54
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14151.89514464 -6396.61506955 -5588.09647059 27756.83381963]
------
Step:7, Action:West
State  288
Old Q Values:  [14151.89514464 -6396.61506955 -5588.09647059 27756.83381963]
New Q values:  [14151.89514464 -6396.61506955 -5588.09647059 17731.57066534]
Reward: -1  Episode Reward:  53
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 22098.12379163  5735.23907335]
------
Step:8, Action:East
State  273
Old Q Values:  [ 1232.03390306  -168.92307549   835.31385151 91364.7510961 ]
New Q values:  [ 1232.03390306  -168.92307549  5652.99674021 91364.7510961 ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14151.89514464 -6396.61506955 -5588.09647059 17731.57066534]
------
Step:9, Action:West
State  288
Old Q Values:  [14151.89514464 -6396.61506955 -5588.09647059 17731.57066534]
New Q values:  [14151.89514464 -6396.61506955 -5588.09647059 13721.46540362]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 22098.12379163  5735.23907335]
------
Step:10, Action:East
State  273
Old Q Values:  [ 1232.03390306  -168.92307549  5652.99674021 91364.7510961 ]
New Q values:  [ 1232.03390306  -168.92307549  6506.16723947 91364.7510961 ]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14151.89514464 -6396.61506955 -5588.09647059 13721.46540362]
------
Step:11, Action:West
State  288
Old Q Values:  [14151.89514464 -6396.61506955 -5588.09647059 13721.46540362]
New Q values:  [14151.89514464 -6396.61506955 -5588.09647059 32897.41149028]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549  6506.16723947 91364.7510961 ]
------
Step:12, Action:West
State  273
Old Q Values:  [ 1232.03390306  -168.92307549  6506.16723947 91364.7510961 ]
New Q values:  [ 1232.03390306  -168.92307549  6506.16723947 36917.05937552]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x .gx
xa  x
xxxxx
Step:13, Action:West
State  261
Old Q Values:  [1239.19645693   26.73544252  836.91792613  123.6214372 ]
New Q values:  [1239.19645693   26.73544252  836.91792613  240.60751196]
Reward: -301  Episode Reward:  -253
xxxxx
x  gx
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1239.19645693   26.73544252  836.91792613  240.60751196]
------
Step:14, Action:North
State  261
Old Q Values:  [1239.19645693   26.73544252  836.91792613  240.60751196]
New Q values:  [1061.37429784   26.73544252  836.91792613  240.60751196]
Reward: -1  Episode Reward:  -254
xxxxx
x g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1887.65238355   816.03536634 -2772.22255708   262.76946019]
------
Step:15, Action:North
State  181
Old Q Values:  [ 1887.65238355   816.03536634 -2772.22255708   262.76946019]
New Q values:  [  976.38649789   816.03536634 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  -255
xxxxx
xa gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   739.75181491 -2165.66138672  -180.6       ]
------
Step:16, Action:South
State  111
Old Q Values:  [-177.44732869 1914.27480031  751.03868094 -120.29354603]
New Q values:  [-177.44732869 1058.02586949  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  -256
xxxxx
x   x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  976.38649789   816.03536634 -2772.22255708   262.76946019]
------
Step:17, Action:North
State  183
Old Q Values:  [ 1097.52877193  2540.33330616 16884.36836041  1554.80203889]
New Q values:  [  755.81926962  2540.33330616 16884.36836041  1554.80203889]
Reward: -1  Episode Reward:  -257
xxxxx
xa  x
x . x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1058.02586949  751.03868094 -120.29354603]
------
Step:18, Action:South
State  111
Old Q Values:  [-177.44732869 1058.02586949  751.03868094 -120.29354603]
New Q values:  [-177.44732869 5487.92085592  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  -258
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  755.81926962  2540.33330616 16884.36836041  1554.80203889]
------
Step:19, Action:East
State  177
Old Q Values:  [33510.55723334 29124.7048717  20101.94546536     0.        ]
New Q values:  [33510.55723334 29124.7048717  68405.56768954     0.        ]
Reward: 90009  Episode Reward:  89751
xxxxx
x   x
x g x
x   x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14151.89514464 -6396.61506955 -5588.09647059 32897.41149028]
------
Step:1, Action:West
State  288
Old Q Values:  [14151.89514464 -6396.61506955 -5588.09647059 32897.41149028]
New Q values:  [14151.89514464 -6396.61506955 -5588.09647059 19793.8017336 ]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 22098.12379163  5735.23907335]
------
Step:2, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 22098.12379163  5735.23907335]
New Q values:  [ 3514.02111757 -8521.23367799 14776.79003673  5735.23907335]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14151.89514464 -6396.61506955 -5588.09647059 19793.8017336 ]
------
Step:3, Action:West
State  288
Old Q Values:  [14151.89514464 -6396.61506955 -5588.09647059 19793.8017336 ]
New Q values:  [14151.89514464 -6396.61506955 -5588.09647059 12349.95770446]
Reward: -1  Episode Reward:  7
xxxxx
x. .x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 14776.79003673  5735.23907335]
------
Step:4, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 14776.79003673  5735.23907335]
New Q values:  [ 3514.02111757 -8521.23367799 10155.68455808  5735.23907335]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14151.89514464 -6396.61506955 -5588.09647059 12349.95770446]
------
Step:5, Action:North
State  288
Old Q Values:  [14151.89514464 -6396.61506955 -5588.09647059 12349.95770446]
New Q values:  [23028.38826451 -6396.61506955 -5588.09647059 12349.95770446]
Reward: 9  Episode Reward:  15
xxxxx
x. .x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[57874.10068884  6350.73797233   790.72804752  1050.85266124]
------
Step:6, Action:North
State  210
Old Q Values:  [57874.10068884  6350.73797233   790.72804752  1050.85266124]
New Q values:  [23243.7773682   6350.73797233   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  24
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 139.20158703 -278.93579121   -0.32296531  295.79030887]
------
Step:7, Action:West
State  138
Old Q Values:  [ 139.20158703 -278.93579121   -0.32296531  295.79030887]
New Q values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  5.20759446e+02]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1277.68826995  1343.47774269]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1277.68826995  1343.47774269]
New Q values:  [ -253.44886264 -1902.20915811  1277.68826995   881.64171089]
Reward: 9  Episode Reward:  32
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1129.50204605  790.4386324  -252.78192178]
------
Step:9, Action:South
State  107
Old Q Values:  [-252.35169558 1129.50204605  790.4386324  -252.78192178]
New Q values:  [-252.35169558 1056.31164251  790.4386324  -252.78192178]
Reward: 9  Episode Reward:  41
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 189.19059642    0.         1997.0360803  -178.98      ]
------
Step:10, Action:North
State  181
Old Q Values:  [  976.38649789   816.03536634 -2772.22255708   262.76946019]
New Q values:  [  456.41785852   816.03536634 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  40
xxxxx
xag x
x . x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         221.54419787 -764.93196255    0.        ]
------
Step:11, Action:South
State  105
Old Q Values:  [-180.6         221.54419787 -764.93196255    0.        ]
New Q values:  [-180.6         332.82828905 -764.93196255    0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  456.41785852   816.03536634 -2772.22255708   262.76946019]
------
Step:12, Action:South
State  181
Old Q Values:  [  456.41785852   816.03536634 -2772.22255708   262.76946019]
New Q values:  [  456.41785852   650.22643589 -2772.22255708   262.76946019]
Reward: 9  Episode Reward:  48
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1061.37429784   26.73544252  836.91792613  240.60751196]
------
Step:13, Action:North
State  257
Old Q Values:  [17002.21199032 12764.58618105  2805.88401822  1875.31501677]
New Q values:  [27321.95510299 12764.58618105  2805.88401822  1875.31501677]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  68405.56768954     0.        ]
------
Step:14, Action:North
State  181
Old Q Values:  [  456.41785852   650.22643589 -2772.22255708   262.76946019]
New Q values:  [  403.89268788   650.22643589 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  46
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   739.75181491 -2165.66138672  -180.6       ]
------
Step:15, Action:South
State  109
Old Q Values:  [ -241.10880094   739.75181491 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   490.36865673 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  403.89268788   650.22643589 -2772.22255708   262.76946019]
------
Step:16, Action:South
State  181
Old Q Values:  [  403.89268788   650.22643589 -2772.22255708   262.76946019]
New Q values:  [  403.89268788   577.90286371 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1061.37429784   26.73544252  836.91792613  240.60751196]
------
Step:17, Action:North
State  260
Old Q Values:  [ 6210.23526796 -5704.51612281 11368.71576294 -5679.36893145]
New Q values:  [ 3634.08384781 -5704.51612281 11368.71576294 -5679.36893145]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5631.58166229  3835.29913541  3387.45329652 -4966.32149798]
------
Step:18, Action:South
State  181
Old Q Values:  [  403.89268788   577.90286371 -2772.22255708   262.76946019]
New Q values:  [  403.89268788   548.97343483 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1061.37429784   26.73544252  836.91792613  240.60751196]
------
Step:19, Action:North
State  261
Old Q Values:  [1061.37429784   26.73544252  836.91792613  240.60751196]
New Q values:  [588.64174958  26.73544252 836.91792613 240.60751196]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  403.89268788   548.97343483 -2772.22255708   262.76946019]
------
Step:20, Action:South
State  181
Old Q Values:  [  403.89268788   548.97343483 -2772.22255708   262.76946019]
New Q values:  [  403.89268788   470.06475177 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[588.64174958  26.73544252 836.91792613 240.60751196]
------
Step:21, Action:East
State  260
Old Q Values:  [ 3634.08384781 -5704.51612281 11368.71576294 -5679.36893145]
New Q values:  [ 3634.08384781 -5704.51612281  7593.5916726  -5679.36893145]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 10155.68455808  5735.23907335]
------
Step:22, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 10155.68455808  5735.23907335]
New Q values:  [ 3514.02111757 -8521.23367799 10970.19030259  5735.23907335]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23028.38826451 -6396.61506955 -5588.09647059 12349.95770446]
------
Step:23, Action:North
State  288
Old Q Values:  [23028.38826451 -6396.61506955 -5588.09647059 12349.95770446]
New Q values:  [11476.86707502 -6396.61506955 -5588.09647059 12349.95770446]
Reward: -1  Episode Reward:  37
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2544.62526808  7553.70589739 -4584.50430574 -1713.91177491]
------
Step:24, Action:South
State  208
Old Q Values:  [ 2544.62526808  7553.70589739 -4584.50430574 -1713.91177491]
New Q values:  [ 2544.62526808  6725.86967029 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  36
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11476.86707502 -6396.61506955 -5588.09647059 12349.95770446]
------
Step:25, Action:West
State  288
Old Q Values:  [11476.86707502 -6396.61506955 -5588.09647059 12349.95770446]
New Q values:  [11476.86707502 -6396.61506955 -5588.09647059  8230.44017256]
Reward: -1  Episode Reward:  35
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 10970.19030259  5735.23907335]
------
Step:26, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 10970.19030259  5735.23907335]
New Q values:  [ 3514.02111757 -8521.23367799  7830.53624354  5735.23907335]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11476.86707502 -6396.61506955 -5588.09647059  8230.44017256]
------
Step:27, Action:North
State  288
Old Q Values:  [11476.86707502 -6396.61506955 -5588.09647059  8230.44017256]
New Q values:  [ 6607.9077311  -6396.61506955 -5588.09647059  8230.44017256]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2544.62526808  6725.86967029 -4584.50430574 -1713.91177491]
------
Step:28, Action:South
State  210
Old Q Values:  [23243.7773682   6350.73797233   790.72804752  1050.85266124]
New Q values:  [23243.7773682   5008.8272407    790.72804752  1050.85266124]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x . x
x gax
xxxxx
Step:29, Action:West
State  288
Old Q Values:  [ 6607.9077311  -6396.61506955 -5588.09647059  8230.44017256]
New Q values:  [ 6607.9077311  -6396.61506955 -5588.09647059  -239.29187921]
Reward: -10001  Episode Reward:  -9969
xxxxx
x   x
x . x
xg ax
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  403.89268788   470.06475177 -2772.22255708   262.76946019]
------
Step:1, Action:South
State  183
Old Q Values:  [  755.81926962  2540.33330616 16884.36836041  1554.80203889]
New Q values:  [  755.81926962  1272.6087003  16884.36836041  1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[588.64174958  26.73544252 836.91792613 240.60751196]
------
Step:2, Action:East
State  261
Old Q Values:  [588.64174958  26.73544252 836.91792613 240.60751196]
New Q values:  [  588.64174958    26.73544252 -3310.67195649   240.60751196]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
x . x
x g.x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549  6506.16723947 36917.05937552]
------
Step:1, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7830.53624354  5735.23907335]
New Q values:  [ 3514.02111757 -8521.23367799  7830.53624354  2476.08815422]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  588.64174958    26.73544252 -3310.67195649   240.60751196]
------
Step:2, Action:North
State  261
Old Q Values:  [  588.64174958    26.73544252 -3310.67195649   240.60751196]
New Q values:  [  381.87612537    26.73544252 -3310.67195649   240.60751196]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  403.89268788   470.06475177 -2772.22255708   262.76946019]
------
Step:3, Action:South
State  181
Old Q Values:  [  403.89268788   470.06475177 -2772.22255708   262.76946019]
New Q values:  [  403.89268788   301.98873832 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  381.87612537    26.73544252 -3310.67195649   240.60751196]
------
Step:4, Action:North
State  260
Old Q Values:  [ 3634.08384781 -5704.51612281  7593.5916726  -5679.36893145]
New Q values:  [-3396.37672025 -5704.51612281  7593.5916726  -5679.36893145]
Reward: -10001  Episode Reward:  -9984
xxxxx
x...x
xg. x
x  .x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  403.89268788   301.98873832 -2772.22255708   262.76946019]
------
Step:1, Action:North
State  181
Old Q Values:  [  403.89268788   301.98873832 -2772.22255708   262.76946019]
New Q values:  [  314.06767217   301.98873832 -2772.22255708   262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
xa gx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   490.36865673 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  109
Old Q Values:  [ -241.10880094   490.36865673 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   289.76776434 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  314.06767217   301.98873832 -2772.22255708   262.76946019]
------
Step:3, Action:North
State  181
Old Q Values:  [  314.06767217   301.98873832 -2772.22255708   262.76946019]
New Q values:  [ 1771.40332564   301.98873832 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5487.92085592  751.03868094 -120.29354603]
------
Step:4, Action:South
State  110
Old Q Values:  [ -239.29051573 -3902.35021231   405.81070979  -180.6       ]
New Q values:  [ -239.29051573 -6410.9503443    405.81070979  -180.6       ]
Reward: -10001  Episode Reward:  -9994
xxxxx
x  .x
xg..x
x...x
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  381.87612537    26.73544252 -3310.67195649   240.60751196]
------
Step:1, Action:North
State  261
Old Q Values:  [  381.87612537    26.73544252 -3310.67195649   240.60751196]
New Q values:  [  689.57144784    26.73544252 -3310.67195649   240.60751196]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1771.40332564   301.98873832 -2772.22255708   262.76946019]
------
Step:2, Action:North
State  181
Old Q Values:  [ 1771.40332564   301.98873832 -2772.22255708   262.76946019]
New Q values:  [ 2360.33758703   301.98873832 -2772.22255708   262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
xa .x
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5487.92085592  751.03868094 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 5487.92085592  751.03868094 -120.29354603]
New Q values:  [-177.44732869 7259.87885049  751.03868094 -120.29354603]
Reward: -1  Episode Reward:  17
xxxxx
x  .x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  755.81926962  1272.6087003  16884.36836041  1554.80203889]
------
Step:4, Action:East
State  183
Old Q Values:  [  755.81926962  1272.6087003  16884.36836041  1554.80203889]
New Q values:  [  755.81926962  1272.6087003  11411.99720993  1554.80203889]
Reward: 9  Episode Reward:  26
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  1.55094996e+04  0.00000000e+00]
------
Step:5, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  1.55094996e+04  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  7.20972835e+03  0.00000000e+00]
Reward: 9  Episode Reward:  35
xxxxx
x  .x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 409.10562358 3335.09510052    0.         1540.00681929]
------
Step:6, Action:South
State  208
Old Q Values:  [ 2544.62526808  6725.86967029 -4584.50430574 -1713.91177491]
New Q values:  [ 2544.62526808  4678.12018745 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  44
xxxxx
x  .x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6607.9077311  -6396.61506955 -5588.09647059  -239.29187921]
------
Step:7, Action:North
State  288
Old Q Values:  [ 6607.9077311  -6396.61506955 -5588.09647059  -239.29187921]
New Q values:  [ 9615.6963029  -6396.61506955 -5588.09647059  -239.29187921]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[23243.7773682   5008.8272407    790.72804752  1050.85266124]
------
Step:8, Action:North
State  210
Old Q Values:  [23243.7773682   5008.8272407    790.72804752  1050.85266124]
New Q values:  [38735.83523     5008.8272407    790.72804752  1050.85266124]
Reward: 9  Episode Reward:  52
xxxxx
x  ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344 27549.5211885   -180.00807518 98109.74760908]
------
Step:9, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  5.20759446e+02]
New Q values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  3.37967244e+02]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   346.53356324   434.21155238]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   346.53356324   434.21155238]
New Q values:  [ -281.736      -1150.91067548   346.53356324   294.82783389]
Reward: -1  Episode Reward:  50
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6410.9503443    405.81070979  -180.6       ]
------
Step:11, Action:East
State  111
Old Q Values:  [-177.44732869 7259.87885049  751.03868094 -120.29354603]
New Q values:  [-177.44732869 7259.87885049  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   346.53356324   294.82783389]
------
Step:12, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1277.68826995   881.64171089]
New Q values:  [ -253.44886264 -1902.20915811   611.86548126   881.64171089]
Reward: -1  Episode Reward:  48
xxxxx
x  ax
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  3.37967244e+02]
------
Step:13, Action:West
State  138
Old Q Values:  [ 1.39201587e+02 -2.78935791e+02 -3.22965309e-01  3.37967244e+02]
New Q values:  [ 139.20158703 -278.93579121   -0.32296531  238.54696668]
Reward: -1  Episode Reward:  47
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   346.53356324   294.82783389]
------
Step:14, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1720.38132676   -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  1254.18544909   -98.79429602]
Reward: -1  Episode Reward:  46
xxxxx
x gax
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  1888.77639464   660.86649319 -5740.77528236]
------
Step:15, Action:South
State  136
Old Q Values:  [-2129.37064562  1888.77639464   660.86649319 -5740.77528236]
New Q values:  [-2129.37064562  2700.96970636   660.86649319 -5740.77528236]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
x  ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1067.71312533  6486.86382835 -8220.10378799   911.8961227 ]
------
Step:16, Action:South
State  216
Old Q Values:  [ 1067.71312533  6486.86382835 -8220.10378799   911.8961227 ]
New Q values:  [ 1067.71312533  5478.85442221 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9615.6963029  -6396.61506955 -5588.09647059  -239.29187921]
------
Step:17, Action:West
State  288
Old Q Values:  [ 9615.6963029  -6396.61506955 -5588.09647059  -239.29187921]
New Q values:  [ 9615.6963029  -6396.61506955 -5588.09647059 70984.80106097]
Reward: 100009  Episode Reward:  100053
xxxxx
x   x
x   x
x agx
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2360.33758703   301.98873832 -2772.22255708   262.76946019]
------
Step:1, Action:North
State  181
Old Q Values:  [ 2360.33758703   301.98873832 -2772.22255708   262.76946019]
New Q values:  [ 1036.46536412   301.98873832 -2772.22255708   262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
xag.x
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   289.76776434 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-8463.16477134  1968.68675445  1513.28721073     0.        ]
New Q values:  [-8463.16477134  1937.4644424   1513.28721073     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5631.58166229  3835.29913541  3387.45329652 -4966.32149798]
------
Step:3, Action:South
State  180
Old Q Values:  [-5631.58166229  3835.29913541  3387.45329652 -4966.32149798]
New Q values:  [-5631.58166229  3817.59715594  3387.45329652 -4966.32149798]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
xg .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3396.37672025 -5704.51612281  7593.5916726  -5679.36893145]
------
Step:4, Action:East
State  261
Old Q Values:  [  689.57144784    26.73544252 -3310.67195649   240.60751196]
New Q values:  [ 689.57144784   26.73544252 6091.8233784   240.60751196]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197 24702.30720332   790.30511964]
------
Step:5, Action:East
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 24702.30720332   790.30511964]
New Q values:  [ 4757.67028198 -5807.06396197 31181.76319962   790.30511964]
Reward: 9  Episode Reward:  35
xxxxx
x g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9615.6963029  -6396.61506955 -5588.09647059 70984.80106097]
------
Step:6, Action:West
State  288
Old Q Values:  [ 9615.6963029  -6396.61506955 -5588.09647059 70984.80106097]
New Q values:  [ 9615.6963029  -6396.61506955 -5588.09647059 37747.84938428]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197 31181.76319962   790.30511964]
------
Step:7, Action:East
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 31181.76319962   790.30511964]
New Q values:  [ 4757.67028198 -5807.06396197 23796.46009513   790.30511964]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9615.6963029  -6396.61506955 -5588.09647059 37747.84938428]
------
Step:8, Action:North
State  288
Old Q Values:  [ 9615.6963029  -6396.61506955 -5588.09647059 37747.84938428]
New Q values:  [15472.42909016 -6396.61506955 -5588.09647059 37747.84938428]
Reward: 9  Episode Reward:  42
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[38735.83523     5008.8272407    790.72804752  1050.85266124]
------
Step:9, Action:North
State  208
Old Q Values:  [ 2544.62526808  4678.12018745 -4584.50430574 -1713.91177491]
New Q values:  [30456.17438996  4678.12018745 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  51
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344 27549.5211885   -180.00807518 98109.74760908]
------
Step:10, Action:West
State  130
Old Q Values:  [33256.45600344 27549.5211885   -180.00807518 98109.74760908]
New Q values:  [ 33256.45600344  27549.5211885    -180.00807518 136972.88756551]
Reward: 100009  Episode Reward:  100060
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6410.9503443    405.81070979  -180.6       ]
------
Step:1, Action:East
State  110
Old Q Values:  [ -239.29051573 -6410.9503443    405.81070979  -180.6       ]
New Q values:  [ -239.29051573 -6410.9503443    271.68435289  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   346.53356324   294.82783389]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   346.53356324   294.82783389]
New Q values:  [ -281.736      -1150.91067548   215.5775153    294.82783389]
Reward: 9  Episode Reward:  18
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 139.20158703 -278.93579121   -0.32296531  238.54696668]
------
Step:3, Action:West
State  136
Old Q Values:  [-2129.37064562  2700.96970636   660.86649319 -5740.77528236]
New Q values:  [-2129.37064562  2700.96970636   660.86649319 -1920.65447821]
Reward: -1  Episode Reward:  17
xxxxx
xga x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1254.18544909   -98.79429602]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   215.5775153    294.82783389]
New Q values:  [ -281.736      -1150.91067548   157.19509612   294.82783389]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 139.20158703 -278.93579121   -0.32296531  238.54696668]
------
Step:5, Action:West
State  138
Old Q Values:  [ 139.20158703 -278.93579121   -0.32296531  238.54696668]
New Q values:  [ 139.20158703 -278.93579121   -0.32296531  183.26713684]
Reward: -1  Episode Reward:  15
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   157.19509612   294.82783389]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   157.19509612   294.82783389]
New Q values:  [ -281.736      -1150.91067548   157.19509612   198.83643942]
Reward: -1  Episode Reward:  14
xxxxx
xa  x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6410.9503443    271.68435289  -180.6       ]
------
Step:7, Action:East
State  108
Old Q Values:  [-8463.16477134  1937.4644424   1513.28721073     0.        ]
New Q values:  [-8463.16477134  1937.4644424    980.97051902     0.        ]
Reward: -1  Episode Reward:  13
xxxxx
xga x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1254.18544909   -98.79429602]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   157.19509612   198.83643942]
New Q values:  [ -281.736      -1150.91067548   117.2581795    198.83643942]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 139.20158703 -278.93579121   -0.32296531  183.26713684]
------
Step:9, Action:West
State  138
Old Q Values:  [ 139.20158703 -278.93579121   -0.32296531  183.26713684]
New Q values:  [ 139.20158703 -278.93579121   -0.32296531  132.35778656]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   117.2581795    198.83643942]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   117.2581795    198.83643942]
New Q values:  [ -281.736      -1150.91067548   117.2581795    160.43988164]
Reward: -1  Episode Reward:  10
xxxxx
xa  x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6410.9503443    271.68435289  -180.6       ]
------
Step:11, Action:East
State  110
Old Q Values:  [ -239.29051573 -6410.9503443    271.68435289  -180.6       ]
New Q values:  [ -239.29051573 -6410.9503443    156.20570565  -180.6       ]
Reward: -1  Episode Reward:  9
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   117.2581795    160.43988164]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   117.2581795    160.43988164]
New Q values:  [ -281.736      -1150.91067548   117.2581795    110.43766435]
Reward: -1  Episode Reward:  8
xxxxx
xa  x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -6410.9503443    156.20570565  -180.6       ]
------
Step:13, Action:East
State  110
Old Q Values:  [ -239.29051573 -6410.9503443    156.20570565  -180.6       ]
New Q values:  [ -239.29051573 -6410.9503443     97.05973611  -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x a x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   117.2581795    110.43766435]
------
Step:14, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   117.2581795    110.43766435]
New Q values:  [ -281.736      -1150.91067548    88.06374791   110.43766435]
Reward: -1  Episode Reward:  6
xxxxx
x  ax
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 139.20158703 -278.93579121   -0.32296531  132.35778656]
------
Step:15, Action:North
State  138
Old Q Values:  [ 139.20158703 -278.93579121   -0.32296531  132.35778656]
New Q values:  [ -83.15888908 -278.93579121   -0.32296531  132.35778656]
Reward: -301  Episode Reward:  -295
xxxxx
x  ax
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -83.15888908 -278.93579121   -0.32296531  132.35778656]
------
Step:16, Action:West
State  138
Old Q Values:  [ -83.15888908 -278.93579121   -0.32296531  132.35778656]
New Q values:  [ -83.15888908 -278.93579121   -0.32296531  316.83562789]
Reward: -1  Episode Reward:  -296
xxxxx
x a x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126   881.64171089]
------
Step:17, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    88.06374791   110.43766435]
New Q values:  [ -281.736      -1150.91067548    88.06374791  2221.53872089]
Reward: -1  Episode Reward:  -297
xxxxx
xa  x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 7259.87885049  403.77554135 -120.29354603]
------
Step:18, Action:South
State  110
Old Q Values:  [ -239.29051573 -6410.9503443     97.05973611  -180.6       ]
New Q values:  [ -239.29051573 -7419.70099094    97.05973611  -180.6       ]
Reward: -10001  Episode Reward:  -10298
xxxxx
x   x
xg..x
x...x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15472.42909016 -6396.61506955 -5588.09647059 37747.84938428]
------
Step:1, Action:West
State  288
Old Q Values:  [15472.42909016 -6396.61506955 -5588.09647059 37747.84938428]
New Q values:  [15472.42909016 -6396.61506955 -5588.09647059 26179.65756637]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549  6506.16723947 36917.05937552]
------
Step:2, Action:West
State  273
Old Q Values:  [ 1232.03390306  -168.92307549  6506.16723947 36917.05937552]
New Q values:  [ 1232.03390306  -168.92307549  6506.16723947 16599.77076373]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 689.57144784   26.73544252 6091.8233784   240.60751196]
------
Step:3, Action:East
State  257
Old Q Values:  [27321.95510299 12764.58618105  2805.88401822  1875.31501677]
New Q values:  [27321.95510299 12764.58618105  3470.91448035  1875.31501677]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  7830.53624354  2476.08815422]
------
Step:4, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7830.53624354  2476.08815422]
New Q values:  [ 3514.02111757 -8521.23367799 10985.51176733  2476.08815422]
Reward: -1  Episode Reward:  16
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15472.42909016 -6396.61506955 -5588.09647059 26179.65756637]
------
Step:5, Action:West
State  288
Old Q Values:  [15472.42909016 -6396.61506955 -5588.09647059 26179.65756637]
New Q values:  [15472.42909016 -6396.61506955 -5588.09647059 13766.91655674]
Reward: -1  Episode Reward:  15
xxxxx
xg..x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 10985.51176733  2476.08815422]
------
Step:6, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 10985.51176733  2476.08815422]
New Q values:  [ 3514.02111757 -8521.23367799  9035.33343398  2476.08815422]
Reward: -1  Episode Reward:  14
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15472.42909016 -6396.61506955 -5588.09647059 13766.91655674]
------
Step:7, Action:North
State  288
Old Q Values:  [15472.42909016 -6396.61506955 -5588.09647059 13766.91655674]
New Q values:  [15325.22395305 -6396.61506955 -5588.09647059 13766.91655674]
Reward: -1  Episode Reward:  13
xxxxx
xg..x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30456.17438996  4678.12018745 -4584.50430574 -1713.91177491]
------
Step:8, Action:North
State  208
Old Q Values:  [30456.17438996  4678.12018745 -4584.50430574 -1713.91177491]
New Q values:  [53279.73602563  4678.12018745 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  22
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 33256.45600344  27549.5211885    -180.00807518 136972.88756551]
------
Step:9, Action:West
State  130
Old Q Values:  [ 33256.45600344  27549.5211885    -180.00807518 136972.88756551]
New Q values:  [33256.45600344 27549.5211885   -180.00807518 92518.14354808]
Reward: 9  Episode Reward:  31
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:10, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1254.18544909   -98.79429602]
New Q values:  [-9594.56523706 -8069.05606225  1254.18544909    65.73076831]
Reward: 9  Episode Reward:  40
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         332.82828905 -764.93196255    0.        ]
------
Step:11, Action:South
State  109
Old Q Values:  [ -241.10880094   289.76776434 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   432.24671497 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  49
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1036.46536412   301.98873832 -2772.22255708   262.76946019]
------
Step:12, Action:North
State  181
Old Q Values:  [ 1036.46536412   301.98873832 -2772.22255708   262.76946019]
New Q values:  [ 2591.94980079   301.98873832 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 7259.87885049  403.77554135 -120.29354603]
------
Step:13, Action:South
State  109
Old Q Values:  [ -241.10880094   432.24671497 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   949.88362623 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2591.94980079   301.98873832 -2772.22255708   262.76946019]
------
Step:14, Action:North
State  181
Old Q Values:  [ 2591.94980079   301.98873832 -2772.22255708   262.76946019]
New Q values:  [ 1321.14500819   301.98873832 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  46
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   949.88362623 -2165.66138672  -180.6       ]
------
Step:15, Action:South
State  108
Old Q Values:  [-8463.16477134  1937.4644424    980.97051902     0.        ]
New Q values:  [-8463.16477134  1919.66492374   980.97051902     0.        ]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5631.58166229  3817.59715594  3387.45329652 -4966.32149798]
------
Step:16, Action:South
State  180
Old Q Values:  [-5631.58166229  3817.59715594  3387.45329652 -4966.32149798]
New Q values:  [-5631.58166229  3804.51636416  3387.45329652 -4966.32149798]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3396.37672025 -5704.51612281  7593.5916726  -5679.36893145]
------
Step:17, Action:East
State  260
Old Q Values:  [-3396.37672025 -5704.51612281  7593.5916726  -5679.36893145]
New Q values:  [-3396.37672025 -5704.51612281  5747.43669923 -5679.36893145]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  9035.33343398  2476.08815422]
------
Step:18, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  9035.33343398  2476.08815422]
New Q values:  [ 3514.02111757 -8521.23367799  8211.10055951  2476.08815422]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15325.22395305 -6396.61506955 -5588.09647059 13766.91655674]
------
Step:19, Action:North
State  288
Old Q Values:  [15325.22395305 -6396.61506955 -5588.09647059 13766.91655674]
New Q values:  [22113.41038891 -6396.61506955 -5588.09647059 13766.91655674]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[53279.73602563  4678.12018745 -4584.50430574 -1713.91177491]
------
Step:20, Action:North
State  208
Old Q Values:  [53279.73602563  4678.12018745 -4584.50430574 -1713.91177491]
New Q values:  [22121.58532216  4678.12018745 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  40
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2700.96970636   660.86649319 -1920.65447821]
------
Step:21, Action:South
State  136
Old Q Values:  [-2129.37064562  2700.96970636   660.86649319 -1920.65447821]
New Q values:  [-2129.37064562  7716.26347919   660.86649319 -1920.65447821]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22121.58532216  4678.12018745 -4584.50430574 -1713.91177491]
------
Step:22, Action:South
State  208
Old Q Values:  [22121.58532216  4678.12018745 -4584.50430574 -1713.91177491]
New Q values:  [22121.58532216  8504.67119165 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  38
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[22113.41038891 -6396.61506955 -5588.09647059 13766.91655674]
------
Step:23, Action:North
State  288
Old Q Values:  [22113.41038891 -6396.61506955 -5588.09647059 13766.91655674]
New Q values:  [15481.23975221 -6396.61506955 -5588.09647059 13766.91655674]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22121.58532216  8504.67119165 -4584.50430574 -1713.91177491]
------
Step:24, Action:South
State  208
Old Q Values:  [22121.58532216  8504.67119165 -4584.50430574 -1713.91177491]
New Q values:  [22121.58532216  8045.64040232 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  36
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15481.23975221 -6396.61506955 -5588.09647059 13766.91655674]
------
Step:25, Action:North
State  288
Old Q Values:  [15481.23975221 -6396.61506955 -5588.09647059 13766.91655674]
New Q values:  [12828.37149753 -6396.61506955 -5588.09647059 13766.91655674]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22121.58532216  8045.64040232 -4584.50430574 -1713.91177491]
------
Step:26, Action:South
State  208
Old Q Values:  [22121.58532216  8045.64040232 -4584.50430574 -1713.91177491]
New Q values:  [22121.58532216  7347.73112795 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12828.37149753 -6396.61506955 -5588.09647059 13766.91655674]
------
Step:27, Action:West
State  288
Old Q Values:  [12828.37149753 -6396.61506955 -5588.09647059 13766.91655674]
New Q values:  [12828.37149753 -6396.61506955 -5588.09647059 10486.09785182]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549  6506.16723947 16599.77076373]
------
Step:28, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  8211.10055951  2476.08815422]
New Q values:  [ 3514.02111757 -8521.23367799  8211.10055951  2817.38227521]
Reward: -1  Episode Reward:  32
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 689.57144784   26.73544252 6091.8233784   240.60751196]
------
Step:29, Action:East
State  257
Old Q Values:  [27321.95510299 12764.58618105  3470.91448035  1875.31501677]
New Q values:  [27321.95510299 12764.58618105  3851.09595999  1875.31501677]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  8211.10055951  2817.38227521]
------
Step:30, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  8211.10055951  2817.38227521]
New Q values:  [ 3514.02111757 -8521.23367799  7132.35167306  2817.38227521]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12828.37149753 -6396.61506955 -5588.09647059 10486.09785182]
------
Step:31, Action:North
State  288
Old Q Values:  [12828.37149753 -6396.61506955 -5588.09647059 10486.09785182]
New Q values:  [16751.49916801 -6396.61506955 -5588.09647059 10486.09785182]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[38735.83523     5008.8272407    790.72804752  1050.85266124]
------
Step:32, Action:North
State  208
Old Q Values:  [22121.58532216  7347.73112795 -4584.50430574 -1713.91177491]
New Q values:  [ 8943.08481723  7347.73112795 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -83.15888908 -278.93579121   -0.32296531  316.83562789]
------
Step:33, Action:West
State  130
Old Q Values:  [33256.45600344 27549.5211885   -180.00807518 92518.14354808]
New Q values:  [33256.45600344 27549.5211885   -180.00807518 74730.24594111]
Reward: -1  Episode Reward:  27
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:34, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548    88.06374791  2221.53872089]
New Q values:  [ -281.736      -1150.91067548    88.06374791  3065.9791435 ]
Reward: -1  Episode Reward:  26
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 7259.87885049  403.77554135 -120.29354603]
------
Step:35, Action:South
State  111
Old Q Values:  [-177.44732869 7259.87885049  403.77554135 -120.29354603]
New Q values:  [-177.44732869 6326.95070318  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  755.81926962  1272.6087003  11411.99720993  1554.80203889]
------
Step:36, Action:East
State  179
Old Q Values:  [82228.67666629 16101.90751562 47160.04204891     0.        ]
New Q values:  [82228.67666629 16101.90751562 90855.45697393     0.        ]
Reward: 100009  Episode Reward:  100034
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16751.49916801 -6396.61506955 -5588.09647059 10486.09785182]
------
Step:1, Action:North
State  288
Old Q Values:  [16751.49916801 -6396.61506955 -5588.09647059 10486.09785182]
New Q values:  [ 9388.92511238 -6396.61506955 -5588.09647059 10486.09785182]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8943.08481723  7347.73112795 -4584.50430574 -1713.91177491]
------
Step:2, Action:South
State  208
Old Q Values:  [ 8943.08481723  7347.73112795 -4584.50430574 -1713.91177491]
New Q values:  [ 8943.08481723  6084.32180673 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9388.92511238 -6396.61506955 -5588.09647059 10486.09785182]
------
Step:3, Action:West
State  288
Old Q Values:  [ 9388.92511238 -6396.61506955 -5588.09647059 10486.09785182]
New Q values:  [ 9388.92511238 -6396.61506955 -5588.09647059  6339.54464265]
Reward: 9  Episode Reward:  17
xxxxx
xg .x
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  7132.35167306  2817.38227521]
------
Step:4, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7132.35167306  2817.38227521]
New Q values:  [ 3514.02111757 -8521.23367799  5669.01820294  2817.38227521]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9388.92511238 -6396.61506955 -5588.09647059  6339.54464265]
------
Step:5, Action:North
State  288
Old Q Values:  [ 9388.92511238 -6396.61506955 -5588.09647059  6339.54464265]
New Q values:  [ 6437.89549012 -6396.61506955 -5588.09647059  6339.54464265]
Reward: -1  Episode Reward:  15
xxxxx
x. .x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8943.08481723  6084.32180673 -4584.50430574 -1713.91177491]
------
Step:6, Action:North
State  208
Old Q Values:  [ 8943.08481723  6084.32180673 -4584.50430574 -1713.91177491]
New Q values:  [ 5897.51297065  6084.32180673 -4584.50430574 -1713.91177491]
Reward: 9  Episode Reward:  24
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  7716.26347919   660.86649319 -1920.65447821]
------
Step:7, Action:South
State  136
Old Q Values:  [-2129.37064562  7716.26347919   660.86649319 -1920.65447821]
New Q values:  [-2129.37064562  4911.20193369   660.86649319 -1920.65447821]
Reward: -1  Episode Reward:  23
xxxxx
x. gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5897.51297065  6084.32180673 -4584.50430574 -1713.91177491]
------
Step:8, Action:South
State  208
Old Q Values:  [ 5897.51297065  6084.32180673 -4584.50430574 -1713.91177491]
New Q values:  [ 5897.51297065  4364.49736973 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6437.89549012 -6396.61506955 -5588.09647059  6339.54464265]
------
Step:9, Action:West
State  288
Old Q Values:  [ 6437.89549012 -6396.61506955 -5588.09647059  6339.54464265]
New Q values:  [ 6437.89549012 -6396.61506955 -5588.09647059  7515.14908618]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549  6506.16723947 16599.77076373]
------
Step:10, Action:West
State  273
Old Q Values:  [ 1232.03390306  -168.92307549  6506.16723947 16599.77076373]
New Q values:  [ 1232.03390306  -168.92307549  6506.16723947 14841.89483639]
Reward: 9  Episode Reward:  30
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[27321.95510299 12764.58618105  3851.09595999  1875.31501677]
------
Step:11, Action:North
State  257
Old Q Values:  [27321.95510299 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [31455.85234806 12764.58618105  3851.09595999  1875.31501677]
Reward: 9  Episode Reward:  39
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  68405.56768954     0.        ]
------
Step:12, Action:North
State  181
Old Q Values:  [ 1321.14500819   301.98873832 -2772.22255708   262.76946019]
New Q values:  [  818.82309114   301.98873832 -2772.22255708   262.76946019]
Reward: 9  Episode Reward:  48
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   949.88362623 -2165.66138672  -180.6       ]
------
Step:13, Action:South
State  99
Old Q Values:  [    0.         25781.38066566 51580.7370385      0.        ]
New Q values:  [    0.         30833.62257313 51580.7370385      0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  68405.56768954     0.        ]
------
Step:14, Action:North
State  181
Old Q Values:  [  818.82309114   301.98873832 -2772.22255708   262.76946019]
New Q values:  [  611.89432433   301.98873832 -2772.22255708   262.76946019]
Reward: -1  Episode Reward:  46
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   949.88362623 -2165.66138672  -180.6       ]
------
Step:15, Action:South
State  108
Old Q Values:  [-8463.16477134  1919.66492374   980.97051902     0.        ]
New Q values:  [-8463.16477134  1908.62087874   980.97051902     0.        ]
Reward: -1  Episode Reward:  45
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5631.58166229  3804.51636416  3387.45329652 -4966.32149798]
------
Step:16, Action:South
State  180
Old Q Values:  [-5631.58166229  3804.51636416  3387.45329652 -4966.32149798]
New Q values:  [-5631.58166229  3245.43755543  3387.45329652 -4966.32149798]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3396.37672025 -5704.51612281  5747.43669923 -5679.36893145]
------
Step:17, Action:East
State  260
Old Q Values:  [-3396.37672025 -5704.51612281  5747.43669923 -5679.36893145]
New Q values:  [-3396.37672025 -5704.51612281  3999.08014058 -5679.36893145]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  5669.01820294  2817.38227521]
------
Step:18, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  5669.01820294  2817.38227521]
New Q values:  [ 3514.02111757 -8521.23367799  4521.55200703  2817.38227521]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6437.89549012 -6396.61506955 -5588.09647059  7515.14908618]
------
Step:19, Action:West
State  288
Old Q Values:  [ 6437.89549012 -6396.61506955 -5588.09647059  7515.14908618]
New Q values:  [ 6437.89549012 -6396.61506955 -5588.09647059  4361.92523658]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  4521.55200703  2817.38227521]
------
Step:20, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  4521.55200703  2817.38227521]
New Q values:  [ 3514.02111757 -8521.23367799  3739.38944985  2817.38227521]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6437.89549012 -6396.61506955 -5588.09647059  4361.92523658]
------
Step:21, Action:North
State  288
Old Q Values:  [ 6437.89549012 -6396.61506955 -5588.09647059  4361.92523658]
New Q values:  [ 4343.81208724 -6396.61506955 -5588.09647059  4361.92523658]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5897.51297065  4364.49736973 -4584.50430574 -1713.91177491]
------
Step:22, Action:South
State  208
Old Q Values:  [ 5897.51297065  4364.49736973 -4584.50430574 -1713.91177491]
New Q values:  [ 5897.51297065  3053.77651886 -4584.50430574 -1713.91177491]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4343.81208724 -6396.61506955 -5588.09647059  4361.92523658]
------
Step:23, Action:West
State  288
Old Q Values:  [ 4343.81208724 -6396.61506955 -5588.09647059  4361.92523658]
New Q values:  [ 4343.81208724 -6396.61506955 -5588.09647059  6196.73854555]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x . x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549  6506.16723947 14841.89483639]
------
Step:24, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  3739.38944985  2817.38227521]
New Q values:  [ 3514.02111757 -8521.23367799  3739.38944985  2953.8999236 ]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 689.57144784   26.73544252 6091.8233784   240.60751196]
------
Step:25, Action:North
State  260
Old Q Values:  [-3396.37672025 -5704.51612281  3999.08014058 -5679.36893145]
New Q values:  [ 3652.85157879 -5704.51612281  3999.08014058 -5679.36893145]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 16706.67422296     0.        ]
------
Step:26, Action:East
State  176
Old Q Values:  [103770.24881615   1621.55095326  90786.49387936      0.        ]
New Q values:  [103770.24881615   1621.55095326 102679.38705514      0.        ]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5897.51297065  3053.77651886 -4584.50430574 -1713.91177491]
------
Step:1, Action:North
State  216
Old Q Values:  [ 1067.71312533  5478.85442221 -8220.10378799   911.8961227 ]
New Q values:  [ 1905.84583024  5478.85442221 -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  4911.20193369   660.86649319 -1920.65447821]
------
Step:2, Action:South
State  136
Old Q Values:  [-2129.37064562  4911.20193369   660.86649319 -1920.65447821]
New Q values:  [-2129.37064562  3733.13466467   660.86649319 -1920.65447821]
Reward: -1  Episode Reward:  8
xxxxx
xg. x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5897.51297065  3053.77651886 -4584.50430574 -1713.91177491]
------
Step:3, Action:North
State  216
Old Q Values:  [ 1905.84583024  5478.85442221 -8220.10378799   911.8961227 ]
New Q values:  [  856.78902046  5478.85442221 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  7
xxxxx
x .ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -83.15888908 -278.93579121   -0.32296531  316.83562789]
------
Step:4, Action:West
State  136
Old Q Values:  [-2129.37064562  3733.13466467   660.86649319 -1920.65447821]
New Q values:  [-2129.37064562  3733.13466467   660.86649319  -386.60615656]
Reward: 9  Episode Reward:  16
xxxxx
xga x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1254.18544909    65.73076831]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548    88.06374791  3065.9791435 ]
New Q values:  [ -281.736      -1150.91067548   129.67618753  3065.9791435 ]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ -83.15888908 -278.93579121   -0.32296531  316.83562789]
------
Step:6, Action:West
State  138
Old Q Values:  [ -83.15888908 -278.93579121   -0.32296531  316.83562789]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.04592799e+03]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753  3065.9791435 ]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753  3065.9791435 ]
New Q values:  [ -281.736      -1150.91067548   129.67618753  1542.68515015]
Reward: -1  Episode Reward:  13
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1056.31164251  790.4386324  -252.78192178]
------
Step:8, Action:South
State  111
Old Q Values:  [-177.44732869 6326.95070318  403.77554135 -120.29354603]
New Q values:  [-177.44732869 3312.32418632  403.77554135 -120.29354603]
Reward: 9  Episode Reward:  22
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627  922.1060418  2587.14635017  154.04646645]
------
Step:9, Action:South
State  183
Old Q Values:  [  755.81926962  1272.6087003  11411.99720993  1554.80203889]
New Q values:  [  755.81926962  2341.99049364 11411.99720993  1554.80203889]
Reward: 9  Episode Reward:  31
xxxxx
x   x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 689.57144784   26.73544252 6091.8233784   240.60751196]
------
Step:10, Action:North
State  261
Old Q Values:  [ 689.57144784   26.73544252 6091.8233784   240.60751196]
New Q values:  [3698.82774212   26.73544252 6091.8233784   240.60751196]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  755.81926962  2341.99049364 11411.99720993  1554.80203889]
------
Step:11, Action:East
State  191
Old Q Values:  [   3.06655861  941.79461398 1211.41037433    0.        ]
New Q values:  [   3.06655861  941.79461398 1241.2088357     0.        ]
Reward: 9  Episode Reward:  39
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.         2504.14895323 1418.80979599    0.        ]
------
Step:12, Action:East
State  200
Old Q Values:  [ 169.9257398  4092.774894   1206.6363327   568.38654082]
New Q values:  [ 169.9257398  4092.774894   2125.71085974  568.38654082]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  856.78902046  5478.85442221 -8220.10378799   911.8961227 ]
------
Step:13, Action:South
State  210
Old Q Values:  [38735.83523     5008.8272407    790.72804752  1050.85266124]
New Q values:  [38735.83523     3867.95245994   790.72804752  1050.85266124]
Reward: 9  Episode Reward:  47
xxxxx
x   x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4343.81208724 -6396.61506955 -5588.09647059  6196.73854555]
------
Step:14, Action:North
State  288
Old Q Values:  [ 4343.81208724 -6396.61506955 -5588.09647059  6196.73854555]
New Q values:  [ 2737.45336505 -6396.61506955 -5588.09647059  6196.73854555]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 409.10562358 3335.09510052    0.         1540.00681929]
------
Step:15, Action:South
State  210
Old Q Values:  [38735.83523     3867.95245994   790.72804752  1050.85266124]
New Q values:  [38735.83523     3405.60254764   790.72804752  1050.85266124]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2737.45336505 -6396.61506955 -5588.09647059  6196.73854555]
------
Step:16, Action:North
State  288
Old Q Values:  [ 2737.45336505 -6396.61506955 -5588.09647059  6196.73854555]
New Q values:  [ 2094.90987618 -6396.61506955 -5588.09647059  6196.73854555]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 409.10562358 3335.09510052    0.         1540.00681929]
------
Step:17, Action:West
State  210
Old Q Values:  [38735.83523     3405.60254764   790.72804752  1050.85266124]
New Q values:  [38735.83523     3405.60254764   790.72804752 12405.78121886]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  3.99534672e+04  2.17939995e+03]
------
Step:18, Action:East
State  202
Old Q Values:  [    0.         -8753.98842238  2672.40916596  1278.19575341]
New Q values:  [    0.         -8753.98842238  2068.89219654  1278.19575341]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 409.10562358 3335.09510052    0.         1540.00681929]
------
Step:19, Action:South
State  210
Old Q Values:  [38735.83523     3405.60254764   790.72804752 12405.78121886]
New Q values:  [38735.83523     3220.66258272   790.72804752 12405.78121886]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2094.90987618 -6396.61506955 -5588.09647059  6196.73854555]
------
Step:20, Action:North
State  288
Old Q Values:  [ 2094.90987618 -6396.61506955 -5588.09647059  6196.73854555]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059  6196.73854555]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  856.78902046  5478.85442221 -8220.10378799   911.8961227 ]
------
Step:21, Action:South
State  216
Old Q Values:  [  856.78902046  5478.85442221 -8220.10378799   911.8961227 ]
New Q values:  [  856.78902046  4049.96333255 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059  6196.73854555]
------
Step:22, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059  6196.73854555]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059 63605.91225317]
Reward: 100009  Episode Reward:  100048
xxxxx
xg  x
x   x
x a x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  3739.38944985  2953.8999236 ]
------
Step:1, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  3739.38944985  2953.8999236 ]
New Q values:  [ 3514.02111757 -8521.23367799 20582.92945589  2953.8999236 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059 63605.91225317]
------
Step:2, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059 63605.91225317]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059 31616.64373804]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x ..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 20582.92945589  2953.8999236 ]
------
Step:3, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 20582.92945589  2953.8999236 ]
New Q values:  [ 3514.02111757 -8521.23367799 17717.56490377  2953.8999236 ]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059 31616.64373804]
------
Step:4, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059 31616.64373804]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059 17961.32696634]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 17717.56490377  2953.8999236 ]
------
Step:5, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 17717.56490377  2953.8999236 ]
New Q values:  [ 3514.02111757 -8521.23367799 12474.82405141  2953.8999236 ]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059 17961.32696634]
------
Step:6, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059 17961.32696634]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059 11636.49923745]
Reward: -1  Episode Reward:  4
xxxxx
x...x
x .gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  -168.92307549  6506.16723947 14841.89483639]
------
Step:7, Action:West
State  273
Old Q Values:  [ 1232.03390306  -168.92307549  6506.16723947 14841.89483639]
New Q values:  [1232.03390306 -168.92307549 6506.16723947 7769.70494808]
Reward: 9  Episode Reward:  13
xxxxx
x...x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3698.82774212   26.73544252 6091.8233784   240.60751196]
------
Step:8, Action:East
State  261
Old Q Values:  [3698.82774212   26.73544252 6091.8233784   240.60751196]
New Q values:  [3698.82774212   26.73544252 4767.04083578  240.60751196]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1232.03390306 -168.92307549 6506.16723947 7769.70494808]
------
Step:9, Action:West
State  273
Old Q Values:  [1232.03390306 -168.92307549 6506.16723947 7769.70494808]
New Q values:  [1232.03390306 -168.92307549 6506.16723947 4537.39422997]
Reward: -1  Episode Reward:  11
xxxxx
x..gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3698.82774212   26.73544252 4767.04083578  240.60751196]
------
Step:10, Action:East
State  261
Old Q Values:  [3698.82774212   26.73544252 4767.04083578  240.60751196]
New Q values:  [3698.82774212   26.73544252 5648.66354974  240.60751196]
Reward: -1  Episode Reward:  10
xxxxx
x.g.x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 12474.82405141  2953.8999236 ]
------
Step:11, Action:East
State  273
Old Q Values:  [1232.03390306 -168.92307549 6506.16723947 4537.39422997]
New Q values:  [1232.03390306 -168.92307549 6092.81666703 4537.39422997]
Reward: -1  Episode Reward:  9
xxxxx
x..gx
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059 11636.49923745]
------
Step:12, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059 11636.49923745]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059  6481.84469509]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x .gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1232.03390306 -168.92307549 6092.81666703 4537.39422997]
------
Step:13, Action:East
State  273
Old Q Values:  [1232.03390306 -168.92307549 6092.81666703 4537.39422997]
New Q values:  [ 1232.03390306  -168.92307549 -1618.91992466  4537.39422997]
Reward: -10001  Episode Reward:  -9993
xxxxx
x...x
x ..x
x  gx
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3312.32418632  403.77554135 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 4973.13093058  238.35800069    0.        ]
New Q values:  [ 221.30610858 5418.25153521  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  755.81926962  2341.99049364 11411.99720993  1554.80203889]
------
Step:2, Action:East
State  181
Old Q Values:  [  611.89432433   301.98873832 -2772.22255708   262.76946019]
New Q values:  [ 611.89432433  301.98873832 -665.19606884  262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1141.2672067   -222.69561341  1460.9765133 ]
------
Step:3, Action:West
State  193
Old Q Values:  [-5922.26708831  1141.2672067   -222.69561341  1460.9765133 ]
New Q values:  [-5922.26708831  1141.2672067   -222.69561341   767.35890262]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
xa .x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 611.89432433  301.98873832 -665.19606884  262.76946019]
------
Step:4, Action:North
State  181
Old Q Values:  [ 611.89432433  301.98873832 -665.19606884  262.76946019]
New Q values:  [ 529.1228176   301.98873832 -665.19606884  262.76946019]
Reward: -1  Episode Reward:  16
xxxxx
xag.x
x  .x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   949.88362623 -2165.66138672  -180.6       ]
------
Step:5, Action:South
State  103
Old Q Values:  [ 221.30610858 5418.25153521  238.35800069    0.        ]
New Q values:  [ 221.30610858 2325.43745936  238.35800069    0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xag.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 529.1228176   301.98873832 -665.19606884  262.76946019]
------
Step:6, Action:North
State  183
Old Q Values:  [  755.81926962  2341.99049364 11411.99720993  1554.80203889]
New Q values:  [  999.35894566  2341.99049364 11411.99720993  1554.80203889]
Reward: -1  Episode Reward:  14
xxxxx
xa..x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2325.43745936  238.35800069    0.        ]
------
Step:7, Action:South
State  103
Old Q Values:  [ 221.30610858 2325.43745936  238.35800069    0.        ]
New Q values:  [ 221.30610858 4353.17414673  238.35800069    0.        ]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
xa .x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  999.35894566  2341.99049364 11411.99720993  1554.80203889]
------
Step:8, Action:East
State  181
Old Q Values:  [ 529.1228176   301.98873832 -665.19606884  262.76946019]
New Q values:  [529.1228176  301.98873832  75.70173447 262.76946019]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1141.2672067   -222.69561341   767.35890262]
------
Step:9, Action:South
State  193
Old Q Values:  [-5922.26708831  1141.2672067   -222.69561341   767.35890262]
New Q values:  [-5922.26708831  1817.12515167  -222.69561341   767.35890262]
Reward: -1  Episode Reward:  11
xxxxx
x .gx
x  .x
x.a.x
xxxxx
Step:10, Action:South
State  273
Old Q Values:  [ 1232.03390306  -168.92307549 -1618.91992466  4537.39422997]
New Q values:  [ 1232.03390306  1113.04903879 -1618.91992466  4537.39422997]
Reward: -301  Episode Reward:  -290
xxxxx
x ..x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 -1618.91992466  4537.39422997]
------
Step:11, Action:West
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 23796.46009513   790.30511964]
New Q values:  [ 4757.67028198 -5807.06396197 23796.46009513  2016.12111278]
Reward: 9  Episode Reward:  -281
xxxxx
x ..x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3698.82774212   26.73544252 5648.66354974  240.60751196]
------
Step:12, Action:East
State  260
Old Q Values:  [ 3652.85157879 -5704.51612281  3999.08014058 -5679.36893145]
New Q values:  [ 3652.85157879 -5704.51612281  8737.97008477 -5679.36893145]
Reward: -1  Episode Reward:  -282
xxxxx
x ..x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197 23796.46009513  2016.12111278]
------
Step:13, Action:East
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 23796.46009513  2016.12111278]
New Q values:  [ 4757.67028198 -5807.06396197 11468.53744658  2016.12111278]
Reward: 9  Episode Reward:  -273
xxxxx
x ..x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059  6481.84469509]
------
Step:14, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059  6481.84469509]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059  3953.35614703]
Reward: -1  Episode Reward:  -274
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 -1618.91992466  4537.39422997]
------
Step:15, Action:West
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 11468.53744658  2016.12111278]
New Q values:  [ 4757.67028198 -5807.06396197 11468.53744658  2500.44751003]
Reward: -1  Episode Reward:  -275
xxxxx
x ..x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3698.82774212   26.73544252 5648.66354974  240.60751196]
------
Step:16, Action:East
State  261
Old Q Values:  [3698.82774212   26.73544252 5648.66354974  240.60751196]
New Q values:  [3698.82774212   26.73544252 3620.08368888  240.60751196]
Reward: -1  Episode Reward:  -276
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 -1618.91992466  4537.39422997]
------
Step:17, Action:West
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 -1618.91992466  4537.39422997]
New Q values:  [ 1232.03390306  1113.04903879 -1618.91992466  2924.00601462]
Reward: -1  Episode Reward:  -277
xxxxx
x .gx
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3698.82774212   26.73544252 3620.08368888  240.60751196]
------
Step:18, Action:North
State  261
Old Q Values:  [3698.82774212   26.73544252 3620.08368888  240.60751196]
New Q values:  [1637.66794213   26.73544252 3620.08368888  240.60751196]
Reward: -1  Episode Reward:  -278
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[529.1228176  301.98873832  75.70173447 262.76946019]
------
Step:19, Action:North
State  181
Old Q Values:  [529.1228176  301.98873832  75.70173447 262.76946019]
New Q values:  [493.31253896 301.98873832  75.70173447 262.76946019]
Reward: -1  Episode Reward:  -279
xxxxx
xa.gx
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[  0.         940.87803975   0.           0.        ]
------
Step:20, Action:South
State  101
Old Q Values:  [  0.         940.87803975   0.           0.        ]
New Q values:  [  0.         523.74497759   0.           0.        ]
Reward: -1  Episode Reward:  -280
xxxxx
x g.x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[493.31253896 301.98873832  75.70173447 262.76946019]
------
Step:21, Action:North
State  181
Old Q Values:  [493.31253896 301.98873832  75.70173447 262.76946019]
New Q values:  [1502.6772596   301.98873832   75.70173447  262.76946019]
Reward: -1  Episode Reward:  -281
xxxxx
xa..x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 4353.17414673  238.35800069    0.        ]
------
Step:22, Action:South
State  103
Old Q Values:  [ 221.30610858 4353.17414673  238.35800069    0.        ]
New Q values:  [ 221.30610858 2191.47283657  238.35800069    0.        ]
Reward: -1  Episode Reward:  -282
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1502.6772596   301.98873832   75.70173447  262.76946019]
------
Step:23, Action:North
State  183
Old Q Values:  [  999.35894566  2341.99049364 11411.99720993  1554.80203889]
New Q values:  [ 1056.58542923  2341.99049364 11411.99720993  1554.80203889]
Reward: -1  Episode Reward:  -283
xxxxx
xa..x
x  .x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2191.47283657  238.35800069    0.        ]
------
Step:24, Action:South
State  103
Old Q Values:  [ 221.30610858 2191.47283657  238.35800069    0.        ]
New Q values:  [ 221.30610858 4299.58829761  238.35800069    0.        ]
Reward: -1  Episode Reward:  -284
xxxxx
x ..x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1056.58542923  2341.99049364 11411.99720993  1554.80203889]
------
Step:25, Action:East
State  181
Old Q Values:  [1502.6772596   301.98873832   75.70173447  262.76946019]
New Q values:  [ 1502.6772596    301.98873832 -2373.61630148   262.76946019]
Reward: -10001  Episode Reward:  -10285
xxxxx
x ..x
x g.x
x   x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[38735.83523     3220.66258272   790.72804752 12405.78121886]
------
Step:1, Action:North
State  210
Old Q Values:  [38735.83523     3220.66258272   790.72804752 12405.78121886]
New Q values:  [15813.51249026  3220.66258272   790.72804752 12405.78121886]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.04592799e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.04592799e+03]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  6.88263711e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126   881.64171089]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753  1542.68515015]
New Q values:  [ -281.736      -1150.91067548   129.67618753   939.36755281]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1056.31164251  790.4386324  -252.78192178]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 3312.32418632  403.77554135 -120.29354603]
New Q values:  [-177.44732869 4753.92883751  403.77554135 -120.29354603]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1056.58542923  2341.99049364 11411.99720993  1554.80203889]
------
Step:5, Action:East
State  185
Old Q Values:  [ 189.19059642    0.         1997.0360803  -178.98      ]
New Q values:  [ 189.19059642    0.         2216.71015812 -178.98      ]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4708.31908668 -789.02220255 1702.07235953]
------
Step:6, Action:South
State  203
Old Q Values:  [3.60604218e+00 3.97827546e+03 2.05741271e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 2.47391199e+03 2.05741271e+03 9.06816004e+03]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 -1618.91992466  2924.00601462]
------
Step:7, Action:West
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 -1618.91992466  2924.00601462]
New Q values:  [ 1232.03390306  1113.04903879 -1618.91992466 70611.75811027]
Reward: 100009  Episode Reward:  100063
xxxxx
x   x
x   x
xa gx
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1056.58542923  2341.99049364 11411.99720993  1554.80203889]
------
Step:1, Action:East
State  183
Old Q Values:  [ 1056.58542923  2341.99049364 11411.99720993  1554.80203889]
New Q values:  [ 1056.58542923  2341.99049364 16556.23903834  1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  3.99534672e+04  2.17939995e+03]
------
Step:2, Action:East
State  195
Old Q Values:  [   38.85388605  2554.09283541 35665.85224738  1169.39963074]
New Q values:  [   38.85388605  2554.09283541 19015.79464603  1169.39963074]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[15813.51249026  3220.66258272   790.72804752 12405.78121886]
------
Step:3, Action:North
State  210
Old Q Values:  [15813.51249026  3220.66258272   790.72804752 12405.78121886]
New Q values:  [28749.87877844  3220.66258272   790.72804752 12405.78121886]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[33256.45600344 27549.5211885   -180.00807518 74730.24594111]
------
Step:4, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  6.88263711e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.62515750e+02]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753   939.36755281]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   939.36755281]
New Q values:  [ -281.736      -1150.91067548   129.67618753   908.13410532]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494  1756.62361399  -180.6       ]
------
Step:6, Action:East
State  110
Old Q Values:  [ -239.29051573 -7419.70099094    97.05973611  -180.6       ]
New Q values:  [ -239.29051573 -7419.70099094   310.66412604  -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753   908.13410532]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   908.13410532]
New Q values:  [ -281.736      -1150.91067548   129.67618753   679.54713488]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1056.31164251  790.4386324  -252.78192178]
------
Step:8, Action:South
State  107
Old Q Values:  [-252.35169558 1056.31164251  790.4386324  -252.78192178]
New Q values:  [-252.35169558  963.00353246  790.4386324  -252.78192178]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         1803.59625152    0.        ]
------
Step:9, Action:East
State  185
Old Q Values:  [ 189.19059642    0.         2216.71015812 -178.98      ]
New Q values:  [ 189.19059642    0.         2298.57978925 -178.98      ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4708.31908668 -789.02220255 1702.07235953]
------
Step:10, Action:South
State  200
Old Q Values:  [ 169.9257398  4092.774894   2125.71085974  568.38654082]
New Q values:  [ 169.9257398  5384.95717302 2125.71085974  568.38654082]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 12474.82405141  2953.8999236 ]
------
Step:11, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 12474.82405141  2953.8999236 ]
New Q values:  [ 3514.02111757 -8521.23367799  6175.33646467  2953.8999236 ]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059  3953.35614703]
------
Step:12, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059  3953.35614703]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059  3433.34339821]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  6175.33646467  2953.8999236 ]
------
Step:13, Action:East
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 -1618.91992466 70611.75811027]
New Q values:  [ 1232.03390306  1113.04903879   381.8350496  70611.75811027]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059  3433.34339821]
------
Step:14, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059  3433.34339821]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059 22556.26479236]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879   381.8350496  70611.75811027]
------
Step:15, Action:West
State  273
Old Q Values:  [ 1232.03390306  1113.04903879   381.8350496  70611.75811027]
New Q values:  [ 1232.03390306  1113.04903879   381.8350496  97686.85894852]
Reward: 100009  Episode Reward:  100055
xxxxx
x  gx
x   x
xa  x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28749.87877844  3220.66258272   790.72804752 12405.78121886]
------
Step:1, Action:North
State  210
Old Q Values:  [28749.87877844  3220.66258272   790.72804752 12405.78121886]
New Q values:  [11674.10623644  3220.66258272   790.72804752 12405.78121886]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.62515750e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.62515750e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  4.94898813e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126   881.64171089]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   679.54713488]
New Q values:  [ -281.736      -1150.91067548   129.67618753  1703.39750521]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x.. x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4753.92883751  403.77554135 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 4753.92883751  403.77554135 -120.29354603]
New Q values:  [-177.44732869 2683.11544005  403.77554135 -120.29354603]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627  922.1060418  2587.14635017  154.04646645]
------
Step:5, Action:South
State  189
Old Q Values:  [ 337.36081627  922.1060418  2587.14635017  154.04646645]
New Q values:  [ 337.36081627 1454.26752338 2587.14635017  154.04646645]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1637.66794213   26.73544252 3620.08368888  240.60751196]
------
Step:6, Action:East
State  261
Old Q Values:  [1637.66794213   26.73544252 3620.08368888  240.60751196]
New Q values:  [1.63766794e+03 2.67354425e+01 3.07594912e+04 2.40607512e+02]
Reward: 9  Episode Reward:  44
xxxxx
x  gx
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879   381.8350496  97686.85894852]
------
Step:7, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  6175.33646467  2953.8999236 ]
New Q values:  [ 3514.02111757 -8521.23367799  6175.33646467 10408.80731747]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x . x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1.63766794e+03 2.67354425e+01 3.07594912e+04 2.40607512e+02]
------
Step:8, Action:East
State  261
Old Q Values:  [1.63766794e+03 2.67354425e+01 3.07594912e+04 2.40607512e+02]
New Q values:  [ 1637.66794213    26.73544252 15743.75769802   240.60751196]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197 11468.53744658  2500.44751003]
------
Step:9, Action:East
State  273
Old Q Values:  [ 1232.03390306  1113.04903879   381.8350496  97686.85894852]
New Q values:  [ 1232.03390306  1113.04903879  6925.01345755 97686.85894852]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x .gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059 22556.26479236]
------
Step:10, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059 22556.26479236]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059 38327.9636015 ]
Reward: -1  Episode Reward:  50
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879  6925.01345755 97686.85894852]
------
Step:11, Action:West
State  273
Old Q Values:  [ 1232.03390306  1113.04903879  6925.01345755 97686.85894852]
New Q values:  [ 1232.03390306  1113.04903879  6925.01345755 43797.27088881]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x .gx
xa  x
xxxxx
Step:12, Action:North
State  261
Old Q Values:  [ 1637.66794213    26.73544252 15743.75769802   240.60751196]
New Q values:  [ 1105.27035473    26.73544252 15743.75769802   240.60751196]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1502.6772596    301.98873832 -2373.61630148   262.76946019]
------
Step:13, Action:North
State  181
Old Q Values:  [ 1502.6772596    301.98873832 -2373.61630148   262.76946019]
New Q values:  [  885.43599171   301.98873832 -2373.61630148   262.76946019]
Reward: -1  Episode Reward:  47
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   949.88362623 -2165.66138672  -180.6       ]
------
Step:14, Action:South
State  109
Old Q Values:  [ -241.10880094   949.88362623 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   644.984248   -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  885.43599171   301.98873832 -2373.61630148   262.76946019]
------
Step:15, Action:North
State  181
Old Q Values:  [  885.43599171   301.98873832 -2373.61630148   262.76946019]
New Q values:  [  547.06967108   301.98873832 -2373.61630148   262.76946019]
Reward: -1  Episode Reward:  45
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   644.984248   -2165.66138672  -180.6       ]
------
Step:16, Action:South
State  109
Old Q Values:  [ -241.10880094   644.984248   -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   421.51460053 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  547.06967108   301.98873832 -2373.61630148   262.76946019]
------
Step:17, Action:North
State  181
Old Q Values:  [  547.06967108   301.98873832 -2373.61630148   262.76946019]
New Q values:  [  344.68224859   301.98873832 -2373.61630148   262.76946019]
Reward: -1  Episode Reward:  43
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   421.51460053 -2165.66138672  -180.6       ]
------
Step:18, Action:South
State  108
Old Q Values:  [-8463.16477134  1908.62087874   980.97051902     0.        ]
New Q values:  [-8463.16477134  1779.08434045   980.97051902     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5631.58166229  3245.43755543  3387.45329652 -4966.32149798]
------
Step:19, Action:East
State  176
Old Q Values:  [103770.24881615   1621.55095326 102679.38705514      0.        ]
New Q values:  [103770.24881615   1621.55095326 107436.54432545      0.        ]
Reward: 100009  Episode Reward:  100051
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  4.94898813e+02]
------
Step:1, Action:West
State  136
Old Q Values:  [-2129.37064562  3733.13466467   660.86649319  -386.60615656]
New Q values:  [-2129.37064562  3733.13466467   660.86649319   -39.19567597]
Reward: 9  Episode Reward:  9
xxxxx
x.agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   366.82262216]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   611.86548126   881.64171089]
New Q values:  [ -253.44886264 -1902.20915811   611.86548126  1162.99131637]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2683.11544005  403.77554135 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 2683.11544005  403.77554135 -120.29354603]
New Q values:  [-177.44732869 6045.51788752  403.77554135 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1056.58542923  2341.99049364 16556.23903834  1554.80203889]
------
Step:4, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         1803.59625152    0.        ]
New Q values:  [ 320.07341842    0.         1347.50615957    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  2068.89219654  1278.19575341]
------
Step:5, Action:East
State  200
Old Q Values:  [ 169.9257398  5384.95717302 2125.71085974  568.38654082]
New Q values:  [ 169.9257398  5384.95717302 2064.67334366  568.38654082]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  856.78902046  4049.96333255 -8220.10378799   911.8961227 ]
------
Step:6, Action:South
State  216
Old Q Values:  [  856.78902046  4049.96333255 -8220.10378799   911.8961227 ]
New Q values:  [  856.78902046 13123.77441347 -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059 38327.9636015 ]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059 38327.9636015 ]
New Q values:  [ 2481.02027713 -6396.61506955 -5588.09647059 28475.76670725]
Reward: 9  Episode Reward:  53
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879  6925.01345755 43797.27088881]
------
Step:8, Action:West
State  273
Old Q Values:  [ 1232.03390306  1113.04903879  6925.01345755 43797.27088881]
New Q values:  [ 1232.03390306  1113.04903879  6925.01345755 86961.06405994]
Reward: 100009  Episode Reward:  100062
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5897.51297065  3053.77651886 -4584.50430574 -1713.91177491]
------
Step:1, Action:North
State  210
Old Q Values:  [11674.10623644  3220.66258272   790.72804752 12405.78121886]
New Q values:  [ 4823.51213858  3220.66258272   790.72804752 12405.78121886]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  4.94898813e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  4.94898813e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.52256920e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126  1162.99131637]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   611.86548126  1162.99131637]
New Q values:  [ -253.44886264 -1902.20915811   611.86548126   759.49758629]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  963.00353246  790.4386324  -252.78192178]
------
Step:4, Action:South
State  107
Old Q Values:  [-252.35169558  963.00353246  790.4386324  -252.78192178]
New Q values:  [-252.35169558  794.85326086  790.4386324  -252.78192178]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         1347.50615957    0.        ]
------
Step:5, Action:East
State  189
Old Q Values:  [ 337.36081627 1454.26752338 2587.14635017  154.04646645]
New Q values:  [ 337.36081627 1454.26752338 2446.75426607  154.04646645]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4708.31908668 -789.02220255 1702.07235953]
------
Step:6, Action:South
State  200
Old Q Values:  [ 169.9257398  5384.95717302 2064.67334366  568.38654082]
New Q values:  [ 169.9257398  5282.02506445 2064.67334366  568.38654082]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  6175.33646467 10408.80731747]
------
Step:7, Action:West
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 11468.53744658  2500.44751003]
New Q values:  [ 4757.67028198 -5807.06396197 11468.53744658  5728.70631342]
Reward: 9  Episode Reward:  53
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1105.27035473    26.73544252 15743.75769802   240.60751196]
------
Step:8, Action:North
State  257
Old Q Values:  [31455.85234806 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [39838.3780314  12764.58618105  3851.09595999  1875.31501677]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[82228.67666629 16101.90751562 90855.45697393     0.        ]
------
Step:9, Action:East
State  189
Old Q Values:  [ 337.36081627 1454.26752338 2446.75426607  154.04646645]
New Q values:  [ 337.36081627 1454.26752338 1304.99403736  154.04646645]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1089.6411031     0.          198.38683706]
------
Step:10, Action:South
State  205
Old Q Values:  [   0.         1089.6411031     0.          198.38683706]
New Q values:  [  0.         917.27249131   0.         198.38683706]
Reward: -1  Episode Reward:  50
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1606.7201669  1468.83994914]
------
Step:11, Action:East
State  273
Old Q Values:  [ 1232.03390306  1113.04903879  6925.01345755 86961.06405994]
New Q values:  [ 1232.03390306  1113.04903879 71318.13539519 86961.06405994]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.52256920e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.52256920e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  4.54152044e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126   759.49758629]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753  1703.39750521]
New Q values:  [ -281.736      -1150.91067548   129.67618753  2500.41436834]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 6045.51788752  403.77554135 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 6045.51788752  403.77554135 -120.29354603]
New Q values:  [-177.44732869 2859.88741202  403.77554135 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1454.26752338 1304.99403736  154.04646645]
------
Step:4, Action:South
State  189
Old Q Values:  [ 337.36081627 1454.26752338 1304.99403736  154.04646645]
New Q values:  [ 337.36081627 5310.23431876 1304.99403736  154.04646645]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1105.27035473    26.73544252 15743.75769802   240.60751196]
------
Step:5, Action:East
State  261
Old Q Values:  [ 1105.27035473    26.73544252 15743.75769802   240.60751196]
New Q values:  [1105.27035473   26.73544252 6784.91912928  240.60751196]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[   1.64433       0.         1606.7201669  1468.83994914]
------
Step:6, Action:East
State  277
Old Q Values:  [   1.64433       0.         1606.7201669  1468.83994914]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.46883995e+03]
Reward: -9991  Episode Reward:  -9946
xxxxx
x   x
x  .x
x  gx
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2481.02027713 -6396.61506955 -5588.09647059 28475.76670725]
------
Step:1, Action:North
State  288
Old Q Values:  [ 2481.02027713 -6396.61506955 -5588.09647059 28475.76670725]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059 28475.76670725]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4823.51213858  3220.66258272   790.72804752 12405.78121886]
------
Step:2, Action:West
State  208
Old Q Values:  [ 5897.51297065  3053.77651886 -4584.50430574 -1713.91177491]
New Q values:  [ 5897.51297065  3053.77651886 -4584.50430574  -135.02716446]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1817.12515167  -222.69561341   767.35890262]
------
Step:3, Action:South
State  195
Old Q Values:  [   38.85388605  2554.09283541 19015.79464603  1169.39963074]
New Q values:  [   38.85388605 27109.35635215 19015.79464603  1169.39963074]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 71318.13539519 86961.06405994]
------
Step:4, Action:West
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 71318.13539519 86961.06405994]
New Q values:  [ 1232.03390306  1113.04903879 71318.13539519 46741.3390334 ]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x.  x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39838.3780314  12764.58618105  3851.09595999  1875.31501677]
------
Step:5, Action:North
State  261
Old Q Values:  [1105.27035473   26.73544252 6784.91912928  240.60751196]
New Q values:  [ 550.91281647   26.73544252 6784.91912928  240.60751196]
Reward: 9  Episode Reward:  35
xxxxx
x...x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  344.68224859   301.98873832 -2373.61630148   262.76946019]
------
Step:6, Action:North
State  181
Old Q Values:  [  344.68224859   301.98873832 -2373.61630148   262.76946019]
New Q values:  [  300.39639271   301.98873832 -2373.61630148   262.76946019]
Reward: 9  Episode Reward:  44
xxxxx
xa.gx
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[  0.         523.74497759   0.           0.        ]
------
Step:7, Action:South
State  103
Old Q Values:  [ 221.30610858 4299.58829761  238.35800069    0.        ]
New Q values:  [ 221.30610858 1809.83194054  238.35800069    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  300.39639271   301.98873832 -2373.61630148   262.76946019]
------
Step:8, Action:South
State  183
Old Q Values:  [ 1056.58542923  2341.99049364 16556.23903834  1554.80203889]
New Q values:  [ 1056.58542923  2971.67193624 16556.23903834  1554.80203889]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 550.91281647   26.73544252 6784.91912928  240.60751196]
------
Step:9, Action:East
State  261
Old Q Values:  [ 550.91281647   26.73544252 6784.91912928  240.60751196]
New Q values:  [  550.91281647    26.73544252 24108.80827027   240.60751196]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 71318.13539519 46741.3390334 ]
------
Step:10, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  6175.33646467 10408.80731747]
New Q values:  [ 3514.02111757 -8521.23367799 11012.26459804 10408.80731747]
Reward: -1  Episode Reward:  40
xxxxx
x ..x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059 28475.76670725]
------
Step:11, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059 28475.76670725]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059 32785.14730146]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 71318.13539519 46741.3390334 ]
------
Step:12, Action:East
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 71318.13539519 46741.3390334 ]
New Q values:  [ 1232.03390306  1113.04903879 32362.19834851 46741.3390334 ]
Reward: -10001  Episode Reward:  -9962
xxxxx
x ..x
x   x
x  gx
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 11012.26459804 10408.80731747]
------
Step:1, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 11012.26459804 10408.80731747]
New Q values:  [ 3514.02111757 -8521.23367799 14245.85002965 10408.80731747]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059 32785.14730146]
------
Step:2, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059 32785.14730146]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059 17387.21392948]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 14245.85002965 10408.80731747]
------
Step:3, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 14245.85002965 10408.80731747]
New Q values:  [ 3514.02111757 -8521.23367799 10913.90419071 10408.80731747]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x ..x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059 17387.21392948]
------
Step:4, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059 17387.21392948]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059 10228.456829  ]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799 10913.90419071 10408.80731747]
------
Step:5, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799 10913.90419071 10408.80731747]
New Q values:  [ 3514.02111757 -8521.23367799  7433.49872498 10408.80731747]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x ..x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059 10228.456829  ]
------
Step:6, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059 10228.456829  ]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059  7213.42492684]
Reward: -1  Episode Reward:  4
xxxxx
x...x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  7433.49872498 10408.80731747]
------
Step:7, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7433.49872498 10408.80731747]
New Q values:  [ 3514.02111757 -8521.23367799  7433.49872498   790.31395242]
Reward: -9991  Episode Reward:  -9987
xxxxx
x...x
x ..x
xg  x
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  550.91281647    26.73544252 24108.80827027   240.60751196]
------
Step:1, Action:East
State  261
Old Q Values:  [  550.91281647    26.73544252 24108.80827027   240.60751196]
New Q values:  [  550.91281647    26.73544252 13089.48454208   240.60751196]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197 11468.53744658  5728.70631342]
------
Step:2, Action:East
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 32362.19834851 46741.3390334 ]
New Q values:  [ 1232.03390306  1113.04903879 15114.30681746 46741.3390334 ]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x..gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059  7213.42492684]
------
Step:3, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059  7213.42492684]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059 16907.17168076]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x...x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 15114.30681746 46741.3390334 ]
------
Step:4, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7433.49872498   790.31395242]
New Q values:  [ 3514.02111757 -8521.23367799  7433.49872498  4242.37094359]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  550.91281647    26.73544252 13089.48454208   240.60751196]
------
Step:5, Action:North
State  261
Old Q Values:  [  550.91281647    26.73544252 13089.48454208   240.60751196]
New Q values:  [ 5192.63683809    26.73544252 13089.48454208   240.60751196]
Reward: 9  Episode Reward:  25
xxxxx
x. .x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1056.58542923  2971.67193624 16556.23903834  1554.80203889]
------
Step:6, Action:East
State  177
Old Q Values:  [33510.55723334 29124.7048717  68405.56768954     0.        ]
New Q values:  [33510.55723334 29124.7048717  27912.76462132     0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x. .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  1817.12515167  -222.69561341   767.35890262]
------
Step:7, Action:South
State  193
Old Q Values:  [-5922.26708831  1817.12515167  -222.69561341   767.35890262]
New Q values:  [-5922.26708831 14748.65177069  -222.69561341   767.35890262]
Reward: -1  Episode Reward:  33
xxxxx
x. gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 15114.30681746 46741.3390334 ]
------
Step:8, Action:West
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 11468.53744658  5728.70631342]
New Q values:  [ 4757.67028198 -5807.06396197 11468.53744658  6217.72788799]
Reward: -1  Episode Reward:  32
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5192.63683809    26.73544252 13089.48454208   240.60751196]
------
Step:9, Action:East
State  260
Old Q Values:  [ 3652.85157879 -5704.51612281  8737.97008477 -5679.36893145]
New Q values:  [ 3652.85157879 -5704.51612281  6935.14926788 -5679.36893145]
Reward: -1  Episode Reward:  31
xxxxx
xg .x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197 11468.53744658  6217.72788799]
------
Step:10, Action:East
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197 11468.53744658  6217.72788799]
New Q values:  [ 4757.67028198 -5807.06396197  9658.96648286  6217.72788799]
Reward: -1  Episode Reward:  30
xxxxx
x. .x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059 16907.17168076]
------
Step:11, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059 16907.17168076]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059  9659.95861716]
Reward: -1  Episode Reward:  29
xxxxx
x. .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197  9658.96648286  6217.72788799]
------
Step:12, Action:East
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197  9658.96648286  6217.72788799]
New Q values:  [ 4757.67028198 -5807.06396197  6760.97417829  6217.72788799]
Reward: -1  Episode Reward:  28
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059  9659.95861716]
------
Step:13, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059  9659.95861716]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059 17885.78515688]
Reward: -1  Episode Reward:  27
xxxxx
x. gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 15114.30681746 46741.3390334 ]
------
Step:14, Action:West
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197  6760.97417829  6217.72788799]
New Q values:  [ 4757.67028198 -5807.06396197  6760.97417829  6413.33651782]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5192.63683809    26.73544252 13089.48454208   240.60751196]
------
Step:15, Action:East
State  261
Old Q Values:  [ 5192.63683809    26.73544252 13089.48454208   240.60751196]
New Q values:  [5192.63683809   26.73544252 7263.48607032  240.60751196]
Reward: -1  Episode Reward:  25
xxxxx
x. .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197  6760.97417829  6413.33651782]
------
Step:16, Action:East
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197  6760.97417829  6413.33651782]
New Q values:  [ 4757.67028198 -5807.06396197  8069.52521838  6413.33651782]
Reward: -1  Episode Reward:  24
xxxxx
x. .x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059 17885.78515688]
------
Step:17, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059 17885.78515688]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059  9574.57162827]
Reward: -1  Episode Reward:  23
xxxxx
x. .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197  8069.52521838  6413.33651782]
------
Step:18, Action:East
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197  8069.52521838  6413.33651782]
New Q values:  [ 4757.67028198 -5807.06396197  6099.58157583  6413.33651782]
Reward: -1  Episode Reward:  22
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059  9574.57162827]
------
Step:19, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059  9574.57162827]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059  5753.22960665]
Reward: -1  Episode Reward:  21
xxxxx
xg .x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197  6099.58157583  6413.33651782]
------
Step:20, Action:West
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197  6099.58157583  6413.33651782]
New Q values:  [ 4757.67028198 -5807.06396197  6099.58157583  4743.78042822]
Reward: -1  Episode Reward:  20
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5192.63683809   26.73544252 7263.48607032  240.60751196]
------
Step:21, Action:East
State  261
Old Q Values:  [5192.63683809   26.73544252 7263.48607032  240.60751196]
New Q values:  [5192.63683809   26.73544252 4734.66890088  240.60751196]
Reward: -1  Episode Reward:  19
xxxxx
x. .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197  6099.58157583  4743.78042822]
------
Step:22, Action:East
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197  6099.58157583  4743.78042822]
New Q values:  [ 4757.67028198 -5807.06396197  4165.20151233  4743.78042822]
Reward: -1  Episode Reward:  18
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059  5753.22960665]
------
Step:23, Action:West
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059  5753.22960665]
New Q values:  [ 4719.54247651 -6396.61506955 -5588.09647059  3727.99292726]
Reward: -1  Episode Reward:  17
xxxxx
xg .x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4757.67028198 -5807.06396197  4165.20151233  4743.78042822]
------
Step:24, Action:North
State  276
Old Q Values:  [ 4757.67028198 -5807.06396197  4165.20151233  4743.78042822]
New Q values:  [ 5499.17111752 -5807.06396197  4165.20151233  4743.78042822]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144 11024.59842256 11989.01001575   231.67262594]
------
Step:25, Action:East
State  196
Old Q Values:  [-2469.90645144 11024.59842256 11989.01001575   231.67262594]
New Q values:  [-2469.90645144 11024.59842256  6570.2578975    231.67262594]
Reward: 9  Episode Reward:  25
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5897.51297065  3053.77651886 -4584.50430574  -135.02716446]
------
Step:26, Action:North
State  208
Old Q Values:  [ 5897.51297065  3053.77651886 -4584.50430574  -135.02716446]
New Q values:  [35085.89055745  3053.77651886 -4584.50430574  -135.02716446]
Reward: 9  Episode Reward:  34
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 27558.53877122  12144.48195471  -8652.84       109071.6178973 ]
------
Step:27, Action:North
State  130
Old Q Values:  [33256.45600344 27549.5211885   -180.00807518 74730.24594111]
New Q values:  [35541.05618371 27549.5211885   -180.00807518 74730.24594111]
Reward: -301  Episode Reward:  -267
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[35541.05618371 27549.5211885   -180.00807518 74730.24594111]
------
Step:28, Action:West
State  128
Old Q Values:  [ 27558.53877122  12144.48195471  -8652.84       109071.6178973 ]
New Q values:  [27558.53877122 12144.48195471 -8652.84       70912.97607779]
Reward: -10001  Episode Reward:  -10268
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  4.54152044e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  4.54152044e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.37185128e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753  2500.41436834]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753  2500.41436834]
New Q values:  [ -281.736      -1150.91067548   129.67618753  1098.76498515]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
xg. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -7419.70099094   310.66412604  -180.6       ]
------
Step:3, Action:East
State  110
Old Q Values:  [ -239.29051573 -7419.70099094   310.66412604  -180.6       ]
New Q values:  [ -239.29051573 -7419.70099094   453.29514596  -180.6       ]
Reward: -1  Episode Reward:  17
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753  1098.76498515]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753  1098.76498515]
New Q values:  [ -281.736      -1150.91067548   129.67618753   677.36197232]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  794.85326086  790.4386324  -252.78192178]
------
Step:5, Action:South
State  110
Old Q Values:  [ -239.29051573 -7419.70099094   453.29514596  -180.6       ]
New Q values:  [ -239.29051573 -2521.9025475    453.29514596  -180.6       ]
Reward: 9  Episode Reward:  25
xxxxx
x   x
xa. x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.46859283e+03  0.00000000e+00]
------
Step:6, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         1347.50615957    0.        ]
New Q values:  [ 320.07341842    0.         1165.07012279    0.        ]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  2068.89219654  1278.19575341]
------
Step:7, Action:East
State  202
Old Q Values:  [    0.         -8753.98842238  2068.89219654  1278.19575341]
New Q values:  [    0.         -8753.98842238  1827.48540877  1278.19575341]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 409.10562358 3335.09510052    0.         1540.00681929]
------
Step:8, Action:South
State  218
Old Q Values:  [ 409.10562358 3335.09510052    0.         1540.00681929]
New Q values:  [ 409.10562358 2755.30078316    0.         1540.00681929]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4719.54247651 -6396.61506955 -5588.09647059  3727.99292726]
------
Step:9, Action:North
State  288
Old Q Values:  [ 4719.54247651 -6396.61506955 -5588.09647059  3727.99292726]
New Q values:  [ 5824.34931465 -6396.61506955 -5588.09647059  3727.99292726]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  856.78902046 13123.77441347 -8220.10378799   911.8961227 ]
------
Step:10, Action:South
State  216
Old Q Values:  [  856.78902046 13123.77441347 -8220.10378799   911.8961227 ]
New Q values:  [  856.78902046  6996.21455978 -8220.10378799   911.8961227 ]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5824.34931465 -6396.61506955 -5588.09647059  3727.99292726]
------
Step:11, Action:West
State  288
Old Q Values:  [ 5824.34931465 -6396.61506955 -5588.09647059  3727.99292726]
New Q values:  [ 5824.34931465 -6396.61506955 -5588.09647059 15518.99888092]
Reward: 9  Episode Reward:  49
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 15114.30681746 46741.3390334 ]
------
Step:12, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7433.49872498  4242.37094359]
New Q values:  [ 3514.02111757 -8521.23367799  7433.49872498 73653.86178686]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  300.39639271   301.98873832 -2373.61630148   262.76946019]
------
Step:1, Action:South
State  183
Old Q Values:  [ 1056.58542923  2971.67193624 16556.23903834  1554.80203889]
New Q values:  [ 1056.58542923  2751.85982592 16556.23903834  1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5192.63683809   26.73544252 4734.66890088  240.60751196]
------
Step:2, Action:North
State  261
Old Q Values:  [5192.63683809   26.73544252 4734.66890088  240.60751196]
New Q values:  [2167.05135673   26.73544252 4734.66890088  240.60751196]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  300.39639271   301.98873832 -2373.61630148   262.76946019]
------
Step:3, Action:South
State  183
Old Q Values:  [ 1056.58542923  2751.85982592 16556.23903834  1554.80203889]
New Q values:  [ 1056.58542923  2520.54460063 16556.23903834  1554.80203889]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2167.05135673   26.73544252 4734.66890088  240.60751196]
------
Step:4, Action:North
State  260
Old Q Values:  [ 3652.85157879 -5704.51612281  6935.14926788 -5679.36893145]
New Q values:  [ 6472.5428984  -5704.51612281  6935.14926788 -5679.36893145]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 16706.67422296     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 1056.58542923  2520.54460063 16556.23903834  1554.80203889]
New Q values:  [ 1056.58542923  2520.54460063 18607.9357697   1554.80203889]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  3.99534672e+04  2.17939995e+03]
------
Step:6, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.11979650e+04 5.71267650e+02 2.45392999e+03]
New Q values:  [3.89777037e-01 2.11979650e+04 1.07596742e+04 2.45392999e+03]
Reward: 9  Episode Reward:  14
xxxxx
x...x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[35085.89055745  3053.77651886 -4584.50430574  -135.02716446]
------
Step:7, Action:North
State  208
Old Q Values:  [35085.89055745  3053.77651886 -4584.50430574  -135.02716446]
New Q values:  [36458.83000531  3053.77651886 -4584.50430574  -135.02716446]
Reward: 9  Episode Reward:  23
xxxxx
x..ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[35541.05618371 27549.5211885   -180.00807518 74730.24594111]
------
Step:8, Action:West
State  136
Old Q Values:  [-2129.37064562  3733.13466467   660.86649319   -39.19567597]
New Q values:  [-2129.37064562  3733.13466467   660.86649319   365.97736434]
Reward: 9  Episode Reward:  32
xxxxx
xga x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1254.18544909    65.73076831]
------
Step:9, Action:East
State  112
Old Q Values:  [     0.           3629.92591876  34219.62570292 110949.76306292]
New Q values:  [     0.           3629.92591876  14807.19068057 110949.76306292]
Reward: -1  Episode Reward:  31
xxxxx
x.gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3733.13466467   660.86649319   365.97736434]
------
Step:10, Action:South
State  136
Old Q Values:  [-2129.37064562  3733.13466467   660.86649319   365.97736434]
New Q values:  [-2129.37064562  3591.5182338    660.86649319   365.97736434]
Reward: -1  Episode Reward:  30
xxxxx
x. gx
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  856.78902046  6996.21455978 -8220.10378799   911.8961227 ]
------
Step:11, Action:South
State  208
Old Q Values:  [36458.83000531  3053.77651886 -4584.50430574  -135.02716446]
New Q values:  [36458.83000531  5882.61027182 -4584.50430574  -135.02716446]
Reward: 9  Episode Reward:  39
xxxxx
x.  x
x  gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5824.34931465 -6396.61506955 -5588.09647059 15518.99888092]
------
Step:12, Action:West
State  288
Old Q Values:  [ 5824.34931465 -6396.61506955 -5588.09647059 15518.99888092]
New Q values:  [ 5824.34931465 -6396.61506955 -5588.09647059 28309.15808843]
Reward: 9  Episode Reward:  48
xxxxx
x.  x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  7433.49872498 73653.86178686]
------
Step:13, Action:West
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 15114.30681746 46741.3390334 ]
New Q values:  [ 1232.03390306  1113.04903879 15114.30681746 30647.44902278]
Reward: -1  Episode Reward:  47
xxxxx
x.  x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[39838.3780314  12764.58618105  3851.09595999  1875.31501677]
------
Step:14, Action:North
State  257
Old Q Values:  [39838.3780314  12764.58618105  3851.09595999  1875.31501677]
New Q values:  [25987.91838256 12764.58618105  3851.09595999  1875.31501677]
Reward: -1  Episode Reward:  46
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[33510.55723334 29124.7048717  27912.76462132     0.        ]
------
Step:15, Action:North
State  177
Old Q Values:  [33510.55723334 29124.7048717  27912.76462132     0.        ]
New Q values:  [88883.84400489 29124.7048717  27912.76462132     0.        ]
Reward: 100009  Episode Reward:  100055
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36458.83000531  5882.61027182 -4584.50430574  -135.02716446]
------
Step:1, Action:North
State  216
Old Q Values:  [  856.78902046  6996.21455978 -8220.10378799   911.8961227 ]
New Q values:  [  629.27114661  6996.21455978 -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.37185128e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.37185128e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  6.08123327e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126   759.49758629]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   611.86548126   759.49758629]
New Q values:  [ -253.44886264 -1902.20915811   611.86548126  1167.16525812]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2859.88741202  403.77554135 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 2859.88741202  403.77554135 -120.29354603]
New Q values:  [-177.44732869 6725.73569572  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xa. x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1056.58542923  2520.54460063 18607.9357697   1554.80203889]
------
Step:5, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         1165.07012279    0.        ]
New Q values:  [ 320.07341842    0.         3191.87606053    0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x   x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 2.47391199e+03 2.05741271e+03 9.06816004e+03]
------
Step:6, Action:West
State  201
Old Q Values:  [ 613.33320563 4708.31908668 -789.02220255 1702.07235953]
New Q values:  [ 613.33320563 4708.31908668 -789.02220255 2273.29923944]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 5310.23431876 1304.99403736  154.04646645]
------
Step:7, Action:South
State  191
Old Q Values:  [   3.06655861  941.79461398 1241.2088357     0.        ]
New Q values:  [   3.06655861 1802.51851586 1241.2088357     0.        ]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x   x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2167.05135673   26.73544252 4734.66890088  240.60751196]
------
Step:8, Action:East
State  261
Old Q Values:  [2167.05135673   26.73544252 4734.66890088  240.60751196]
New Q values:  [ 2167.05135673    26.73544252 -2450.98110439   240.60751196]
Reward: -9991  Episode Reward:  -9948
xxxxx
x   x
x   x
x g.x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2167.05135673    26.73544252 -2450.98110439   240.60751196]
------
Step:1, Action:North
State  261
Old Q Values:  [ 2167.05135673    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 6454.6012736     26.73544252 -2450.98110439   240.60751196]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1056.58542923  2520.54460063 18607.9357697   1554.80203889]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243 16706.67422296     0.        ]
New Q values:  [    0.         -5536.05678243 18674.10984355     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.63507675e+03  3.99534672e+04  2.17939995e+03]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  3.99534672e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  1.63507675e+03  1.97085212e+04  2.17939995e+03]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4823.51213858  3220.66258272   790.72804752 12405.78121886]
------
Step:4, Action:West
State  208
Old Q Values:  [36458.83000531  5882.61027182 -4584.50430574  -135.02716446]
New Q values:  [36458.83000531  5882.61027182 -4584.50430574   304.77863761]
Reward: -10001  Episode Reward:  -9974
xxxxx
x...x
x g x
x . x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 6725.73569572  403.77554135 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 1809.83194054  238.35800069    0.        ]
New Q values:  [221.30610858 819.92939771 238.35800069   0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  300.39639271   301.98873832 -2373.61630148   262.76946019]
------
Step:2, Action:South
State  181
Old Q Values:  [  300.39639271   301.98873832 -2373.61630148   262.76946019]
New Q values:  [  300.39639271  2062.57587741 -2373.61630148   262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x ..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 6454.6012736     26.73544252 -2450.98110439   240.60751196]
------
Step:3, Action:North
State  261
Old Q Values:  [ 6454.6012736     26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 3200.01327266    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  17
xxxxx
x g.x
xa..x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  300.39639271  2062.57587741 -2373.61630148   262.76946019]
------
Step:4, Action:South
State  181
Old Q Values:  [  300.39639271  2062.57587741 -2373.61630148   262.76946019]
New Q values:  [  300.39639271  1784.43433276 -2373.61630148   262.76946019]
Reward: -1  Episode Reward:  16
xxxxx
x ..x
x g.x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3200.01327266    26.73544252 -2450.98110439   240.60751196]
------
Step:5, Action:North
State  260
Old Q Values:  [ 6472.5428984  -5704.51612281  6935.14926788 -5679.36893145]
New Q values:  [-2395.34685168 -5704.51612281  6935.14926788 -5679.36893145]
Reward: -10001  Episode Reward:  -9985
xxxxx
x ..x
xg..x
x . x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 14748.65177069  -222.69561341   767.35890262]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831 14748.65177069  -222.69561341   767.35890262]
New Q values:  [-5922.26708831 15099.09541511  -222.69561341   767.35890262]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 15114.30681746 30647.44902278]
------
Step:2, Action:West
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 15114.30681746 30647.44902278]
New Q values:  [ 1232.03390306  1113.04903879 15114.30681746 13224.38359091]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3200.01327266    26.73544252 -2450.98110439   240.60751196]
------
Step:3, Action:North
State  261
Old Q Values:  [ 3200.01327266    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 6867.78603997    26.73544252 -2450.98110439   240.60751196]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1056.58542923  2520.54460063 18607.9357697   1554.80203889]
------
Step:4, Action:East
State  181
Old Q Values:  [  300.39639271  1784.43433276 -2373.61630148   262.76946019]
New Q values:  [ 300.39639271 1784.43433276 3579.68210394  262.76946019]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 15099.09541511  -222.69561341   767.35890262]
------
Step:5, Action:South
State  195
Old Q Values:  [   38.85388605 27109.35635215 19015.79464603  1169.39963074]
New Q values:  [   38.85388605 15377.4345861  19015.79464603  1169.39963074]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 15114.30681746 13224.38359091]
------
Step:6, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7433.49872498 73653.86178686]
New Q values:  [ 3514.02111757 -8521.23367799  7433.49872498 31521.28052674]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 6867.78603997    26.73544252 -2450.98110439   240.60751196]
------
Step:7, Action:North
State  261
Old Q Values:  [ 6867.78603997    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 3820.41904717    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  23
xxxxx
x...x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 300.39639271 1784.43433276 3579.68210394  262.76946019]
------
Step:8, Action:South
State  181
Old Q Values:  [ 300.39639271 1784.43433276 3579.68210394  262.76946019]
New Q values:  [ 300.39639271 1859.29944726 3579.68210394  262.76946019]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3820.41904717    26.73544252 -2450.98110439   240.60751196]
------
Step:9, Action:North
State  261
Old Q Values:  [ 3820.41904717    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 2601.47225005    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  21
xxxxx
x...x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 300.39639271 1859.29944726 3579.68210394  262.76946019]
------
Step:10, Action:South
State  181
Old Q Values:  [ 300.39639271 1859.29944726 3579.68210394  262.76946019]
New Q values:  [ 300.39639271 1523.56145392 3579.68210394  262.76946019]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2601.47225005    26.73544252 -2450.98110439   240.60751196]
------
Step:11, Action:North
State  261
Old Q Values:  [ 2601.47225005    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 2113.8935312     26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  19
xxxxx
x...x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 300.39639271 1523.56145392 3579.68210394  262.76946019]
------
Step:12, Action:South
State  177
Old Q Values:  [88883.84400489 29124.7048717  27912.76462132     0.        ]
New Q values:  [88883.84400489 12283.45000804 27912.76462132     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x.g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2113.8935312     26.73544252 -2450.98110439   240.60751196]
------
Step:13, Action:North
State  261
Old Q Values:  [ 2113.8935312     26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 1918.86204366    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 300.39639271 1523.56145392 3579.68210394  262.76946019]
------
Step:14, Action:South
State  177
Old Q Values:  [88883.84400489 12283.45000804 27912.76462132     0.        ]
New Q values:  [88883.84400489  5488.43861631 27912.76462132     0.        ]
Reward: -1  Episode Reward:  16
xxxxx
x.g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1918.86204366    26.73544252 -2450.98110439   240.60751196]
------
Step:15, Action:North
State  261
Old Q Values:  [ 1918.86204366    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 2.74320980e+04  2.67354425e+01 -2.45098110e+03  2.40607512e+02]
Reward: -1  Episode Reward:  15
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631 27912.76462132     0.        ]
------
Step:16, Action:North
State  181
Old Q Values:  [ 300.39639271 1523.56145392 3579.68210394  262.76946019]
New Q values:  [ 371.5373764  1523.56145392 3579.68210394  262.76946019]
Reward: 9  Episode Reward:  24
xxxxx
xa..x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[221.30610858 819.92939771 238.35800069   0.        ]
------
Step:17, Action:South
State  103
Old Q Values:  [221.30610858 819.92939771 238.35800069   0.        ]
New Q values:  [ 221.30610858 1401.27639027  238.35800069    0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  1523.56145392 3579.68210394  262.76946019]
------
Step:18, Action:South
State  181
Old Q Values:  [ 371.5373764  1523.56145392 3579.68210394  262.76946019]
New Q values:  [ 371.5373764  8838.45398725 3579.68210394  262.76946019]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2.74320980e+04  2.67354425e+01 -2.45098110e+03  2.40607512e+02]
------
Step:19, Action:North
State  261
Old Q Values:  [ 2.74320980e+04  2.67354425e+01 -2.45098110e+03  2.40607512e+02]
New Q values:  [13623.77540375    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  21
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  8838.45398725 3579.68210394  262.76946019]
------
Step:20, Action:South
State  181
Old Q Values:  [ 371.5373764  8838.45398725 3579.68210394  262.76946019]
New Q values:  [ 371.5373764  7621.91421602 3579.68210394  262.76946019]
Reward: -1  Episode Reward:  20
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[13623.77540375    26.73544252 -2450.98110439   240.60751196]
------
Step:21, Action:North
State  261
Old Q Values:  [13623.77540375    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 7735.48442631    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  19
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  7621.91421602 3579.68210394  262.76946019]
------
Step:22, Action:South
State  181
Old Q Values:  [ 371.5373764  7621.91421602 3579.68210394  262.76946019]
New Q values:  [ 371.5373764  5368.8110143  3579.68210394  262.76946019]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 7735.48442631    26.73544252 -2450.98110439   240.60751196]
------
Step:23, Action:North
State  261
Old Q Values:  [ 7735.48442631    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 4704.23707481    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  5368.8110143  3579.68210394  262.76946019]
------
Step:24, Action:South
State  181
Old Q Values:  [ 371.5373764  5368.8110143  3579.68210394  262.76946019]
New Q values:  [ 371.5373764  3558.19552816 3579.68210394  262.76946019]
Reward: -1  Episode Reward:  16
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4704.23707481    26.73544252 -2450.98110439   240.60751196]
------
Step:25, Action:North
State  261
Old Q Values:  [ 4704.23707481    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 2954.99946111    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  3558.19552816 3579.68210394  262.76946019]
------
Step:26, Action:South
State  183
Old Q Values:  [ 1056.58542923  2520.54460063 18607.9357697   1554.80203889]
New Q values:  [ 1056.58542923  1894.11767858 18607.9357697   1554.80203889]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2954.99946111    26.73544252 -2450.98110439   240.60751196]
------
Step:27, Action:North
State  261
Old Q Values:  [ 2954.99946111    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 2255.30441562    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  3558.19552816 3579.68210394  262.76946019]
------
Step:28, Action:South
State  181
Old Q Values:  [ 371.5373764  3558.19552816 3579.68210394  262.76946019]
New Q values:  [ 371.5373764  2099.26953595 3579.68210394  262.76946019]
Reward: -1  Episode Reward:  12
xxxxx
x g.x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2255.30441562    26.73544252 -2450.98110439   240.60751196]
------
Step:29, Action:North
State  261
Old Q Values:  [ 2255.30441562    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 1975.42639743    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  11
xxxxx
x ..x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  2099.26953595 3579.68210394  262.76946019]
------
Step:30, Action:South
State  180
Old Q Values:  [-5631.58166229  3245.43755543  3387.45329652 -4966.32149798]
New Q values:  [-5631.58166229  3378.11980254  3387.45329652 -4966.32149798]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2395.34685168 -5704.51612281  6935.14926788 -5679.36893145]
------
Step:31, Action:East
State  260
Old Q Values:  [-2395.34685168 -5704.51612281  6935.14926788 -5679.36893145]
New Q values:  [-2395.34685168 -5704.51612281  4423.21104241 -5679.36893145]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 5499.17111752 -5807.06396197  4165.20151233  4743.78042822]
------
Step:32, Action:North
State  276
Old Q Values:  [ 5499.17111752 -5807.06396197  4165.20151233  4743.78042822]
New Q values:  [ 8558.4579504  -5807.06396197  4165.20151233  4743.78042822]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.11979650e+04 1.07596742e+04 2.45392999e+03]
------
Step:33, Action:South
State  194
Old Q Values:  [-6.00000000e-01  1.63507675e+03  1.97085212e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  3.22096808e+03  1.97085212e+04  2.17939995e+03]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 8558.4579504  -5807.06396197  4165.20151233  4743.78042822]
------
Step:34, Action:North
State  276
Old Q Values:  [ 8558.4579504  -5807.06396197  4165.20151233  4743.78042822]
New Q values:  [ 9782.17268356 -5807.06396197  4165.20151233  4743.78042822]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.11979650e+04 1.07596742e+04 2.45392999e+03]
------
Step:35, Action:South
State  194
Old Q Values:  [-6.00000000e-01  3.22096808e+03  1.97085212e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  4.22243904e+03  1.97085212e+04  2.17939995e+03]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 9782.17268356 -5807.06396197  4165.20151233  4743.78042822]
------
Step:36, Action:North
State  276
Old Q Values:  [ 9782.17268356 -5807.06396197  4165.20151233  4743.78042822]
New Q values:  [ 9824.82544487 -5807.06396197  4165.20151233  4743.78042822]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  4.22243904e+03  1.97085212e+04  2.17939995e+03]
------
Step:37, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.11979650e+04 1.07596742e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.11979650e+04 1.52409187e+04 2.45392999e+03]
Reward: -1  Episode Reward:  3
xxxxx
x ..x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36458.83000531  5882.61027182 -4584.50430574   304.77863761]
------
Step:38, Action:North
State  216
Old Q Values:  [  629.27114661  6996.21455978 -8220.10378799   911.8961227 ]
New Q values:  [ 1334.56392879  6996.21455978 -8220.10378799   911.8961227 ]
Reward: 9  Episode Reward:  12
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3591.5182338    660.86649319   365.97736434]
------
Step:39, Action:South
State  128
Old Q Values:  [27558.53877122 12144.48195471 -8652.84       70912.97607779]
New Q values:  [27558.53877122 15794.84178348 -8652.84       70912.97607779]
Reward: -1  Episode Reward:  11
xxxxx
x .gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36458.83000531  5882.61027182 -4584.50430574   304.77863761]
------
Step:40, Action:South
State  208
Old Q Values:  [36458.83000531  5882.61027182 -4584.50430574   304.77863761]
New Q values:  [36458.83000531 10851.19153526 -4584.50430574   304.77863761]
Reward: 9  Episode Reward:  20
xxxxx
x g x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5824.34931465 -6396.61506955 -5588.09647059 28309.15808843]
------
Step:41, Action:West
State  288
Old Q Values:  [ 5824.34931465 -6396.61506955 -5588.09647059 28309.15808843]
New Q values:  [ 5824.34931465 -6396.61506955 -5588.09647059 20779.44739339]
Reward: -1  Episode Reward:  19
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  7433.49872498 31521.28052674]
------
Step:42, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7433.49872498 31521.28052674]
New Q values:  [ 3514.02111757 -8521.23367799  7433.49872498 13934.87552342]
Reward: -1  Episode Reward:  18
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2395.34685168 -5704.51612281  4423.21104241 -5679.36893145]
------
Step:43, Action:East
State  260
Old Q Values:  [-2395.34685168 -5704.51612281  4423.21104241 -5679.36893145]
New Q values:  [-2395.34685168 -5704.51612281  5949.14707399 -5679.36893145]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  7433.49872498 13934.87552342]
------
Step:44, Action:West
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7433.49872498 13934.87552342]
New Q values:  [ 3514.02111757 -8521.23367799  7433.49872498  7358.09433156]
Reward: -1  Episode Reward:  16
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2395.34685168 -5704.51612281  5949.14707399 -5679.36893145]
------
Step:45, Action:East
State  260
Old Q Values:  [-2395.34685168 -5704.51612281  5949.14707399 -5679.36893145]
New Q values:  [-2395.34685168 -5704.51612281  4609.10844709 -5679.36893145]
Reward: -1  Episode Reward:  15
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  7433.49872498  7358.09433156]
------
Step:46, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7433.49872498  7358.09433156]
New Q values:  [ 3514.02111757 -8521.23367799  9206.63370801  7358.09433156]
Reward: -1  Episode Reward:  14
xxxxx
x . x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5824.34931465 -6396.61506955 -5588.09647059 20779.44739339]
------
Step:47, Action:West
State  288
Old Q Values:  [ 5824.34931465 -6396.61506955 -5588.09647059 20779.44739339]
New Q values:  [ 5824.34931465 -6396.61506955 -5588.09647059 11073.16906976]
Reward: -1  Episode Reward:  13
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  9206.63370801  7358.09433156]
------
Step:48, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  9206.63370801  7358.09433156]
New Q values:  [ 3514.02111757 -8521.23367799  7004.00420413  7358.09433156]
Reward: -1  Episode Reward:  12
xxxxx
x . x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5824.34931465 -6396.61506955 -5588.09647059 11073.16906976]
------
Step:49, Action:West
State  288
Old Q Values:  [ 5824.34931465 -6396.61506955 -5588.09647059 11073.16906976]
New Q values:  [ 5824.34931465 -6396.61506955 -5588.09647059  6636.09592737]
Reward: -1  Episode Reward:  11
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  7004.00420413  7358.09433156]
------
Step:50, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  7004.00420413  7358.09433156]
New Q values:  [ 3514.02111757 -8521.23367799  4791.83045986  7358.09433156]
Reward: -1  Episode Reward:  10
xxxxx
x . x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5824.34931465 -6396.61506955 -5588.09647059  6636.09592737]
------
Step:51, Action:West
State  288
Old Q Values:  [ 5824.34931465 -6396.61506955 -5588.09647059  6636.09592737]
New Q values:  [ 5824.34931465 -6396.61506955 -5588.09647059  4861.26667042]
Reward: -1  Episode Reward:  9
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3514.02111757 -8521.23367799  4791.83045986  7358.09433156]
------
Step:52, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  4791.83045986  7358.09433156]
New Q values:  [ 3514.02111757 -8521.23367799  3663.43697834  7358.09433156]
Reward: -1  Episode Reward:  8
xxxxx
x . x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5824.34931465 -6396.61506955 -5588.09647059  4861.26667042]
------
Step:53, Action:North
State  288
Old Q Values:  [ 5824.34931465 -6396.61506955 -5588.09647059  4861.26667042]
New Q values:  [13266.78872745 -6396.61506955 -5588.09647059  4861.26667042]
Reward: -1  Episode Reward:  7
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36458.83000531 10851.19153526 -4584.50430574   304.77863761]
------
Step:54, Action:North
State  208
Old Q Values:  [36458.83000531 10851.19153526 -4584.50430574   304.77863761]
New Q values:  [37002.00578446 10851.19153526 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  6
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[35541.05618371 27549.5211885   -180.00807518 74730.24594111]
------
Step:55, Action:West
State  130
Old Q Values:  [35541.05618371 27549.5211885   -180.00807518 74730.24594111]
New Q values:  [ 35541.05618371  27549.5211885    -180.00807518 127621.08689832]
Reward: 100009  Episode Reward:  100015
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  2099.26953595 3579.68210394  262.76946019]
------
Step:1, Action:South
State  183
Old Q Values:  [ 1056.58542923  1894.11767858 18607.9357697   1554.80203889]
New Q values:  [ 1056.58542923  1355.67499066 18607.9357697   1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1975.42639743    26.73544252 -2450.98110439   240.60751196]
------
Step:2, Action:North
State  261
Old Q Values:  [ 1975.42639743    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 1863.47519015    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  8
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  2099.26953595 3579.68210394  262.76946019]
------
Step:3, Action:South
State  183
Old Q Values:  [ 1056.58542923  1355.67499066 18607.9357697   1554.80203889]
New Q values:  [ 1056.58542923  1100.71255331 18607.9357697   1554.80203889]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1863.47519015    26.73544252 -2450.98110439   240.60751196]
------
Step:4, Action:North
State  261
Old Q Values:  [ 1863.47519015    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 1818.69470724    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  6
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  2099.26953595 3579.68210394  262.76946019]
------
Step:5, Action:South
State  181
Old Q Values:  [ 371.5373764  2099.26953595 3579.68210394  262.76946019]
New Q values:  [ 371.5373764  1384.71622655 3579.68210394  262.76946019]
Reward: -1  Episode Reward:  5
xxxxx
x.g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1818.69470724    26.73544252 -2450.98110439   240.60751196]
------
Step:6, Action:North
State  260
Old Q Values:  [-2395.34685168 -5704.51612281  4609.10844709 -5679.36893145]
New Q values:  [   57.49724828 -5704.51612281  4609.10844709 -5679.36893145]
Reward: -1  Episode Reward:  4
xxxxx
xg..x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[-5631.58166229  3378.11980254  3387.45329652 -4966.32149798]
------
Step:7, Action:East
State  180
Old Q Values:  [-5631.58166229  3378.11980254  3387.45329652 -4966.32149798]
New Q values:  [-5631.58166229  3378.11980254  7713.770822   -4966.32149798]
Reward: -1  Episode Reward:  3
xxxxx
x...x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.11979650e+04 1.52409187e+04 2.45392999e+03]
------
Step:8, Action:South
State  194
Old Q Values:  [-6.00000000e-01  4.22243904e+03  1.97085212e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  4.64182325e+03  1.97085212e+04  2.17939995e+03]
Reward: 9  Episode Reward:  12
xxxxx
x...x
x  .x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 9824.82544487 -5807.06396197  4165.20151233  4743.78042822]
------
Step:9, Action:North
State  276
Old Q Values:  [ 9824.82544487 -5807.06396197  4165.20151233  4743.78042822]
New Q values:  [ 9841.88654939 -5807.06396197  4165.20151233  4743.78042822]
Reward: -1  Episode Reward:  11
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  4.64182325e+03  1.97085212e+04  2.17939995e+03]
------
Step:10, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.11979650e+04 1.52409187e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.11979650e+04 1.72023692e+04 2.45392999e+03]
Reward: 9  Episode Reward:  20
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[37002.00578446 10851.19153526 -4584.50430574   304.77863761]
------
Step:11, Action:North
State  208
Old Q Values:  [37002.00578446 10851.19153526 -4584.50430574   304.77863761]
New Q values:  [36080.09513712 10851.19153526 -4584.50430574   304.77863761]
Reward: 9  Episode Reward:  29
xxxxx
x.gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[27558.53877122 15794.84178348 -8652.84       70912.97607779]
------
Step:12, Action:North
State  128
Old Q Values:  [27558.53877122 15794.84178348 -8652.84       70912.97607779]
New Q values:  [32116.70833183 15794.84178348 -8652.84       70912.97607779]
Reward: -301  Episode Reward:  -272
xxxxx
xg.ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[32116.70833183 15794.84178348 -8652.84       70912.97607779]
------
Step:13, Action:West
State  130
Old Q Values:  [ 35541.05618371  27549.5211885    -180.00807518 127621.08689832]
New Q values:  [35541.05618371 27549.5211885   -180.00807518 88777.4232812 ]
Reward: 9  Episode Reward:  -263
xxxxx
x.a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:14, Action:West
State  126
Old Q Values:  [   0.          331.64678262 7128.26683804  646.18246453]
New Q values:  [   0.          331.64678262 7128.26683804  399.8615296 ]
Reward: 9  Episode Reward:  -254
xxxxx
xa  x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -2521.9025475    453.29514596  -180.6       ]
------
Step:15, Action:East
State  110
Old Q Values:  [ -239.29051573 -2521.9025475    453.29514596  -180.6       ]
New Q values:  [ -239.29051573 -2521.9025475   2319.1981098   -180.6       ]
Reward: -1  Episode Reward:  -255
xxxxx
x a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 7128.26683804  399.8615296 ]
------
Step:16, Action:East
State  126
Old Q Values:  [   0.          331.64678262 7128.26683804  399.8615296 ]
New Q values:  [   0.          331.64678262 3033.14373335  399.8615296 ]
Reward: -1  Episode Reward:  -256
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  6.08123327e+02]
------
Step:17, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  6.08123327e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.15259245e+03]
Reward: -1  Episode Reward:  -257
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 3033.14373335  399.8615296 ]
------
Step:18, Action:East
State  126
Old Q Values:  [   0.          331.64678262 3033.14373335  399.8615296 ]
New Q values:  [   0.          331.64678262 1558.4352286   399.8615296 ]
Reward: -1  Episode Reward:  -258
xxxxx
x  ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.15259245e+03]
------
Step:19, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.15259245e+03]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.27967549e+02]
Reward: -1  Episode Reward:  -259
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 1558.4352286   399.8615296 ]
------
Step:20, Action:East
State  126
Old Q Values:  [   0.          331.64678262 1558.4352286   399.8615296 ]
New Q values:  [  0.         331.64678262 901.16435612 399.8615296 ]
Reward: -1  Episode Reward:  -260
xxxxx
x  ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.27967549e+02]
------
Step:21, Action:West
State  136
Old Q Values:  [-2129.37064562  3591.5182338    660.86649319   365.97736434]
New Q values:  [-2129.37064562  3591.5182338    660.86649319 -4785.50597998]
Reward: -10001  Episode Reward:  -10261
xxxxx
x g x
x   x
x  .x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1232.03390306  1113.04903879 15114.30681746 13224.38359091]
------
Step:1, Action:East
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  3663.43697834  7358.09433156]
New Q values:  [ 3514.02111757 -8521.23367799  5450.81140957  7358.09433156]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13266.78872745 -6396.61506955 -5588.09647059  4861.26667042]
------
Step:2, Action:North
State  288
Old Q Values:  [13266.78872745 -6396.61506955 -5588.09647059  4861.26667042]
New Q values:  [16136.14403212 -6396.61506955 -5588.09647059  4861.26667042]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36080.09513712 10851.19153526 -4584.50430574   304.77863761]
------
Step:3, Action:North
State  208
Old Q Values:  [36080.09513712 10851.19153526 -4584.50430574   304.77863761]
New Q values:  [15508.89352499 10851.19153526 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  17
xxxxx
xg.ax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3591.5182338    660.86649319 -4785.50597998]
------
Step:4, Action:South
State  136
Old Q Values:  [-2129.37064562  3591.5182338    660.86649319 -4785.50597998]
New Q values:  [-2129.37064562  6088.67535102   660.86649319 -4785.50597998]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[15508.89352499 10851.19153526 -4584.50430574   304.77863761]
------
Step:5, Action:North
State  208
Old Q Values:  [15508.89352499 10851.19153526 -4584.50430574   304.77863761]
New Q values:  [32836.18439436 10851.19153526 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  15
xxxxx
x..ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[35541.05618371 27549.5211885   -180.00807518 88777.4232812 ]
------
Step:6, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.27967549e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.79795611e+02]
Reward: 9  Episode Reward:  24
xxxxx
x.a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753   677.36197232]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   677.36197232]
New Q values:  [ -281.736      -1150.91067548   129.67618753   803.33187312]
Reward: 9  Episode Reward:  33
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494  1756.62361399  -180.6       ]
------
Step:8, Action:East
State  106
Old Q Values:  [ -180.6        -7710.46911494  1756.62361399  -180.6       ]
New Q values:  [ -180.6        -7710.46911494   943.04900753  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x a x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753   803.33187312]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   803.33187312]
New Q values:  [ -281.736      -1150.91067548   129.67618753   603.64745151]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -7710.46911494   943.04900753  -180.6       ]
------
Step:10, Action:East
State  107
Old Q Values:  [-252.35169558  794.85326086  790.4386324  -252.78192178]
New Q values:  [-252.35169558  794.85326086  496.66968841 -252.78192178]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753   603.64745151]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   603.64745151]
New Q values:  [ -281.736      -1150.91067548   129.67618753   479.31495886]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  794.85326086  496.66968841 -252.78192178]
------
Step:12, Action:South
State  106
Old Q Values:  [ -180.6        -7710.46911494   943.04900753  -180.6       ]
New Q values:  [ -180.6        -6764.65639938   943.04900753  -180.6       ]
Reward: -9991  Episode Reward:  -9962
xxxxx
x   x
xg. x
x.  x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.11979650e+04 1.72023692e+04 2.45392999e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144 11024.59842256  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  7367.80533384  6570.2578975    231.67262594]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 9841.88654939 -5807.06396197  4165.20151233  4743.78042822]
------
Step:2, Action:North
State  273
Old Q Values:  [ 1232.03390306  1113.04903879 15114.30681746 13224.38359091]
New Q values:  [ 5021.94218576  1113.04903879 15114.30681746 13224.38359091]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
x.a.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 15099.09541511  -222.69561341   767.35890262]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  7367.80533384  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  5899.08809835  6570.2578975    231.67262594]
Reward: -1  Episode Reward:  7
xxxxx
x g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 9841.88654939 -5807.06396197  4165.20151233  4743.78042822]
------
Step:4, Action:North
State  272
Old Q Values:  [ 3514.02111757 -8521.23367799  5450.81140957  7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799  5450.81140957  7358.09433156]
Reward: -10001  Episode Reward:  -9994
xxxxx
x ..x
x.g.x
x. .x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1818.69470724    26.73544252 -2450.98110439   240.60751196]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1818.69470724    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 1806.78251408    26.73544252 -2450.98110439   240.60751196]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  1384.71622655 3579.68210394  262.76946019]
------
Step:2, Action:East
State  181
Old Q Values:  [ 371.5373764  1384.71622655 3579.68210394  262.76946019]
New Q values:  [ 371.5373764  1384.71622655 5967.00146611  262.76946019]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 15099.09541511  -222.69561341   767.35890262]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 15099.09541511  -222.69561341   767.35890262]
New Q values:  [-5922.26708831 10579.33021128  -222.69561341   767.35890262]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 5021.94218576  1113.04903879 15114.30681746 13224.38359091]
------
Step:4, Action:East
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799  5450.81140957  7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799  7026.56777346  7358.09433156]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16136.14403212 -6396.61506955 -5588.09647059  4861.26667042]
------
Step:5, Action:North
State  288
Old Q Values:  [16136.14403212 -6396.61506955 -5588.09647059  4861.26667042]
New Q values:  [16310.71293115 -6396.61506955 -5588.09647059  4861.26667042]
Reward: 9  Episode Reward:  45
xxxxx
x.g x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[32836.18439436 10851.19153526 -4584.50430574   304.77863761]
------
Step:6, Action:North
State  208
Old Q Values:  [32836.18439436 10851.19153526 -4584.50430574   304.77863761]
New Q values:  [34407.76658108 10851.19153526 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  44
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[32116.70833183 15794.84178348 -8652.84       70912.97607779]
------
Step:7, Action:West
State  130
Old Q Values:  [35541.05618371 27549.5211885   -180.00807518 88777.4232812 ]
New Q values:  [35541.05618371 27549.5211885   -180.00807518 73239.95783436]
Reward: 9  Episode Reward:  53
xxxxx
x.a x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125745.29507292]
------
Step:8, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   32608.63878402 125745.29507292]
New Q values:  [  -180.6          3557.6642036   32608.63878402 127711.58907484]
Reward: 100009  Episode Reward:  100062
xxxxx
xa  x
x   x
xg  x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.79795611e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.79795611e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.87467822e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126  1167.16525812]
------
Step:2, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   611.86548126  1167.16525812]
New Q values:  [ -253.44886264 -1902.20915811   611.86548126  2489.98681196]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 6725.73569572  403.77554135 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 6725.73569572  403.77554135 -120.29354603]
New Q values:  [-177.44732869 4485.79471812  403.77554135 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 371.5373764  1384.71622655 5967.00146611  262.76946019]
------
Step:4, Action:East
State  183
Old Q Values:  [ 1056.58542923  1100.71255331 18607.9357697   1554.80203889]
New Q values:  [ 1056.58542923  1100.71255331 10169.02231929  1554.80203889]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 2.47391199e+03 2.05741271e+03 9.06816004e+03]
------
Step:5, Action:West
State  201
Old Q Values:  [ 613.33320563 4708.31908668 -789.02220255 2273.29923944]
New Q values:  [ 613.33320563 4708.31908668 -789.02220255 1598.29363255]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 189.19059642    0.         2298.57978925 -178.98      ]
------
Step:6, Action:East
State  181
Old Q Values:  [ 371.5373764  1384.71622655 5967.00146611  262.76946019]
New Q values:  [  371.5373764   1384.71622655 -2029.19189422   262.76946019]
Reward: -10001  Episode Reward:  -9966
xxxxx
x   x
x g.x
x.. x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1806.78251408    26.73544252 -2450.98110439   240.60751196]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1806.78251408    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 1143.5278736     26.73544252 -2450.98110439   240.60751196]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  371.5373764   1384.71622655 -2029.19189422   262.76946019]
------
Step:2, Action:South
State  181
Old Q Values:  [  371.5373764   1384.71622655 -2029.19189422   262.76946019]
New Q values:  [  371.5373764    896.3448527  -2029.19189422   262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1143.5278736     26.73544252 -2450.98110439   240.60751196]
------
Step:3, Action:North
State  261
Old Q Values:  [ 1143.5278736     26.73544252 -2450.98110439   240.60751196]
New Q values:  [  725.71460525    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  371.5373764    896.3448527  -2029.19189422   262.76946019]
------
Step:4, Action:South
State  181
Old Q Values:  [  371.5373764    896.3448527  -2029.19189422   262.76946019]
New Q values:  [  371.5373764    575.65232266 -2029.19189422   262.76946019]
Reward: -1  Episode Reward:  6
xxxxx
x..gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  725.71460525    26.73544252 -2450.98110439   240.60751196]
------
Step:5, Action:North
State  261
Old Q Values:  [  725.71460525    26.73544252 -2450.98110439   240.60751196]
New Q values:  [  462.3815389     26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  371.5373764    575.65232266 -2029.19189422   262.76946019]
------
Step:6, Action:South
State  181
Old Q Values:  [  371.5373764    575.65232266 -2029.19189422   262.76946019]
New Q values:  [  371.5373764    368.37539073 -2029.19189422   262.76946019]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  462.3815389     26.73544252 -2450.98110439   240.60751196]
------
Step:7, Action:North
State  261
Old Q Values:  [  462.3815389     26.73544252 -2450.98110439   240.60751196]
New Q values:  [  295.81382848    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  371.5373764    368.37539073 -2029.19189422   262.76946019]
------
Step:8, Action:North
State  181
Old Q Values:  [  371.5373764    368.37539073 -2029.19189422   262.76946019]
New Q values:  [ 1499.753366     368.37539073 -2029.19189422   262.76946019]
Reward: 9  Episode Reward:  12
xxxxx
xa. x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4485.79471812  403.77554135 -120.29354603]
------
Step:9, Action:South
State  111
Old Q Values:  [-177.44732869 4485.79471812  403.77554135 -120.29354603]
New Q values:  [-177.44732869 2243.64389705  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  11
xxxxx
x . x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1499.753366     368.37539073 -2029.19189422   262.76946019]
------
Step:10, Action:North
State  183
Old Q Values:  [ 1056.58542923  1100.71255331 10169.02231929  1554.80203889]
New Q values:  [ 1095.12734081  1100.71255331 10169.02231929  1554.80203889]
Reward: -1  Episode Reward:  10
xxxxx
xa. x
x ..x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2243.64389705  403.77554135 -120.29354603]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 2243.64389705  403.77554135 -120.29354603]
New Q values:  [-177.44732869 3947.56425461  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  9
xxxxx
x . x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1095.12734081  1100.71255331 10169.02231929  1554.80203889]
------
Step:12, Action:East
State  181
Old Q Values:  [ 1499.753366     368.37539073 -2029.19189422   262.76946019]
New Q values:  [1499.753366    368.37539073 -446.88725429  262.76946019]
Reward: -9991  Episode Reward:  -9982
xxxxx
x . x
x g.x
x ..x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 5021.94218576  1113.04903879 15114.30681746 13224.38359091]
------
Step:1, Action:West
State  273
Old Q Values:  [ 5021.94218576  1113.04903879 15114.30681746 13224.38359091]
New Q values:  [ 5021.94218576  1113.04903879 15114.30681746  5383.89758491]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  295.81382848    26.73544252 -2450.98110439   240.60751196]
------
Step:2, Action:North
State  261
Old Q Values:  [  295.81382848    26.73544252 -2450.98110439   240.60751196]
New Q values:  [  573.65154119    26.73544252 -2450.98110439   240.60751196]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1499.753366    368.37539073 -446.88725429  262.76946019]
------
Step:3, Action:North
State  181
Old Q Values:  [1499.753366    368.37539073 -446.88725429  262.76946019]
New Q values:  [1025.68426348  368.37539073 -446.88725429  262.76946019]
Reward: 9  Episode Reward:  27
xxxxx
xa..x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1401.27639027  238.35800069    0.        ]
------
Step:4, Action:South
State  109
Old Q Values:  [ -241.10880094   421.51460053 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   475.71111925 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  26
xxxxx
x .gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1025.68426348  368.37539073 -446.88725429  262.76946019]
------
Step:5, Action:North
State  181
Old Q Values:  [1025.68426348  368.37539073 -446.88725429  262.76946019]
New Q values:  [ 552.38704117  368.37539073 -446.88725429  262.76946019]
Reward: -1  Episode Reward:  25
xxxxx
xag.x
x ..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   475.71111925 -2165.66138672  -180.6       ]
------
Step:6, Action:South
State  103
Old Q Values:  [ 221.30610858 1401.27639027  238.35800069    0.        ]
New Q values:  [221.30610858 725.62666846 238.35800069   0.        ]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 552.38704117  368.37539073 -446.88725429  262.76946019]
------
Step:7, Action:North
State  183
Old Q Values:  [ 1095.12734081  1100.71255331 10169.02231929  1554.80203889]
New Q values:  [  655.13893686  1100.71255331 10169.02231929  1554.80203889]
Reward: -1  Episode Reward:  23
xxxxx
xa..x
x ..x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[221.30610858 725.62666846 238.35800069   0.        ]
------
Step:8, Action:South
State  102
Old Q Values:  [-180.6        1942.41406952 2846.56389321 -180.6       ]
New Q values:  [-180.6        6378.59858087 2846.56389321 -180.6       ]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 18674.10984355     0.        ]
------
Step:9, Action:East
State  183
Old Q Values:  [  655.13893686  1100.71255331 10169.02231929  1554.80203889]
New Q values:  [ 655.13893686 1100.71255331 6235.92743307 1554.80203889]
Reward: 9  Episode Reward:  31
xxxxx
x ..x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  7.20972835e+03  0.00000000e+00]
------
Step:10, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.11979650e+04 1.72023692e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.11979650e+04 1.72086777e+04 2.45392999e+03]
Reward: 9  Episode Reward:  40
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[34407.76658108 10851.19153526 -4584.50430574   304.77863761]
------
Step:11, Action:North
State  210
Old Q Values:  [ 4823.51213858  3220.66258272   790.72804752 12405.78121886]
New Q values:  [23906.79220574  3220.66258272   790.72804752 12405.78121886]
Reward: 9  Episode Reward:  49
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[35541.05618371 27549.5211885   -180.00807518 73239.95783436]
------
Step:12, Action:West
State  130
Old Q Values:  [35541.05618371 27549.5211885   -180.00807518 73239.95783436]
New Q values:  [ 35541.05618371  27549.5211885    -180.00807518 127614.85985619]
Reward: 100009  Episode Reward:  100058
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16310.71293115 -6396.61506955 -5588.09647059  4861.26667042]
------
Step:1, Action:North
State  288
Old Q Values:  [16310.71293115 -6396.61506955 -5588.09647059  4861.26667042]
New Q values:  [16852.01514679 -6396.61506955 -5588.09647059  4861.26667042]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[34407.76658108 10851.19153526 -4584.50430574   304.77863761]
------
Step:2, Action:North
State  208
Old Q Values:  [34407.76658108 10851.19153526 -4584.50430574   304.77863761]
New Q values:  [13944.74697901 10851.19153526 -4584.50430574   304.77863761]
Reward: 9  Episode Reward:  18
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.87467822e+02]
------
Step:3, Action:West
State  136
Old Q Values:  [-2129.37064562  6088.67535102   660.86649319 -4785.50597998]
New Q values:  [-2129.37064562  6088.67535102   660.86649319 -7532.54675726]
Reward: -9991  Episode Reward:  -9973
xxxxx
x g x
x.. x
x.. x
xxxxx
xxxxx
x.g.x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.11979650e+04 1.72086777e+04 2.45392999e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144  5899.08809835  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  5317.60120416  6570.2578975    231.67262594]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 9841.88654939 -5807.06396197  4165.20151233  4743.78042822]
------
Step:2, Action:North
State  276
Old Q Values:  [ 9841.88654939 -5807.06396197  4165.20151233  4743.78042822]
New Q values:  [ 5907.231989   -5807.06396197  4165.20151233  4743.78042822]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
xga.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  5317.60120416  6570.2578975    231.67262594]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  4.64182325e+03  1.97085212e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  4.64182325e+03  1.50608462e+04  2.17939995e+03]
Reward: 9  Episode Reward:  17
xxxxx
x. .x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[23906.79220574  3220.66258272   790.72804752 12405.78121886]
------
Step:4, Action:North
State  210
Old Q Values:  [23906.79220574  3220.66258272   790.72804752 12405.78121886]
New Q values:  [ 9744.35722888  3220.66258272   790.72804752 12405.78121886]
Reward: 9  Episode Reward:  26
xxxxx
x. ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.87467822e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  5.87467822e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.81383172e+02]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126  2489.98681196]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   479.31495886]
New Q values:  [ -281.736      -1150.91067548   129.67618753   435.5819618 ]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  794.85326086  496.66968841 -252.78192178]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558  794.85326086  496.66968841 -252.78192178]
New Q values:  [-252.35169558 1280.9041225   496.66968841 -252.78192178]
Reward: 9  Episode Reward:  43
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 320.07341842    0.         3191.87606053    0.        ]
------
Step:8, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         3191.87606053    0.        ]
New Q values:  [ 320.07341842    0.         1824.39604684    0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:9, Action:East
State  203
Old Q Values:  [3.60604218e+00 2.47391199e+03 2.05741271e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 2.47391199e+03 1.64895532e+03 9.06816004e+03]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 409.10562358 2755.30078316    0.         1540.00681929]
------
Step:10, Action:West
State  216
Old Q Values:  [ 1334.56392879  6996.21455978 -8220.10378799   911.8961227 ]
New Q values:  [ 1334.56392879  6996.21455978 -8220.10378799  1776.65417508]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4708.31908668 -789.02220255 1598.29363255]
------
Step:11, Action:South
State  200
Old Q Values:  [ 169.9257398  5282.02506445 2064.67334366  568.38654082]
New Q values:  [ 169.9257398  4319.63832525 2064.67334366  568.38654082]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1764.39795042 -8521.23367799  7026.56777346  7358.09433156]
------
Step:12, Action:West
State  276
Old Q Values:  [ 5907.231989   -5807.06396197  4165.20151233  4743.78042822]
New Q values:  [ 5907.231989   -5807.06396197  4165.20151233  2075.00763365]
Reward: 9  Episode Reward:  48
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  573.65154119    26.73544252 -2450.98110439   240.60751196]
------
Step:13, Action:North
State  260
Old Q Values:  [   57.49724828 -5704.51612281  4609.10844709 -5679.36893145]
New Q values:  [ 1078.80516838 -5704.51612281  4609.10844709 -5679.36893145]
Reward: -1  Episode Reward:  47
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  3521.35423022  1485.98871489     0.        ]
------
Step:14, Action:South
State  189
Old Q Values:  [ 337.36081627 5310.23431876 1304.99403736  154.04646645]
New Q values:  [ 337.36081627 2295.58918986 1304.99403736  154.04646645]
Reward: -1  Episode Reward:  46
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  573.65154119    26.73544252 -2450.98110439   240.60751196]
------
Step:15, Action:North
State  261
Old Q Values:  [  573.65154119    26.73544252 -2450.98110439   240.60751196]
New Q values:  [  917.53737343    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 2295.58918986 1304.99403736  154.04646645]
------
Step:16, Action:South
State  189
Old Q Values:  [ 337.36081627 2295.58918986 1304.99403736  154.04646645]
New Q values:  [ 337.36081627 1192.89688797 1304.99403736  154.04646645]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  917.53737343    26.73544252 -2450.98110439   240.60751196]
------
Step:17, Action:North
State  261
Old Q Values:  [  917.53737343    26.73544252 -2450.98110439   240.60751196]
New Q values:  [  757.91316058    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1192.89688797 1304.99403736  154.04646645]
------
Step:18, Action:South
State  189
Old Q Values:  [ 337.36081627 1192.89688797 1304.99403736  154.04646645]
New Q values:  [ 337.36081627  703.93270336 1304.99403736  154.04646645]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  757.91316058    26.73544252 -2450.98110439   240.60751196]
------
Step:19, Action:North
State  261
Old Q Values:  [  757.91316058    26.73544252 -2450.98110439   240.60751196]
New Q values:  [  694.06347544    26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627  703.93270336 1304.99403736  154.04646645]
------
Step:20, Action:South
State  189
Old Q Values:  [ 337.36081627  703.93270336 1304.99403736  154.04646645]
New Q values:  [ 337.36081627  489.19212398 1304.99403736  154.04646645]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  694.06347544    26.73544252 -2450.98110439   240.60751196]
------
Step:21, Action:North
State  260
Old Q Values:  [ 1078.80516838 -5704.51612281  4609.10844709 -5679.36893145]
New Q values:  [ 1487.32833642 -5704.51612281  4609.10844709 -5679.36893145]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  3521.35423022  1485.98871489     0.        ]
------
Step:22, Action:South
State  188
Old Q Values:  [-6523.78898263  3521.35423022  1485.98871489     0.        ]
New Q values:  [-6523.78898263  2790.67422621  1485.98871489     0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1487.32833642 -5704.51612281  4609.10844709 -5679.36893145]
------
Step:23, Action:East
State  260
Old Q Values:  [ 1487.32833642 -5704.51612281  4609.10844709 -5679.36893145]
New Q values:  [ 1487.32833642 -5704.51612281  3615.21297554 -5679.36893145]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 5907.231989   -5807.06396197  4165.20151233  2075.00763365]
------
Step:24, Action:North
State  276
Old Q Values:  [ 5907.231989   -5807.06396197  4165.20151233  2075.00763365]
New Q values:  [ 4921.93178116 -5807.06396197  4165.20151233  2075.00763365]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         8532.12995187 3881.10435421  441.58769553]
------
Step:25, Action:South
State  206
Old Q Values:  [   0.         2504.14895323 1418.80979599    0.        ]
New Q values:  [   0.         2477.63911564 1418.80979599    0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4921.93178116 -5807.06396197  4165.20151233  2075.00763365]
------
Step:26, Action:North
State  276
Old Q Values:  [ 4921.93178116 -5807.06396197  4165.20151233  2075.00763365]
New Q values:  [ 4527.81169803 -5807.06396197  4165.20151233  2075.00763365]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         8532.12995187 3881.10435421  441.58769553]
------
Step:27, Action:South
State  204
Old Q Values:  [   0.         8532.12995187 3881.10435421  441.58769553]
New Q values:  [   0.         4770.59549016 3881.10435421  441.58769553]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4527.81169803 -5807.06396197  4165.20151233  2075.00763365]
------
Step:28, Action:North
State  276
Old Q Values:  [ 4527.81169803 -5807.06396197  4165.20151233  2075.00763365]
New Q values:  [ 3241.70332626 -5807.06396197  4165.20151233  2075.00763365]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         4770.59549016 3881.10435421  441.58769553]
------
Step:29, Action:South
State  204
Old Q Values:  [   0.         4770.59549016 3881.10435421  441.58769553]
New Q values:  [   0.         3157.19864976 3881.10435421  441.58769553]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3241.70332626 -5807.06396197  4165.20151233  2075.00763365]
------
Step:30, Action:East
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799  7026.56777346  7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799 67871.63165342  7358.09433156]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 552.38704117  368.37539073 -446.88725429  262.76946019]
------
Step:1, Action:North
State  181
Old Q Values:  [ 552.38704117  368.37539073 -446.88725429  262.76946019]
New Q values:  [ 369.06815224  368.37539073 -446.88725429  262.76946019]
Reward: 9  Episode Reward:  9
xxxxx
xa gx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   475.71111925 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 3947.56425461  403.77554135 -120.29354603]
New Q values:  [-177.44732869 1689.14614752  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 369.06815224  368.37539073 -446.88725429  262.76946019]
------
Step:3, Action:North
State  181
Old Q Values:  [ 369.06815224  368.37539073 -446.88725429  262.76946019]
New Q values:  [ 289.74059667  368.37539073 -446.88725429  262.76946019]
Reward: -1  Episode Reward:  7
xxxxx
xa gx
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   475.71111925 -2165.66138672  -180.6       ]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 1689.14614752  403.77554135 -120.29354603]
New Q values:  [-177.44732869  785.57107623  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  6
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 289.74059667  368.37539073 -446.88725429  262.76946019]
------
Step:5, Action:South
State  183
Old Q Values:  [ 655.13893686 1100.71255331 6235.92743307 1554.80203889]
New Q values:  [ 655.13893686  653.90406396 6235.92743307 1554.80203889]
Reward: 9  Episode Reward:  15
xxxxx
x  .x
x ..x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  694.06347544    26.73544252 -2450.98110439   240.60751196]
------
Step:6, Action:North
State  261
Old Q Values:  [  694.06347544    26.73544252 -2450.98110439   240.60751196]
New Q values:  [ 2147.8036201     26.73544252 -2450.98110439   240.60751196]
Reward: -1  Episode Reward:  14
xxxxx
x  .x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 655.13893686  653.90406396 6235.92743307 1554.80203889]
------
Step:7, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243 18674.10984355     0.        ]
New Q values:  [    0.         -5536.05678243  9637.96244277     0.        ]
Reward: 9  Episode Reward:  23
xxxxx
x  .x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  7.20972835e+03  0.00000000e+00]
------
Step:8, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  7.20972835e+03  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  6.61102571e+03  0.00000000e+00]
Reward: 9  Episode Reward:  32
xxxxx
x  .x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 9744.35722888  3220.66258272   790.72804752 12405.78121886]
------
Step:9, Action:West
State  218
Old Q Values:  [ 409.10562358 2755.30078316    0.         1540.00681929]
New Q values:  [ 409.10562358 2755.30078316    0.         2598.71043955]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  6.61102571e+03  0.00000000e+00]
------
Step:10, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  6.61102571e+03  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  6.36554465e+03  0.00000000e+00]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 9744.35722888  3220.66258272   790.72804752 12405.78121886]
------
Step:11, Action:West
State  210
Old Q Values:  [ 9744.35722888  3220.66258272   790.72804752 12405.78121886]
New Q values:  [9744.35722888 3220.66258272  790.72804752 7286.15406391]
Reward: -1  Episode Reward:  29
xxxxx
x  .x
x a x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194 1482.85185902 7748.13858789 1915.70494401]
------
Step:12, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  6.36554465e+03  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  5.46892503e+03  0.00000000e+00]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9744.35722888 3220.66258272  790.72804752 7286.15406391]
------
Step:13, Action:North
State  218
Old Q Values:  [ 409.10562358 2755.30078316    0.         2598.71043955]
New Q values:  [ 463.45720114 2755.30078316    0.         2598.71043955]
Reward: 9  Episode Reward:  37
xxxxx
x  ax
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.81383172e+02]
------
Step:14, Action:West
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  9.81383172e+02]
New Q values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.13894931e+03]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126  2489.98681196]
------
Step:15, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   611.86548126  2489.98681196]
New Q values:  [ -253.44886264 -1902.20915811   611.86548126  1231.06604765]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  785.57107623  403.77554135 -120.29354603]
------
Step:16, Action:South
State  111
Old Q Values:  [-177.44732869  785.57107623  403.77554135 -120.29354603]
New Q values:  [-177.44732869  705.1266417   403.77554135 -120.29354603]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627  489.19212398 1304.99403736  154.04646645]
------
Step:17, Action:East
State  191
Old Q Values:  [   3.06655861 1802.51851586 1241.2088357     0.        ]
New Q values:  [3.06655861e+00 1.80251852e+03 3.21633155e+03 0.00000000e+00]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x a x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 2.47391199e+03 1.64895532e+03 9.06816004e+03]
------
Step:18, Action:West
State  203
Old Q Values:  [3.60604218e+00 2.47391199e+03 1.64895532e+03 9.06816004e+03]
New Q values:  [3.60604218e+00 2.47391199e+03 1.64895532e+03 4.59156348e+03]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[3.06655861e+00 1.80251852e+03 3.21633155e+03 0.00000000e+00]
------
Step:19, Action:East
State  189
Old Q Values:  [ 337.36081627  489.19212398 1304.99403736  154.04646645]
New Q values:  [ 337.36081627  489.19212398 1933.89334095  154.04646645]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4708.31908668 -789.02220255 1598.29363255]
------
Step:20, Action:South
State  205
Old Q Values:  [  0.         917.27249131   0.         198.38683706]
New Q values:  [   0.         1329.5544202     0.          198.38683706]
Reward: 9  Episode Reward:  40
xxxxx
x  gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.19081808e+03 1.46883995e+03]
------
Step:21, Action:East
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799 67871.63165342  7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799 92209.6572054   7358.09433156]
Reward: 100009  Episode Reward:  100049
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16852.01514679 -6396.61506955 -5588.09647059  4861.26667042]
------
Step:1, Action:North
State  288
Old Q Values:  [16852.01514679 -6396.61506955 -5588.09647059  4861.26667042]
New Q values:  [10929.63015242 -6396.61506955 -5588.09647059  4861.26667042]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13944.74697901 10851.19153526 -4584.50430574   304.77863761]
------
Step:2, Action:South
State  208
Old Q Values:  [13944.74697901 10851.19153526 -4584.50430574   304.77863761]
New Q values:  [13944.74697901  7618.76565983 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  8
xxxxx
x. .x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10929.63015242 -6396.61506955 -5588.09647059  4861.26667042]
------
Step:3, Action:West
State  288
Old Q Values:  [10929.63015242 -6396.61506955 -5588.09647059  4861.26667042]
New Q values:  [10929.63015242 -6396.61506955 -5588.09647059 29612.80382979]
Reward: 9  Episode Reward:  17
xxxxx
x. .x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1764.39795042 -8521.23367799 92209.6572054   7358.09433156]
------
Step:4, Action:East
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799 92209.6572054   7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799 45767.1040311   7358.09433156]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10929.63015242 -6396.61506955 -5588.09647059 29612.80382979]
------
Step:5, Action:North
State  288
Old Q Values:  [10929.63015242 -6396.61506955 -5588.09647059 29612.80382979]
New Q values:  [ 7294.55922963 -6396.61506955 -5588.09647059 29612.80382979]
Reward: -1  Episode Reward:  15
xxxxx
x. .x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9744.35722888 3220.66258272  790.72804752 7286.15406391]
------
Step:6, Action:North
State  208
Old Q Values:  [13944.74697901  7618.76565983 -4584.50430574   304.77863761]
New Q values:  [ 5924.98358537  7618.76565983 -4584.50430574   304.77863761]
Reward: 9  Episode Reward:  24
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.13894931e+03]
------
Step:7, Action:West
State  136
Old Q Values:  [-2129.37064562  6088.67535102   660.86649319 -7532.54675726]
New Q values:  [-2129.37064562  6088.67535102   660.86649319 -2637.36306818]
Reward: -1  Episode Reward:  23
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1254.18544909    65.73076831]
------
Step:8, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1254.18544909    65.73076831]
New Q values:  [-9594.56523706 -8069.05606225  2327.67678494    65.73076831]
Reward: -1  Episode Reward:  22
xxxxx
x.gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  6088.67535102   660.86649319 -2637.36306818]
------
Step:9, Action:South
State  138
Old Q Values:  [-8.31588891e+01 -2.78935791e+02 -3.22965309e-01  1.13894931e+03]
New Q values:  [-8.31588891e+01  2.17345538e+03 -3.22965309e-01  1.13894931e+03]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5924.98358537  7618.76565983 -4584.50430574   304.77863761]
------
Step:10, Action:South
State  208
Old Q Values:  [ 5924.98358537  7618.76565983 -4584.50430574   304.77863761]
New Q values:  [ 5924.98358537 11930.74741287 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7294.55922963 -6396.61506955 -5588.09647059 29612.80382979]
------
Step:11, Action:West
State  288
Old Q Values:  [ 7294.55922963 -6396.61506955 -5588.09647059 29612.80382979]
New Q values:  [ 7294.55922963 -6396.61506955 -5588.09647059 25574.65274124]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1764.39795042 -8521.23367799 45767.1040311   7358.09433156]
------
Step:12, Action:East
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799 45767.1040311   7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799 25978.63743481  7358.09433156]
Reward: -1  Episode Reward:  18
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7294.55922963 -6396.61506955 -5588.09647059 25574.65274124]
------
Step:13, Action:West
State  288
Old Q Values:  [ 7294.55922963 -6396.61506955 -5588.09647059 25574.65274124]
New Q values:  [ 7294.55922963 -6396.61506955 -5588.09647059 14763.55314174]
Reward: -1  Episode Reward:  17
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 5021.94218576  1113.04903879 15114.30681746  5383.89758491]
------
Step:14, Action:East
State  273
Old Q Values:  [ 5021.94218576  1113.04903879 15114.30681746  5383.89758491]
New Q values:  [ 5021.94218576  1113.04903879 10474.1886695   5383.89758491]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7294.55922963 -6396.61506955 -5588.09647059 14763.55314174]
------
Step:15, Action:West
State  288
Old Q Values:  [ 7294.55922963 -6396.61506955 -5588.09647059 14763.55314174]
New Q values:  [ 7294.55922963 -6396.61506955 -5588.09647059  9047.07785755]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 5021.94218576  1113.04903879 10474.1886695   5383.89758491]
------
Step:16, Action:West
State  273
Old Q Values:  [ 5021.94218576  1113.04903879 10474.1886695   5383.89758491]
New Q values:  [ 5021.94218576  1113.04903879 10474.1886695   9955.33454873]
Reward: 9  Episode Reward:  24
xxxxx
x.  x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[25987.91838256 12764.58618105  3851.09595999  1875.31501677]
------
Step:17, Action:North
State  257
Old Q Values:  [25987.91838256 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [37065.72055449 12764.58618105  3851.09595999  1875.31501677]
Reward: 9  Episode Reward:  33
xxxxx
x. gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631 27912.76462132     0.        ]
------
Step:18, Action:North
State  181
Old Q Values:  [ 289.74059667  368.37539073 -446.88725429  262.76946019]
New Q values:  [ 264.00957445  368.37539073 -446.88725429  262.76946019]
Reward: 9  Episode Reward:  42
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   475.71111925 -2165.66138672  -180.6       ]
------
Step:19, Action:South
State  99
Old Q Values:  [    0.         30833.62257313 51580.7370385      0.        ]
New Q values:  [    0.         38998.00223072 51580.7370385      0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631 27912.76462132     0.        ]
------
Step:20, Action:North
State  181
Old Q Values:  [ 264.00957445  368.37539073 -446.88725429  262.76946019]
New Q values:  [ 316.54182229  368.37539073 -446.88725429  262.76946019]
Reward: -1  Episode Reward:  40
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  705.1266417   403.77554135 -120.29354603]
------
Step:21, Action:South
State  99
Old Q Values:  [    0.         38998.00223072 51580.7370385      0.        ]
New Q values:  [    0.         42263.75409375 51580.7370385      0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631 27912.76462132     0.        ]
------
Step:22, Action:North
State  180
Old Q Values:  [-5631.58166229  3378.11980254  7713.770822   -4966.32149798]
New Q values:  [-1557.47323198  3378.11980254  7713.770822   -4966.32149798]
Reward: -1  Episode Reward:  38
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[ -239.29051573 -2521.9025475   2319.1981098   -180.6       ]
------
Step:23, Action:East
State  99
Old Q Values:  [    0.         42263.75409375 51580.7370385      0.        ]
New Q values:  [    0.         42263.75409375 58945.17153785     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 127711.58907484]
------
Step:24, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   435.5819618 ]
New Q values:  [ -281.736      -1150.91067548   129.67618753   385.17077723]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  705.1266417   403.77554135 -120.29354603]
------
Step:25, Action:South
State  110
Old Q Values:  [ -239.29051573 -2521.9025475   2319.1981098   -180.6       ]
New Q values:  [-239.29051573 1882.02771383 2319.1981098  -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  9637.96244277     0.        ]
------
Step:26, Action:East
State  179
Old Q Values:  [82228.67666629 16101.90751562 90855.45697393     0.        ]
New Q values:  [ 82228.67666629  16101.90751562 100865.83663667      0.        ]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126  1231.06604765]
------
Step:1, Action:West
State  121
Old Q Values:  [    0.             0.         -8209.41191864   366.82262216]
New Q values:  [    0.             0.         -8209.41191864   294.84238464]
Reward: 9  Episode Reward:  9
xxxxx
xa gx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   475.71111925 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  109
Old Q Values:  [ -241.10880094   475.71111925 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   306.19706492 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 316.54182229  368.37539073 -446.88725429  262.76946019]
------
Step:3, Action:South
State  180
Old Q Values:  [-1557.47323198  3378.11980254  7713.770822   -4966.32149798]
New Q values:  [-1557.47323198  2441.21181368  7713.770822   -4966.32149798]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x . x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1487.32833642 -5704.51612281  3615.21297554 -5679.36893145]
------
Step:4, Action:East
State  261
Old Q Values:  [ 2147.8036201     26.73544252 -2450.98110439   240.60751196]
New Q values:  [2147.8036201    26.73544252  274.56801194  240.60751196]
Reward: 9  Episode Reward:  36
xxxxx
x g.x
x . x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3241.70332626 -5807.06396197  4165.20151233  2075.00763365]
------
Step:5, Action:East
State  273
Old Q Values:  [ 5021.94218576  1113.04903879 10474.1886695   9955.33454873]
New Q values:  [5021.94218576 1113.04903879 6909.19882507 9955.33454873]
Reward: 9  Episode Reward:  45
xxxxx
x  gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7294.55922963 -6396.61506955 -5588.09647059  9047.07785755]
------
Step:6, Action:West
State  288
Old Q Values:  [ 7294.55922963 -6396.61506955 -5588.09647059  9047.07785755]
New Q values:  [ 7294.55922963 -6396.61506955 -5588.09647059 11411.82237346]
Reward: -1  Episode Reward:  44
xxxxx
x g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1764.39795042 -8521.23367799 25978.63743481  7358.09433156]
------
Step:7, Action:East
State  273
Old Q Values:  [5021.94218576 1113.04903879 6909.19882507 9955.33454873]
New Q values:  [5021.94218576 1113.04903879 6186.62624206 9955.33454873]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x . x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7294.55922963 -6396.61506955 -5588.09647059 11411.82237346]
------
Step:8, Action:West
State  288
Old Q Values:  [ 7294.55922963 -6396.61506955 -5588.09647059 11411.82237346]
New Q values:  [ 7294.55922963 -6396.61506955 -5588.09647059 12357.72017983]
Reward: -1  Episode Reward:  42
xxxxx
x g.x
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1764.39795042 -8521.23367799 25978.63743481  7358.09433156]
------
Step:9, Action:East
State  276
Old Q Values:  [ 3241.70332626 -5807.06396197  4165.20151233  2075.00763365]
New Q values:  [ 3241.70332626 -5807.06396197  5372.79665888  2075.00763365]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7294.55922963 -6396.61506955 -5588.09647059 12357.72017983]
------
Step:10, Action:West
State  288
Old Q Values:  [ 7294.55922963 -6396.61506955 -5588.09647059 12357.72017983]
New Q values:  [ 7294.55922963 -6396.61506955 -5588.09647059  6736.07930238]
Reward: -10001  Episode Reward:  -9960
xxxxx
x  .x
x . x
x g x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7294.55922963 -6396.61506955 -5588.09647059  6736.07930238]
------
Step:1, Action:North
State  288
Old Q Values:  [ 7294.55922963 -6396.61506955 -5588.09647059  6736.07930238]
New Q values:  [ 5846.53086052 -6396.61506955 -5588.09647059  6736.07930238]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9744.35722888 3220.66258272  790.72804752 7286.15406391]
------
Step:2, Action:North
State  208
Old Q Values:  [ 5924.98358537 11930.74741287 -4584.50430574   304.77863761]
New Q values:  [40659.851391   11930.74741287 -4584.50430574   304.77863761]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 35541.05618371  27549.5211885    -180.00807518 127614.85985619]
------
Step:3, Action:West
State  138
Old Q Values:  [-8.31588891e+01  2.17345538e+03 -3.22965309e-01  1.13894931e+03]
New Q values:  [-8.31588891e+01  2.17345538e+03 -3.22965309e-01  5.76530958e+02]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753   385.17077723]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   385.17077723]
New Q values:  [ -281.736      -1150.91067548   129.67618753   543.73954764]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1280.9041225   496.66968841 -252.78192178]
------
Step:5, Action:South
State  110
Old Q Values:  [-239.29051573 1882.02771383 2319.1981098  -180.6       ]
New Q values:  [-239.29051573 3649.59981836 2319.1981098  -180.6       ]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xa. x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  9637.96244277     0.        ]
------
Step:6, Action:East
State  188
Old Q Values:  [-6523.78898263  2790.67422621  1485.98871489     0.        ]
New Q values:  [-6523.78898263  2790.67422621  1895.68698353     0.        ]
Reward: 9  Episode Reward:  54
xxxxx
x   x
xga x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  4319.63832525 2064.67334366  568.38654082]
------
Step:7, Action:South
State  194
Old Q Values:  [-6.00000000e-01  4.64182325e+03  1.50608462e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  6.96557205e+04  1.50608462e+04  2.17939995e+03]
Reward: 100009  Episode Reward:  100063
xxxxx
x   x
x   x
xga x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2147.8036201    26.73544252  274.56801194  240.60751196]
------
Step:1, Action:North
State  261
Old Q Values:  [2147.8036201    26.73544252  274.56801194  240.60751196]
New Q values:  [975.03406526  26.73544252 274.56801194 240.60751196]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 316.54182229  368.37539073 -446.88725429  262.76946019]
------
Step:2, Action:South
State  181
Old Q Values:  [ 316.54182229  368.37539073 -446.88725429  262.76946019]
New Q values:  [ 316.54182229  439.26037587 -446.88725429  262.76946019]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[975.03406526  26.73544252 274.56801194 240.60751196]
------
Step:3, Action:North
State  261
Old Q Values:  [975.03406526  26.73544252 274.56801194 240.60751196]
New Q values:  [521.19173886  26.73544252 274.56801194 240.60751196]
Reward: -1  Episode Reward:  7
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 316.54182229  439.26037587 -446.88725429  262.76946019]
------
Step:4, Action:South
State  181
Old Q Values:  [ 316.54182229  439.26037587 -446.88725429  262.76946019]
New Q values:  [ 316.54182229  331.46167201 -446.88725429  262.76946019]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[521.19173886  26.73544252 274.56801194 240.60751196]
------
Step:5, Action:North
State  261
Old Q Values:  [521.19173886  26.73544252 274.56801194 240.60751196]
New Q values:  [2078.65492547   26.73544252  274.56801194  240.60751196]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 655.13893686  653.90406396 6235.92743307 1554.80203889]
------
Step:6, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  9637.96244277     0.        ]
New Q values:  [    0.         -5536.05678243 24757.30113613     0.        ]
Reward: 9  Episode Reward:  14
xxxxx
x.. x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  6.96557205e+04  1.50608462e+04  2.17939995e+03]
------
Step:7, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.11979650e+04 1.72086777e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.62781772e+04 1.72086777e+04 2.45392999e+03]
Reward: 9  Episode Reward:  23
xxxxx
x.. x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1764.39795042 -8521.23367799 25978.63743481  7358.09433156]
------
Step:8, Action:East
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799 25978.63743481  7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799 12417.67876464  7358.09433156]
Reward: 9  Episode Reward:  32
xxxxx
x.. x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5846.53086052 -6396.61506955 -5588.09647059  6736.07930238]
------
Step:9, Action:West
State  288
Old Q Values:  [ 5846.53086052 -6396.61506955 -5588.09647059  6736.07930238]
New Q values:  [ 5846.53086052 -6396.61506955 -5588.09647059  5680.43208557]
Reward: -1  Episode Reward:  31
xxxxx
x.. x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[5021.94218576 1113.04903879 6186.62624206 9955.33454873]
------
Step:10, Action:West
State  273
Old Q Values:  [5021.94218576 1113.04903879 6186.62624206 9955.33454873]
New Q values:  [5021.94218576 1113.04903879 6186.62624206 4605.13029713]
Reward: -1  Episode Reward:  30
xxxxx
x.. x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2078.65492547   26.73544252  274.56801194  240.60751196]
------
Step:11, Action:North
State  261
Old Q Values:  [2078.65492547   26.73544252  274.56801194  240.60751196]
New Q values:  [2701.64020011   26.73544252  274.56801194  240.60751196]
Reward: -1  Episode Reward:  29
xxxxx
x.. x
xa .x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 655.13893686  653.90406396 6235.92743307 1554.80203889]
------
Step:12, Action:East
State  181
Old Q Values:  [ 316.54182229  331.46167201 -446.88725429  262.76946019]
New Q values:  [  316.54182229   331.46167201 -1016.75160394   262.76946019]
Reward: -10001  Episode Reward:  -9972
xxxxx
x.. x
x g.x
x   x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 15377.4345861  19015.79464603  1169.39963074]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  6.96557205e+04  1.50608462e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  6.96557205e+04  8.95304563e+03  2.17939995e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9744.35722888 3220.66258272  790.72804752 7286.15406391]
------
Step:2, Action:North
State  210
Old Q Values:  [9744.35722888 3220.66258272  790.72804752 7286.15406391]
New Q values:  [4555.17950599 3220.66258272  790.72804752 7286.15406391]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  2.17345538e+03 -3.22965309e-01  5.76530958e+02]
------
Step:3, Action:South
State  130
Old Q Values:  [ 35541.05618371  27549.5211885    -180.00807518 127614.85985619]
New Q values:  [ 35541.05618371  13205.05469457   -180.00807518 127614.85985619]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4555.17950599 3220.66258272  790.72804752 7286.15406391]
------
Step:4, Action:West
State  210
Old Q Values:  [4555.17950599 3220.66258272  790.72804752 7286.15406391]
New Q values:  [ 4555.17950599  3220.66258272   790.72804752 23810.57778458]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  6.96557205e+04  8.95304563e+03  2.17939995e+03]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.62781772e+04 1.72086777e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.02419745e+04 1.72086777e+04 2.45392999e+03]
Reward: 9  Episode Reward:  25
xxxxx
x.. x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1764.39795042 -8521.23367799 12417.67876464  7358.09433156]
------
Step:6, Action:East
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799 12417.67876464  7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799  6720.43076401  7358.09433156]
Reward: -1  Episode Reward:  24
xxxxx
x.. x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5846.53086052 -6396.61506955 -5588.09647059  5680.43208557]
------
Step:7, Action:North
State  288
Old Q Values:  [ 5846.53086052 -6396.61506955 -5588.09647059  5680.43208557]
New Q values:  [ 9481.18567958 -6396.61506955 -5588.09647059  5680.43208557]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4555.17950599  3220.66258272   790.72804752 23810.57778458]
------
Step:8, Action:West
State  210
Old Q Values:  [ 4555.17950599  3220.66258272   790.72804752 23810.57778458]
New Q values:  [ 4555.17950599  3220.66258272   790.72804752 30420.34727285]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x.a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  6.96557205e+04  8.95304563e+03  2.17939995e+03]
------
Step:9, Action:South
State  194
Old Q Values:  [-6.00000000e-01  6.96557205e+04  8.95304563e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.40691165e+04  8.95304563e+03  2.17939995e+03]
Reward: -10001  Episode Reward:  -9979
xxxxx
x.. x
x.  x
x.g x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753   543.73954764]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   543.73954764]
New Q values:  [ -281.736      -1150.91067548   129.67618753   607.16705581]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1280.9041225   496.66968841 -252.78192178]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869  705.1266417   403.77554135 -120.29354603]
New Q values:  [-177.44732869 2158.2288866   403.77554135 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 655.13893686  653.90406396 6235.92743307 1554.80203889]
------
Step:3, Action:East
State  183
Old Q Values:  [ 655.13893686  653.90406396 6235.92743307 1554.80203889]
New Q values:  [ 655.13893686  653.90406396 4134.4484816  1554.80203889]
Reward: -1  Episode Reward:  17
xxxxx
x  .x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  5.46892503e+03  0.00000000e+00]
------
Step:4, Action:East
State  203
Old Q Values:  [3.60604218e+00 2.47391199e+03 1.64895532e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 2.47391199e+03 1.49157236e+03 4.59156348e+03]
Reward: 9  Episode Reward:  26
xxxxx
x  .x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 463.45720114 2755.30078316    0.         2598.71043955]
------
Step:5, Action:West
State  216
Old Q Values:  [ 1334.56392879  6996.21455978 -8220.10378799  1776.65417508]
New Q values:  [ 1334.56392879  6996.21455978 -8220.10378799  2122.55739604]
Reward: -1  Episode Reward:  25
xxxxx
x  .x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4708.31908668 -789.02220255 1598.29363255]
------
Step:6, Action:South
State  201
Old Q Values:  [ 613.33320563 4708.31908668 -789.02220255 1598.29363255]
New Q values:  [ 613.33320563 3744.71550729 -789.02220255 1598.29363255]
Reward: 9  Episode Reward:  34
xxxxx
x  gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[5021.94218576 1113.04903879 6186.62624206 4605.13029713]
------
Step:7, Action:East
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799  6720.43076401  7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799  5537.92800948  7358.09433156]
Reward: 9  Episode Reward:  43
xxxxx
x g.x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9481.18567958 -6396.61506955 -5588.09647059  5680.43208557]
------
Step:8, Action:North
State  288
Old Q Values:  [ 9481.18567958 -6396.61506955 -5588.09647059  5680.43208557]
New Q values:  [15989.82968913 -6396.61506955 -5588.09647059  5680.43208557]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[40659.851391   11930.74741287 -4584.50430574   304.77863761]
------
Step:9, Action:North
State  216
Old Q Values:  [ 1334.56392879  6996.21455978 -8220.10378799  2122.55739604]
New Q values:  [ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
Reward: 9  Episode Reward:  51
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  2.17345538e+03 -3.22965309e-01  5.76530958e+02]
------
Step:10, Action:West
State  138
Old Q Values:  [-8.31588891e+01  2.17345538e+03 -3.22965309e-01  5.76530958e+02]
New Q values:  [-8.31588891e+01  2.17345538e+03 -3.22965309e-01  5.99332198e+02]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126  1231.06604765]
------
Step:11, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   611.86548126  1231.06604765]
New Q values:  [ -253.44886264 -1902.20915811   611.86548126   876.09765581]
Reward: -1  Episode Reward:  49
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1280.9041225   496.66968841 -252.78192178]
------
Step:12, Action:South
State  105
Old Q Values:  [-180.6         332.82828905 -764.93196255    0.        ]
New Q values:  [-180.6         822.1052524  -764.93196255    0.        ]
Reward: -1  Episode Reward:  48
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 189.19059642    0.         2298.57978925 -178.98      ]
------
Step:13, Action:East
State  185
Old Q Values:  [ 189.19059642    0.         2298.57978925 -178.98      ]
New Q values:  [ 189.19059642    0.         2214.72341328 -178.98      ]
Reward: -1  Episode Reward:  47
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  4319.63832525 2064.67334366  568.38654082]
------
Step:14, Action:South
State  200
Old Q Values:  [ 169.9257398  4319.63832525 2064.67334366  568.38654082]
New Q values:  [ 169.9257398  3934.68362957 2064.67334366  568.38654082]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1764.39795042 -8521.23367799  5537.92800948  7358.09433156]
------
Step:15, Action:West
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799  5537.92800948  7358.09433156]
New Q values:  [ 1764.39795042 -8521.23367799  5537.92800948 77053.71612584]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
xg  x
xa  x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15989.82968913 -6396.61506955 -5588.09647059  5680.43208557]
------
Step:1, Action:North
State  288
Old Q Values:  [15989.82968913 -6396.61506955 -5588.09647059  5680.43208557]
New Q values:  [18599.28729296 -6396.61506955 -5588.09647059  5680.43208557]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[40659.851391   11930.74741287 -4584.50430574   304.77863761]
------
Step:2, Action:North
State  208
Old Q Values:  [40659.851391   11930.74741287 -4584.50430574   304.77863761]
New Q values:  [18095.94316171 11930.74741287 -4584.50430574   304.77863761]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  6088.67535102   660.86649319 -2637.36306818]
------
Step:3, Action:South
State  136
Old Q Values:  [-2129.37064562  6088.67535102   660.86649319 -2637.36306818]
New Q values:  [-2129.37064562  7863.65308892   660.86649319 -2637.36306818]
Reward: -1  Episode Reward:  17
xxxxx
x. gx
x..ax
x.. x
xxxxx
Step:4, Action:North
State  208
Old Q Values:  [18095.94316171 11930.74741287 -4584.50430574   304.77863761]
New Q values:  [ 6666.5602132  11930.74741287 -4584.50430574   304.77863761]
Reward: -10001  Episode Reward:  -9984
xxxxx
x.  x
x..gx
x.. x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[5021.94218576 1113.04903879 6186.62624206 4605.13029713]
------
Step:1, Action:East
State  273
Old Q Values:  [5021.94218576 1113.04903879 6186.62624206 4605.13029713]
New Q values:  [5021.94218576 1113.04903879 8059.83668471 4605.13029713]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18599.28729296 -6396.61506955 -5588.09647059  5680.43208557]
------
Step:2, Action:North
State  288
Old Q Values:  [18599.28729296 -6396.61506955 -5588.09647059  5680.43208557]
New Q values:  [ 5018.33914104 -6396.61506955 -5588.09647059  5680.43208557]
Reward: -10001  Episode Reward:  -9992
xxxxx
x...x
x..gx
x.  x
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   129.67618753   607.16705581]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   607.16705581]
New Q values:  [ -281.736      -1150.91067548   129.67618753   632.53805907]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1280.9041225   496.66968841 -252.78192178]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 2158.2288866   403.77554135 -120.29354603]
New Q values:  [-177.44732869  968.13005624  403.77554135 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa.gx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   331.46167201 -1016.75160394   262.76946019]
------
Step:3, Action:South
State  183
Old Q Values:  [ 655.13893686  653.90406396 4134.4484816  1554.80203889]
New Q values:  [ 655.13893686 1077.45368562 4134.4484816  1554.80203889]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2701.64020011   26.73544252  274.56801194  240.60751196]
------
Step:4, Action:North
State  261
Old Q Values:  [2701.64020011   26.73544252  274.56801194  240.60751196]
New Q values:  [2320.39062452   26.73544252  274.56801194  240.60751196]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 655.13893686 1077.45368562 4134.4484816  1554.80203889]
------
Step:5, Action:East
State  181
Old Q Values:  [  316.54182229   331.46167201 -1016.75160394   262.76946019]
New Q values:  [  316.54182229   331.46167201 -4430.22327233   262.76946019]
Reward: -9991  Episode Reward:  -9965
xxxxx
x  .x
x g.x
x  .x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 3649.59981836 2319.1981098  -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869  968.13005624  403.77554135 -120.29354603]
New Q values:  [-177.44732869 1632.98656698  403.77554135 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 655.13893686 1077.45368562 4134.4484816  1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [ 655.13893686 1077.45368562 4134.4484816  1554.80203889]
New Q values:  [ 655.13893686 1077.45368562 3983.62096901 1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  14.86214194 1482.85185902 7748.13858789 1915.70494401]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.40691165e+04  8.95304563e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.40691165e+04  1.27127224e+04  2.17939995e+03]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4555.17950599  3220.66258272   790.72804752 30420.34727285]
------
Step:4, Action:West
State  210
Old Q Values:  [ 4555.17950599  3220.66258272   790.72804752 30420.34727285]
New Q values:  [ 4555.17950599  3220.66258272   790.72804752 17872.27730295]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x a x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 15377.4345861  19015.79464603  1169.39963074]
------
Step:5, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.40691165e+04  1.27127224e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.40691165e+04  1.04461722e+04  2.17939995e+03]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4555.17950599  3220.66258272   790.72804752 17872.27730295]
------
Step:6, Action:West
State  210
Old Q Values:  [ 4555.17950599  3220.66258272   790.72804752 17872.27730295]
New Q values:  [ 4555.17950599  3220.66258272   790.72804752 14369.04587463]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.40691165e+04  1.04461722e+04  2.17939995e+03]
------
Step:7, Action:South
State  194
Old Q Values:  [-6.00000000e-01  2.40691165e+04  1.04461722e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  5.24488560e+03  1.04461722e+04  2.17939995e+03]
Reward: -9991  Episode Reward:  -9967
xxxxx
x ..x
x   x
x g.x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[5021.94218576 1113.04903879 8059.83668471 4605.13029713]
------
Step:1, Action:North
State  272
Old Q Values:  [ 1764.39795042 -8521.23367799  5537.92800948 77053.71612584]
New Q values:  [ 3845.01082957 -8521.23367799  5537.92800948 77053.71612584]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  5.24488560e+03  1.04461722e+04  2.17939995e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.02419745e+04 1.72086777e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.02419745e+04 1.04680953e+04 2.45392999e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6666.5602132  11930.74741287 -4584.50430574   304.77863761]
------
Step:3, Action:South
State  208
Old Q Values:  [ 6666.5602132  11930.74741287 -4584.50430574   304.77863761]
New Q values:  [ 6666.5602132   6475.82859082 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  17
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5018.33914104 -6396.61506955 -5588.09647059  5680.43208557]
------
Step:4, Action:West
State  288
Old Q Values:  [ 5018.33914104 -6396.61506955 -5588.09647059  5680.43208557]
New Q values:  [ 5018.33914104 -6396.61506955 -5588.09647059 25387.68767198]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5537.92800948 77053.71612584]
------
Step:5, Action:West
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  5537.92800948 77053.71612584]
New Q values:  [ 3845.01082957 -8521.23367799  5537.92800948 41946.60261668]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37065.72055449 12764.58618105  3851.09595999  1875.31501677]
------
Step:6, Action:North
State  261
Old Q Values:  [2320.39062452   26.73544252  274.56801194  240.60751196]
New Q values:  [1032.99475141   26.73544252  274.56801194  240.60751196]
Reward: 9  Episode Reward:  34
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   331.46167201 -4430.22327233   262.76946019]
------
Step:7, Action:South
State  183
Old Q Values:  [ 655.13893686 1077.45368562 3983.62096901 1554.80203889]
New Q values:  [ 655.13893686  740.27989967 3983.62096901 1554.80203889]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1032.99475141   26.73544252  274.56801194  240.60751196]
------
Step:8, Action:North
State  261
Old Q Values:  [1032.99475141   26.73544252  274.56801194  240.60751196]
New Q values:  [1607.68419127   26.73544252  274.56801194  240.60751196]
Reward: -1  Episode Reward:  32
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 655.13893686  740.27989967 3983.62096901 1554.80203889]
------
Step:9, Action:East
State  183
Old Q Values:  [ 655.13893686  740.27989967 3983.62096901 1554.80203889]
New Q values:  [ 655.13893686  740.27989967 4726.700037   1554.80203889]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  5.24488560e+03  1.04461722e+04  2.17939995e+03]
------
Step:10, Action:East
State  194
Old Q Values:  [-6.00000000e-01  5.24488560e+03  1.04461722e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  5.24488560e+03  8.48858263e+03  2.17939995e+03]
Reward: -1  Episode Reward:  30
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4555.17950599  3220.66258272   790.72804752 14369.04587463]
------
Step:11, Action:West
State  210
Old Q Values:  [ 4555.17950599  3220.66258272   790.72804752 14369.04587463]
New Q values:  [4555.17950599 3220.66258272  790.72804752 8293.59313833]
Reward: -1  Episode Reward:  29
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  5.24488560e+03  8.48858263e+03  2.17939995e+03]
------
Step:12, Action:East
State  194
Old Q Values:  [-6.00000000e-01  5.24488560e+03  8.48858263e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  5.24488560e+03  5.88291099e+03  2.17939995e+03]
Reward: -1  Episode Reward:  28
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4555.17950599 3220.66258272  790.72804752 8293.59313833]
------
Step:13, Action:West
State  210
Old Q Values:  [4555.17950599 3220.66258272  790.72804752 8293.59313833]
New Q values:  [4555.17950599 3220.66258272  790.72804752 5081.71055317]
Reward: -1  Episode Reward:  27
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  5.24488560e+03  5.88291099e+03  2.17939995e+03]
------
Step:14, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.02419745e+04 1.04680953e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.02419745e+04 6.18660618e+03 2.45392999e+03]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6666.5602132   6475.82859082 -4584.50430574   304.77863761]
------
Step:15, Action:North
State  210
Old Q Values:  [4555.17950599 3220.66258272  790.72804752 5081.71055317]
New Q values:  [40111.92975925  3220.66258272   790.72804752  5081.71055317]
Reward: 9  Episode Reward:  35
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 35541.05618371  13205.05469457   -180.00807518 127614.85985619]
------
Step:16, Action:West
State  130
Old Q Values:  [ 35541.05618371  13205.05469457   -180.00807518 127614.85985619]
New Q values:  [35541.05618371 13205.05469457  -180.00807518 89364.82066493]
Reward: 9  Episode Reward:  44
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 127711.58907484]
------
Step:17, Action:West
State  115
Old Q Values:  [  -180.6          1117.48597573  18639.04650494 134179.40896346]
New Q values:  [  -180.6          1117.48597573  18639.04650494 131360.71504674]
Reward: 100009  Episode Reward:  100053
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[5021.94218576 1113.04903879 8059.83668471 4605.13029713]
------
Step:1, Action:North
State  273
Old Q Values:  [5021.94218576 1113.04903879 8059.83668471 4605.13029713]
New Q values:  [5187.97593769 1113.04903879 8059.83668471 4605.13029713]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 10579.33021128  -222.69561341   767.35890262]
------
Step:2, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.02419745e+04 6.18660618e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.66801706e+04 6.18660618e+03 2.45392999e+03]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5537.92800948 41946.60261668]
------
Step:3, Action:West
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  5537.92800948 41946.60261668]
New Q values:  [ 3845.01082957 -8521.23367799  5537.92800948 17266.34630405]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x. .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1607.68419127   26.73544252  274.56801194  240.60751196]
------
Step:4, Action:North
State  261
Old Q Values:  [1607.68419127   26.73544252  274.56801194  240.60751196]
New Q values:  [747.91217811  26.73544252 274.56801194 240.60751196]
Reward: 9  Episode Reward:  26
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   331.46167201 -4430.22327233   262.76946019]
------
Step:5, Action:South
State  183
Old Q Values:  [ 655.13893686  740.27989967 4726.700037   1554.80203889]
New Q values:  [ 655.13893686  519.8856133  4726.700037   1554.80203889]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[747.91217811  26.73544252 274.56801194 240.60751196]
------
Step:6, Action:North
State  261
Old Q Values:  [747.91217811  26.73544252 274.56801194 240.60751196]
New Q values:  [1716.57488234   26.73544252  274.56801194  240.60751196]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 655.13893686  519.8856133  4726.700037   1554.80203889]
------
Step:7, Action:East
State  183
Old Q Values:  [ 655.13893686  519.8856133  4726.700037   1554.80203889]
New Q values:  [ 655.13893686  519.8856133  3654.95331264 1554.80203889]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  5.24488560e+03  5.88291099e+03  2.17939995e+03]
------
Step:8, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.66801706e+04 6.18660618e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.66801706e+04 4.48001054e+03 2.45392999e+03]
Reward: 9  Episode Reward:  32
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6666.5602132   6475.82859082 -4584.50430574   304.77863761]
------
Step:9, Action:North
State  208
Old Q Values:  [ 6666.5602132   6475.82859082 -4584.50430574   304.77863761]
New Q values:  [29481.47028476  6475.82859082 -4584.50430574   304.77863761]
Reward: 9  Episode Reward:  41
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[35541.05618371 13205.05469457  -180.00807518 89364.82066493]
------
Step:10, Action:West
State  130
Old Q Values:  [35541.05618371 13205.05469457  -180.00807518 89364.82066493]
New Q values:  [35541.05618371 13205.05469457  -180.00807518 75159.54277999]
Reward: 9  Episode Reward:  50
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 131360.71504674]
------
Step:11, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   32608.63878402 127711.58907484]
New Q values:  [  -180.6          3557.6642036   32608.63878402 128773.58709129]
Reward: 100009  Episode Reward:  100059
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5018.33914104 -6396.61506955 -5588.09647059 25387.68767198]
------
Step:1, Action:West
State  288
Old Q Values:  [ 5018.33914104 -6396.61506955 -5588.09647059 25387.68767198]
New Q values:  [ 5018.33914104 -6396.61506955 -5588.09647059 15340.37896001]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5537.92800948 17266.34630405]
------
Step:2, Action:West
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  5537.92800948 17266.34630405]
New Q values:  [ 3845.01082957 -8521.23367799  5537.92800948  7426.91098632]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1716.57488234   26.73544252  274.56801194  240.60751196]
------
Step:3, Action:North
State  260
Old Q Values:  [ 1487.32833642 -5704.51612281  3615.21297554 -5679.36893145]
New Q values:  [-3085.53741883 -5704.51612281  3615.21297554 -5679.36893145]
Reward: -9991  Episode Reward:  -9973
xxxxx
x ..x
xg..x
x   x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5018.33914104 -6396.61506955 -5588.09647059 15340.37896001]
------
Step:1, Action:North
State  288
Old Q Values:  [ 5018.33914104 -6396.61506955 -5588.09647059 15340.37896001]
New Q values:  [14046.31458419 -6396.61506955 -5588.09647059 15340.37896001]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[40111.92975925  3220.66258272   790.72804752  5081.71055317]
------
Step:2, Action:North
State  208
Old Q Values:  [29481.47028476  6475.82859082 -4584.50430574   304.77863761]
New Q values:  [34345.8509479   6475.82859082 -4584.50430574   304.77863761]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[35541.05618371 13205.05469457  -180.00807518 75159.54277999]
------
Step:3, Action:West
State  136
Old Q Values:  [-2129.37064562  7863.65308892   660.86649319 -2637.36306818]
New Q values:  [-2129.37064562  7863.65308892   660.86649319  -351.24219179]
Reward: 9  Episode Reward:  27
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2327.67678494    65.73076831]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   129.67618753   632.53805907]
New Q values:  [ -281.736      -1150.91067548   703.30708945   632.53805907]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  2.17345538e+03 -3.22965309e-01  5.99332198e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [-8.31588891e+01  2.17345538e+03 -3.22965309e-01  5.99332198e+02]
New Q values:  [-8.31588891e+01  1.29023611e+04 -3.22965309e-01  5.99332198e+02]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[40111.92975925  3220.66258272   790.72804752  5081.71055317]
------
Step:6, Action:North
State  210
Old Q Values:  [40111.92975925  3220.66258272   790.72804752  5081.71055317]
New Q values:  [19914.88022781  3220.66258272   790.72804752  5081.71055317]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  1.29023611e+04 -3.22965309e-01  5.99332198e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [-8.31588891e+01  1.29023611e+04 -3.22965309e-01  5.99332198e+02]
New Q values:  [-8.31588891e+01  1.11348085e+04 -3.22965309e-01  5.99332198e+02]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[19914.88022781  3220.66258272   790.72804752  5081.71055317]
------
Step:8, Action:North
State  208
Old Q Values:  [34345.8509479   6475.82859082 -4584.50430574   304.77863761]
New Q values:  [17078.18292931  6475.82859082 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  1.11348085e+04 -3.22965309e-01  5.99332198e+02]
------
Step:9, Action:West
State  138
Old Q Values:  [-8.31588891e+01  1.11348085e+04 -3.22965309e-01  5.99332198e+02]
New Q values:  [-8.31588891e+01  1.11348085e+04 -3.22965309e-01  5.01962176e+02]
Reward: -1  Episode Reward:  21
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   611.86548126   876.09765581]
------
Step:10, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   611.86548126   876.09765581]
New Q values:  [ -253.44886264 -1902.20915811   611.86548126   740.11029907]
Reward: 9  Episode Reward:  30
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1280.9041225   496.66968841 -252.78192178]
------
Step:11, Action:South
State  105
Old Q Values:  [-180.6         822.1052524  -764.93196255    0.        ]
New Q values:  [-180.6         433.68060256 -764.93196255    0.        ]
Reward: 9  Episode Reward:  39
xxxxx
x  gx
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   331.46167201 -4430.22327233   262.76946019]
------
Step:12, Action:South
State  181
Old Q Values:  [  316.54182229   331.46167201 -4430.22327233   262.76946019]
New Q values:  [  316.54182229   652.95713351 -4430.22327233   262.76946019]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1716.57488234   26.73544252  274.56801194  240.60751196]
------
Step:13, Action:North
State  257
Old Q Values:  [37065.72055449 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [41490.84142326 12764.58618105  3851.09595999  1875.31501677]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631 27912.76462132     0.        ]
------
Step:14, Action:North
State  183
Old Q Values:  [ 655.13893686  519.8856133  3654.95331264 1554.80203889]
New Q values:  [ 751.35154484  519.8856133  3654.95331264 1554.80203889]
Reward: -1  Episode Reward:  46
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1632.98656698  403.77554135 -120.29354603]
------
Step:15, Action:South
State  99
Old Q Values:  [    0.         42263.75409375 58945.17153785     0.        ]
New Q values:  [    0.         43570.05483897 58945.17153785     0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631 27912.76462132     0.        ]
------
Step:16, Action:North
State  180
Old Q Values:  [-1557.47323198  2441.21181368  7713.770822   -4966.32149798]
New Q values:  [  471.29065272  2441.21181368  7713.770822   -4966.32149798]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 3649.59981836 2319.1981098  -180.6       ]
------
Step:17, Action:East
State  110
Old Q Values:  [-239.29051573 3649.59981836 2319.1981098  -180.6       ]
New Q values:  [-239.29051573 3649.59981836 1138.07137075 -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   703.30708945   632.53805907]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   703.30708945   632.53805907]
New Q values:  [ -281.736      -1150.91067548  3621.16538593   632.53805907]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  1.11348085e+04 -3.22965309e-01  5.01962176e+02]
------
Step:19, Action:South
State  138
Old Q Values:  [-8.31588891e+01  1.11348085e+04 -3.22965309e-01  5.01962176e+02]
New Q values:  [-8.31588891e+01  1.04277875e+04 -3.22965309e-01  5.01962176e+02]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[19914.88022781  3220.66258272   790.72804752  5081.71055317]
------
Step:20, Action:North
State  208
Old Q Values:  [17078.18292931  6475.82859082 -4584.50430574   304.77863761]
New Q values:  [ 9959.00941228  6475.82859082 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  1.04277875e+04 -3.22965309e-01  5.01962176e+02]
------
Step:21, Action:South
State  138
Old Q Values:  [-8.31588891e+01  1.04277875e+04 -3.22965309e-01  5.01962176e+02]
New Q values:  [-8.31588891e+01  1.01449791e+04 -3.22965309e-01  5.01962176e+02]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[19914.88022781  3220.66258272   790.72804752  5081.71055317]
------
Step:22, Action:North
State  210
Old Q Values:  [19914.88022781  3220.66258272   790.72804752  5081.71055317]
New Q values:  [11008.84580785  3220.66258272   790.72804752  5081.71055317]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  1.01449791e+04 -3.22965309e-01  5.01962176e+02]
------
Step:23, Action:South
State  138
Old Q Values:  [-8.31588891e+01  1.01449791e+04 -3.22965309e-01  5.01962176e+02]
New Q values:  [-8.31588891e+01  7.36004536e+03 -3.22965309e-01  5.01962176e+02]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11008.84580785  3220.66258272   790.72804752  5081.71055317]
------
Step:24, Action:North
State  210
Old Q Values:  [11008.84580785  3220.66258272   790.72804752  5081.71055317]
New Q values:  [6610.95193254 3220.66258272  790.72804752 5081.71055317]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  7.36004536e+03 -3.22965309e-01  5.01962176e+02]
------
Step:25, Action:South
State  138
Old Q Values:  [-8.31588891e+01  7.36004536e+03 -3.22965309e-01  5.01962176e+02]
New Q values:  [-8.31588891e+01  4.92670373e+03 -3.22965309e-01  5.01962176e+02]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6610.95193254 3220.66258272  790.72804752 5081.71055317]
------
Step:26, Action:North
State  210
Old Q Values:  [6610.95193254 3220.66258272  790.72804752 5081.71055317]
New Q values:  [4121.7918907  3220.66258272  790.72804752 5081.71055317]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[-8.31588891e+01  4.92670373e+03 -3.22965309e-01  5.01962176e+02]
------
Step:27, Action:South
State  138
Old Q Values:  [-8.31588891e+01  4.92670373e+03 -3.22965309e-01  5.01962176e+02]
New Q values:  [-8.31588891e+01  3.49459466e+03 -3.22965309e-01  5.01962176e+02]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4121.7918907  3220.66258272  790.72804752 5081.71055317]
------
Step:28, Action:West
State  210
Old Q Values:  [4121.7918907  3220.66258272  790.72804752 5081.71055317]
New Q values:  [ 4121.7918907   3220.66258272   790.72804752 63802.95751911]
Reward: 100009  Episode Reward:  100042
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14046.31458419 -6396.61506955 -5588.09647059 15340.37896001]
------
Step:1, Action:West
State  288
Old Q Values:  [14046.31458419 -6396.61506955 -5588.09647059 15340.37896001]
New Q values:  [14046.31458419 -6396.61506955 -5588.09647059  8369.6248799 ]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5537.92800948  7426.91098632]
------
Step:2, Action:West
State  273
Old Q Values:  [5187.97593769 1113.04903879 8059.83668471 4605.13029713]
New Q values:  [5187.97593769 1113.04903879 8059.83668471 2362.42458356]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1716.57488234   26.73544252  274.56801194  240.60751196]
------
Step:3, Action:North
State  261
Old Q Values:  [1716.57488234   26.73544252  274.56801194  240.60751196]
New Q values:  [887.91709299  26.73544252 274.56801194 240.60751196]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   652.95713351 -4430.22327233   262.76946019]
------
Step:4, Action:South
State  181
Old Q Values:  [  316.54182229   652.95713351 -4430.22327233   262.76946019]
New Q values:  [  316.54182229   526.9579813  -4430.22327233   262.76946019]
Reward: -1  Episode Reward:  26
xxxxx
x..gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[887.91709299  26.73544252 274.56801194 240.60751196]
------
Step:5, Action:North
State  261
Old Q Values:  [887.91709299  26.73544252 274.56801194 240.60751196]
New Q values:  [512.65423159  26.73544252 274.56801194 240.60751196]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   526.9579813  -4430.22327233   262.76946019]
------
Step:6, Action:South
State  181
Old Q Values:  [  316.54182229   526.9579813  -4430.22327233   262.76946019]
New Q values:  [  316.54182229   363.979462   -4430.22327233   262.76946019]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[512.65423159  26.73544252 274.56801194 240.60751196]
------
Step:7, Action:North
State  261
Old Q Values:  [512.65423159  26.73544252 274.56801194 240.60751196]
New Q values:  [313.65553123  26.73544252 274.56801194 240.60751196]
Reward: -1  Episode Reward:  23
xxxxx
x.. x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   363.979462   -4430.22327233   262.76946019]
------
Step:8, Action:South
State  183
Old Q Values:  [ 751.35154484  519.8856133  3654.95331264 1554.80203889]
New Q values:  [ 751.35154484  301.45090469 3654.95331264 1554.80203889]
Reward: -1  Episode Reward:  22
xxxxx
x.. x
x ..x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[313.65553123  26.73544252 274.56801194 240.60751196]
------
Step:9, Action:North
State  261
Old Q Values:  [313.65553123  26.73544252 274.56801194 240.60751196]
New Q values:  [234.05605109  26.73544252 274.56801194 240.60751196]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   363.979462   -4430.22327233   262.76946019]
------
Step:10, Action:South
State  181
Old Q Values:  [  316.54182229   363.979462   -4430.22327233   262.76946019]
New Q values:  [  316.54182229   227.36218838 -4430.22327233   262.76946019]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x g.x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[234.05605109  26.73544252 274.56801194 240.60751196]
------
Step:11, Action:East
State  260
Old Q Values:  [-3085.53741883 -5704.51612281  3615.21297554 -5679.36893145]
New Q values:  [-3085.53741883 -5704.51612281  3673.55848611 -5679.36893145]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5537.92800948  7426.91098632]
------
Step:12, Action:West
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  5537.92800948  7426.91098632]
New Q values:  [ 3845.01082957 -8521.23367799  5537.92800948  4072.23194036]
Reward: -1  Episode Reward:  18
xxxxx
xg. x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-3085.53741883 -5704.51612281  3673.55848611 -5679.36893145]
------
Step:13, Action:East
State  260
Old Q Values:  [-3085.53741883 -5704.51612281  3673.55848611 -5679.36893145]
New Q values:  [-3085.53741883 -5704.51612281  3130.20179729 -5679.36893145]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5537.92800948  4072.23194036]
------
Step:14, Action:East
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  5537.92800948  4072.23194036]
New Q values:  [ 3845.01082957 -8521.23367799  6428.46557905  4072.23194036]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14046.31458419 -6396.61506955 -5588.09647059  8369.6248799 ]
------
Step:15, Action:North
State  288
Old Q Values:  [14046.31458419 -6396.61506955 -5588.09647059  8369.6248799 ]
New Q values:  [ 8611.62865736 -6396.61506955 -5588.09647059  8369.6248799 ]
Reward: 9  Episode Reward:  25
xxxxx
xg. x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9959.00941228  6475.82859082 -4584.50430574   304.77863761]
------
Step:16, Action:North
State  208
Old Q Values:  [ 9959.00941228  6475.82859082 -4584.50430574   304.77863761]
New Q values:  [26530.86659891  6475.82859082 -4584.50430574   304.77863761]
Reward: -1  Episode Reward:  24
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[35541.05618371 13205.05469457  -180.00807518 75159.54277999]
------
Step:17, Action:West
State  136
Old Q Values:  [-2129.37064562  7863.65308892   660.86649319  -351.24219179]
New Q values:  [-2129.37064562  7863.65308892   660.86649319   563.20615877]
Reward: 9  Episode Reward:  33
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2327.67678494    65.73076831]
------
Step:18, Action:East
State  112
Old Q Values:  [     0.           3629.92591876  14807.19068057 110949.76306292]
New Q values:  [     0.           3629.92591876  27196.16909557 110949.76306292]
Reward: -1  Episode Reward:  32
xxxxx
x.gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[32116.70833183 15794.84178348 -8652.84       70912.97607779]
------
Step:19, Action:North
State  130
Old Q Values:  [35541.05618371 13205.05469457  -180.00807518 75159.54277999]
New Q values:  [36583.68530748 13205.05469457  -180.00807518 75159.54277999]
Reward: -301  Episode Reward:  -269
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36583.68530748 13205.05469457  -180.00807518 75159.54277999]
------
Step:20, Action:West
State  130
Old Q Values:  [36583.68530748 13205.05469457  -180.00807518 75159.54277999]
New Q values:  [36583.68530748 13205.05469457  -180.00807518 68695.29323938]
Reward: -1  Episode Reward:  -270
xxxxx
x.a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 128773.58709129]
------
Step:21, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   32608.63878402 128773.58709129]
New Q values:  [ -180.6         3557.6642036  32608.63878402 69198.38629787]
Reward: 9  Episode Reward:  -261
xxxxx
xa  x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[    0.         43570.05483897 58945.17153785     0.        ]
------
Step:22, Action:East
State  110
Old Q Values:  [-239.29051573 3649.59981836 1138.07137075 -180.6       ]
New Q values:  [-239.29051573 3649.59981836 1540.97816408 -180.6       ]
Reward: -1  Episode Reward:  -262
xxxxx
x a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3621.16538593   632.53805907]
------
Step:23, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3621.16538593   632.53805907]
New Q values:  [ -281.736      -1150.91067548  2496.24455123   632.53805907]
Reward: -1  Episode Reward:  -263
xxxxx
x  ax
x . x
xg  x
xxxxx
Step:24, Action:North
State  138
Old Q Values:  [-8.31588891e+01  3.49459466e+03 -3.22965309e-01  5.01962176e+02]
New Q values:  [ 8.34514841e+02  3.49459466e+03 -3.22965309e-01  5.01962176e+02]
Reward: -301  Episode Reward:  -564
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.34514841e+02  3.49459466e+03 -3.22965309e-01  5.01962176e+02]
------
Step:25, Action:South
State  138
Old Q Values:  [ 8.34514841e+02  3.49459466e+03 -3.22965309e-01  5.01962176e+02]
New Q values:  [ 8.34514841e+02  2.05381251e+04 -3.22965309e-01  5.01962176e+02]
Reward: -1  Episode Reward:  -565
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4121.7918907   3220.66258272   790.72804752 63802.95751911]
------
Step:26, Action:West
State  208
Old Q Values:  [26530.86659891  6475.82859082 -4584.50430574   304.77863761]
New Q values:  [26530.86659891  6475.82859082 -4584.50430574 65131.36263335]
Reward: 100009  Episode Reward:  99444
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[234.05605109  26.73544252 274.56801194 240.60751196]
------
Step:1, Action:East
State  261
Old Q Values:  [234.05605109  26.73544252 274.56801194 240.60751196]
New Q values:  [  234.05605109    26.73544252 -3956.23312151   240.60751196]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x g.x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1632.98656698  403.77554135 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [221.30610858 725.62666846 238.35800069   0.        ]
New Q values:  [ 221.30610858 1392.13666117  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 751.35154484  301.45090469 3654.95331264 1554.80203889]
------
Step:2, Action:East
State  181
Old Q Values:  [  316.54182229   227.36218838 -4430.22327233   262.76946019]
New Q values:  [  316.54182229   227.36218838 -5795.61193968   262.76946019]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x g.x
x.. x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8611.62865736 -6396.61506955 -5588.09647059  8369.6248799 ]
------
Step:1, Action:North
State  288
Old Q Values:  [ 8611.62865736 -6396.61506955 -5588.09647059  8369.6248799 ]
New Q values:  [22989.46025295 -6396.61506955 -5588.09647059  8369.6248799 ]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26530.86659891  6475.82859082 -4584.50430574 65131.36263335]
------
Step:2, Action:North
State  208
Old Q Values:  [26530.86659891  6475.82859082 -4584.50430574 65131.36263335]
New Q values:  [16779.18417503  6475.82859082 -4584.50430574 65131.36263335]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.34514841e+02  2.05381251e+04 -3.22965309e-01  5.01962176e+02]
------
Step:3, Action:North
State  138
Old Q Values:  [ 8.34514841e+02  2.05381251e+04 -3.22965309e-01  5.01962176e+02]
New Q values:  [ 6.31464347e+03  2.05381251e+04 -3.22965309e-01  5.01962176e+02]
Reward: -301  Episode Reward:  -283
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.31464347e+03  2.05381251e+04 -3.22965309e-01  5.01962176e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 6.31464347e+03  2.05381251e+04 -3.22965309e-01  5.01962176e+02]
New Q values:  [ 6.31464347e+03  2.73555373e+04 -3.22965309e-01  5.01962176e+02]
Reward: -1  Episode Reward:  -284
xxxxx
x.  x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4121.7918907   3220.66258272   790.72804752 63802.95751911]
------
Step:5, Action:West
State  210
Old Q Values:  [ 4121.7918907   3220.66258272   790.72804752 63802.95751911]
New Q values:  [ 4121.7918907   3220.66258272   790.72804752 27291.45630548]
Reward: 9  Episode Reward:  -275
xxxxx
x.  x
x.a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  5.24488560e+03  5.88291099e+03  2.17939995e+03]
------
Step:6, Action:East
State  194
Old Q Values:  [-6.00000000e-01  5.24488560e+03  5.88291099e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  5.24488560e+03  1.05400013e+04  2.17939995e+03]
Reward: -1  Episode Reward:  -276
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4121.7918907   3220.66258272   790.72804752 27291.45630548]
------
Step:7, Action:West
State  208
Old Q Values:  [16779.18417503  6475.82859082 -4584.50430574 65131.36263335]
New Q values:  [16779.18417503  6475.82859082 -4584.50430574 25055.99623165]
Reward: -10001  Episode Reward:  -10277
xxxxx
x.  x
x.g x
x.. x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  6428.46557905  4072.23194036]
------
Step:1, Action:East
State  273
Old Q Values:  [5187.97593769 1113.04903879 8059.83668471 2362.42458356]
New Q values:  [ 5187.97593769  1113.04903879 10126.17274977  2362.42458356]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[22989.46025295 -6396.61506955 -5588.09647059  8369.6248799 ]
------
Step:2, Action:North
State  288
Old Q Values:  [22989.46025295 -6396.61506955 -5588.09647059  8369.6248799 ]
New Q values:  [16717.98297067 -6396.61506955 -5588.09647059  8369.6248799 ]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16779.18417503  6475.82859082 -4584.50430574 25055.99623165]
------
Step:3, Action:West
State  208
Old Q Values:  [16779.18417503  6475.82859082 -4584.50430574 25055.99623165]
New Q values:  [16779.18417503  6475.82859082 -4584.50430574  9031.84967097]
Reward: -9991  Episode Reward:  -9973
xxxxx
x. .x
x.g x
x.  x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  234.05605109    26.73544252 -3956.23312151   240.60751196]
------
Step:1, Action:West
State  261
Old Q Values:  [  234.05605109    26.73544252 -3956.23312151   240.60751196]
New Q values:  [  234.05605109    26.73544252 -3956.23312151   -12.17474163]
Reward: -301  Episode Reward:  -301
xxxxx
x...x
x. .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  234.05605109    26.73544252 -3956.23312151   -12.17474163]
------
Step:2, Action:North
State  261
Old Q Values:  [  234.05605109    26.73544252 -3956.23312151   -12.17474163]
New Q values:  [  193.98496712    26.73544252 -3956.23312151   -12.17474163]
Reward: 9  Episode Reward:  -292
xxxxx
x...x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  316.54182229   227.36218838 -5795.61193968   262.76946019]
------
Step:3, Action:North
State  181
Old Q Values:  [  316.54182229   227.36218838 -5795.61193968   262.76946019]
New Q values:  [  223.87584839   227.36218838 -5795.61193968   262.76946019]
Reward: 9  Episode Reward:  -283
xxxxx
xag.x
x  .x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   306.19706492 -2165.66138672  -180.6       ]
------
Step:4, Action:South
State  109
Old Q Values:  [ -241.10880094   306.19706492 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   200.70966403 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  -284
xxxxx
x .gx
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[  223.87584839   227.36218838 -5795.61193968   262.76946019]
------
Step:5, Action:West
State  181
Old Q Values:  [  223.87584839   227.36218838 -5795.61193968   262.76946019]
New Q values:  [ 2.23875848e+02  2.27362188e+02 -5.79561194e+03  3.33862213e+00]
Reward: -301  Episode Reward:  -585
xxxxx
x ..x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.23875848e+02  2.27362188e+02 -5.79561194e+03  3.33862213e+00]
------
Step:6, Action:South
State  181
Old Q Values:  [ 2.23875848e+02  2.27362188e+02 -5.79561194e+03  3.33862213e+00]
New Q values:  [ 2.23875848e+02  1.48540365e+02 -5.79561194e+03  3.33862213e+00]
Reward: -1  Episode Reward:  -586
xxxxx
x .gx
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  193.98496712    26.73544252 -3956.23312151   -12.17474163]
------
Step:7, Action:North
State  261
Old Q Values:  [  193.98496712    26.73544252 -3956.23312151   -12.17474163]
New Q values:  [  144.15674137    26.73544252 -3956.23312151   -12.17474163]
Reward: -1  Episode Reward:  -587
xxxxx
x ..x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.23875848e+02  1.48540365e+02 -5.79561194e+03  3.33862213e+00]
------
Step:8, Action:North
State  183
Old Q Values:  [ 751.35154484  301.45090469 3654.95331264 1554.80203889]
New Q values:  [ 717.58161629  301.45090469 3654.95331264 1554.80203889]
Reward: -1  Episode Reward:  -588
xxxxx
xa..x
x  .x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1392.13666117  238.35800069    0.        ]
------
Step:9, Action:South
State  103
Old Q Values:  [ 221.30610858 1392.13666117  238.35800069    0.        ]
New Q values:  [ 221.30610858 1652.74065826  238.35800069    0.        ]
Reward: -1  Episode Reward:  -589
xxxxx
x ..x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 717.58161629  301.45090469 3654.95331264 1554.80203889]
------
Step:10, Action:East
State  181
Old Q Values:  [ 2.23875848e+02  1.48540365e+02 -5.79561194e+03  3.33862213e+00]
New Q values:  [ 2.23875848e+02  1.48540365e+02 -6.34776741e+03  3.33862213e+00]
Reward: -10001  Episode Reward:  -10590
xxxxx
x ..x
x g.x
x ..x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.66801706e+04 4.48001054e+03 2.45392999e+03]
------
Step:1, Action:South
State  196
Old Q Values:  [-2469.90645144  5317.60120416  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  3744.27947933  6570.2578975    231.67262594]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3241.70332626 -5807.06396197  5372.79665888  2075.00763365]
------
Step:2, Action:East
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  6428.46557905  4072.23194036]
New Q values:  [ 3845.01082957 -8521.23367799  7592.18112282  4072.23194036]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16717.98297067 -6396.61506955 -5588.09647059  8369.6248799 ]
------
Step:3, Action:North
State  288
Old Q Values:  [16717.98297067 -6396.61506955 -5588.09647059  8369.6248799 ]
New Q values:  [11726.34844078 -6396.61506955 -5588.09647059  8369.6248799 ]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[16779.18417503  6475.82859082 -4584.50430574  9031.84967097]
------
Step:4, Action:North
State  208
Old Q Values:  [16779.18417503  6475.82859082 -4584.50430574  9031.84967097]
New Q values:  [ 9076.16959669  6475.82859082 -4584.50430574  9031.84967097]
Reward: 9  Episode Reward:  36
xxxxx
xg.ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  7863.65308892   660.86649319   563.20615877]
------
Step:5, Action:South
State  130
Old Q Values:  [36583.68530748 13205.05469457  -180.00807518 68695.29323938]
New Q values:  [36583.68530748  8004.27275684  -180.00807518 68695.29323938]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9076.16959669  6475.82859082 -4584.50430574  9031.84967097]
------
Step:6, Action:North
State  210
Old Q Values:  [ 4121.7918907   3220.66258272   790.72804752 27291.45630548]
New Q values:  [22256.7047281   3220.66258272   790.72804752 27291.45630548]
Reward: -1  Episode Reward:  34
xxxxx
x .ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36583.68530748  8004.27275684  -180.00807518 68695.29323938]
------
Step:7, Action:West
State  138
Old Q Values:  [ 6.31464347e+03  2.73555373e+04 -3.22965309e-01  5.01962176e+02]
New Q values:  [ 6.31464347e+03  2.73555373e+04 -3.22965309e-01  9.55058236e+02]
Reward: 9  Episode Reward:  43
xxxxx
x a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2496.24455123   632.53805907]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2496.24455123   632.53805907]
New Q values:  [ -281.736      -1150.91067548  9204.5590114    632.53805907]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.31464347e+03  2.73555373e+04 -3.22965309e-01  9.55058236e+02]
------
Step:9, Action:South
State  138
Old Q Values:  [ 6.31464347e+03  2.73555373e+04 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 6.31464347e+03  1.30404793e+04 -3.22965309e-01  9.55058236e+02]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
------
Step:10, Action:South
State  210
Old Q Values:  [22256.7047281   3220.66258272   790.72804752 27291.45630548]
New Q values:  [22256.7047281   4805.56956532   790.72804752 27291.45630548]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11726.34844078 -6396.61506955 -5588.09647059  8369.6248799 ]
------
Step:11, Action:North
State  288
Old Q Values:  [11726.34844078 -6396.61506955 -5588.09647059  8369.6248799 ]
New Q values:  [ 6788.80374425 -6396.61506955 -5588.09647059  8369.6248799 ]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
------
Step:12, Action:South
State  208
Old Q Values:  [ 9076.16959669  6475.82859082 -4584.50430574  9031.84967097]
New Q values:  [ 9076.16959669  5100.6189003  -4584.50430574  9031.84967097]
Reward: -1  Episode Reward:  38
xxxxx
xg  x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6788.80374425 -6396.61506955 -5588.09647059  8369.6248799 ]
------
Step:13, Action:West
State  288
Old Q Values:  [ 6788.80374425 -6396.61506955 -5588.09647059  8369.6248799 ]
New Q values:  [ 6788.80374425 -6396.61506955 -5588.09647059  5624.90428881]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  7592.18112282  4072.23194036]
------
Step:14, Action:East
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  7592.18112282  4072.23194036]
New Q values:  [ 3845.01082957 -8521.23367799  5072.9135724   4072.23194036]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6788.80374425 -6396.61506955 -5588.09647059  5624.90428881]
------
Step:15, Action:North
State  288
Old Q Values:  [ 6788.80374425 -6396.61506955 -5588.09647059  5624.90428881]
New Q values:  [ 5437.7723767  -6396.61506955 -5588.09647059  5624.90428881]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9076.16959669  5100.6189003  -4584.50430574  9031.84967097]
------
Step:16, Action:North
State  208
Old Q Values:  [ 9076.16959669  5100.6189003  -4584.50430574  9031.84967097]
New Q values:  [ 7542.01162542  5100.6189003  -4584.50430574  9031.84967097]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.31464347e+03  1.30404793e+04 -3.22965309e-01  9.55058236e+02]
------
Step:17, Action:South
State  136
Old Q Values:  [-2129.37064562  7863.65308892   660.86649319   563.20615877]
New Q values:  [-2129.37064562  5854.41613686   660.86649319   563.20615877]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7542.01162542  5100.6189003  -4584.50430574  9031.84967097]
------
Step:18, Action:West
State  208
Old Q Values:  [ 7542.01162542  5100.6189003  -4584.50430574  9031.84967097]
New Q values:  [ 7542.01162542  5100.6189003  -4584.50430574  6785.93893177]
Reward: -1  Episode Reward:  32
xxxxx
x  gx
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 10579.33021128  -222.69561341   767.35890262]
------
Step:19, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.66801706e+04 4.48001054e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 8.19334231e+03 4.48001054e+03 2.45392999e+03]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5072.9135724   4072.23194036]
------
Step:20, Action:East
State  273
Old Q Values:  [ 5187.97593769  1113.04903879 10126.17274977  2362.42458356]
New Q values:  [5187.97593769 1113.04903879 5737.34038655 2362.42458356]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5437.7723767  -6396.61506955 -5588.09647059  5624.90428881]
------
Step:21, Action:West
State  288
Old Q Values:  [ 5437.7723767  -6396.61506955 -5588.09647059  5624.90428881]
New Q values:  [ 5437.7723767  -6396.61506955 -5588.09647059  3771.23578724]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5072.9135724   4072.23194036]
------
Step:22, Action:East
State  273
Old Q Values:  [5187.97593769 1113.04903879 5737.34038655 2362.42458356]
New Q values:  [5187.97593769 1113.04903879 3925.66786763 2362.42458356]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5437.7723767  -6396.61506955 -5588.09647059  3771.23578724]
------
Step:23, Action:North
State  288
Old Q Values:  [ 5437.7723767  -6396.61506955 -5588.09647059  3771.23578724]
New Q values:  [-1562.88756169 -6396.61506955 -5588.09647059  3771.23578724]
Reward: -10001  Episode Reward:  -9973
xxxxx
x   x
x. gx
x.  x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.31464347e+03  1.30404793e+04 -3.22965309e-01  9.55058236e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 6.31464347e+03  1.30404793e+04 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 6.31464347e+03  1.34090286e+04 -3.22965309e-01  9.55058236e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[22256.7047281   4805.56956532   790.72804752 27291.45630548]
------
Step:2, Action:West
State  210
Old Q Values:  [22256.7047281   4805.56956532   790.72804752 27291.45630548]
New Q values:  [22256.7047281   4805.56956532   790.72804752 16620.720916  ]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 15377.4345861  19015.79464603  1169.39963074]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  5.24488560e+03  1.05400013e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  5.24488560e+03  1.08924119e+04  2.17939995e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[22256.7047281   4805.56956532   790.72804752 16620.720916  ]
------
Step:4, Action:North
State  210
Old Q Values:  [22256.7047281   4805.56956532   790.72804752 16620.720916  ]
New Q values:  [12924.79047343  4805.56956532   790.72804752 16620.720916  ]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.31464347e+03  1.34090286e+04 -3.22965309e-01  9.55058236e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 6.31464347e+03  1.34090286e+04 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 6.31464347e+03  1.03492277e+04 -3.22965309e-01  9.55058236e+02]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12924.79047343  4805.56956532   790.72804752 16620.720916  ]
------
Step:6, Action:West
State  210
Old Q Values:  [12924.79047343  4805.56956532   790.72804752 16620.720916  ]
New Q values:  [12924.79047343  4805.56956532   790.72804752 12352.42676021]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x.a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 15377.4345861  19015.79464603  1169.39963074]
------
Step:7, Action:East
State  193
Old Q Values:  [-5922.26708831 10579.33021128  -222.69561341   767.35890262]
New Q values:  [-5922.26708831 10579.33021128 -3827.07475774   767.35890262]
Reward: -10001  Episode Reward:  -9997
xxxxx
x.. x
x. gx
x...x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12924.79047343  4805.56956532   790.72804752 12352.42676021]
------
Step:1, Action:North
State  208
Old Q Values:  [ 7542.01162542  5100.6189003  -4584.50430574  6785.93893177]
New Q values:  [ 6126.97296548  5100.6189003  -4584.50430574  6785.93893177]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.31464347e+03  1.03492277e+04 -3.22965309e-01  9.55058236e+02]
------
Step:2, Action:North
State  138
Old Q Values:  [ 6.31464347e+03  1.03492277e+04 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 5.45002570e+03  1.03492277e+04 -3.22965309e-01  9.55058236e+02]
Reward: -301  Episode Reward:  -292
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.45002570e+03  1.03492277e+04 -3.22965309e-01  9.55058236e+02]
------
Step:3, Action:South
State  136
Old Q Values:  [-2129.37064562  5854.41613686   660.86649319   563.20615877]
New Q values:  [-2129.37064562  4376.94813427   660.86649319   563.20615877]
Reward: -1  Episode Reward:  -293
xxxxx
x.g x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6126.97296548  5100.6189003  -4584.50430574  6785.93893177]
------
Step:4, Action:West
State  208
Old Q Values:  [ 6126.97296548  5100.6189003  -4584.50430574  6785.93893177]
New Q values:  [ 6126.97296548  5100.6189003  -4584.50430574  -822.22173445]
Reward: -9991  Episode Reward:  -10284
xxxxx
x.. x
x.g x
x.. x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  5072.9135724   4072.23194036]
------
Step:1, Action:East
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  5072.9135724   4072.23194036]
New Q values:  [ 3845.01082957 -8521.23367799  3165.93616513  4072.23194036]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1562.88756169 -6396.61506955 -5588.09647059  3771.23578724]
------
Step:2, Action:West
State  288
Old Q Values:  [-1562.88756169 -6396.61506955 -5588.09647059  3771.23578724]
New Q values:  [-1562.88756169 -6396.61506955 -5588.09647059  2729.56389701]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  3165.93616513  4072.23194036]
------
Step:3, Action:West
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  3165.93616513  4072.23194036]
New Q values:  [ 3845.01082957 -8521.23367799  3165.93616513 14081.54520312]
Reward: 9  Episode Reward:  17
xxxxx
x.g.x
x. .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41490.84142326 12764.58618105  3851.09595999  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [  144.15674137    26.73544252 -3956.23312151   -12.17474163]
New Q values:  [  130.22545106    26.73544252 -3956.23312151   -12.17474163]
Reward: 9  Episode Reward:  26
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 2.23875848e+02  1.48540365e+02 -6.34776741e+03  3.33862213e+00]
------
Step:5, Action:North
State  181
Old Q Values:  [ 2.23875848e+02  1.48540365e+02 -6.34776741e+03  3.33862213e+00]
New Q values:  [ 5.90772537e+02  1.48540365e+02 -6.34776741e+03  3.33862213e+00]
Reward: 9  Episode Reward:  35
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1652.74065826  238.35800069    0.        ]
------
Step:6, Action:South
State  103
Old Q Values:  [ 221.30610858 1652.74065826  238.35800069    0.        ]
New Q values:  [ 221.30610858 1756.9822571   238.35800069    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 717.58161629  301.45090469 3654.95331264 1554.80203889]
------
Step:7, Action:East
State  181
Old Q Values:  [ 5.90772537e+02  1.48540365e+02 -6.34776741e+03  3.33862213e+00]
New Q values:  [590.77253684 148.54036549 634.09210073   3.33862213]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 10579.33021128 -3827.07475774   767.35890262]
------
Step:8, Action:South
State  193
Old Q Values:  [-5922.26708831 10579.33021128 -3827.07475774   767.35890262]
New Q values:  [-5922.26708831  5787.52486582 -3827.07475774   767.35890262]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[5187.97593769 1113.04903879 3925.66786763 2362.42458356]
------
Step:9, Action:North
State  273
Old Q Values:  [5187.97593769 1113.04903879 3925.66786763 2362.42458356]
New Q values:  [3810.84783482 1113.04903879 3925.66786763 2362.42458356]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5787.52486582 -3827.07475774   767.35890262]
------
Step:10, Action:South
State  196
Old Q Values:  [-2469.90645144  3744.27947933  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  3108.9507894   6570.2578975    231.67262594]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3241.70332626 -5807.06396197  5372.79665888  2075.00763365]
------
Step:11, Action:East
State  273
Old Q Values:  [3810.84783482 1113.04903879 3925.66786763 2362.42458356]
New Q values:  [3810.84783482 1113.04903879 2388.53631615 2362.42458356]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1562.88756169 -6396.61506955 -5588.09647059  2729.56389701]
------
Step:12, Action:West
State  288
Old Q Values:  [-1562.88756169 -6396.61506955 -5588.09647059  2729.56389701]
New Q values:  [-1562.88756169 -6396.61506955 -5588.09647059  2234.47990925]
Reward: -1  Episode Reward:  28
xxxxx
x .gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3810.84783482 1113.04903879 2388.53631615 2362.42458356]
------
Step:13, Action:North
State  273
Old Q Values:  [3810.84783482 1113.04903879 2388.53631615 2362.42458356]
New Q values:  [3259.99659367 1113.04903879 2388.53631615 2362.42458356]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5787.52486582 -3827.07475774   767.35890262]
------
Step:14, Action:South
State  196
Old Q Values:  [-2469.90645144  3108.9507894   6570.2578975    231.67262594]
New Q values:  [-2469.90645144  2854.81931342  6570.2578975    231.67262594]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3241.70332626 -5807.06396197  5372.79665888  2075.00763365]
------
Step:15, Action:East
State  273
Old Q Values:  [3259.99659367 1113.04903879 2388.53631615 2362.42458356]
New Q values:  [3259.99659367 1113.04903879 1625.15849924 2362.42458356]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1562.88756169 -6396.61506955 -5588.09647059  2234.47990925]
------
Step:16, Action:West
State  288
Old Q Values:  [-1562.88756169 -6396.61506955 -5588.09647059  2234.47990925]
New Q values:  [-1562.88756169 -6396.61506955 -5588.09647059  1850.43738738]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.19081808e+03 1.46883995e+03]
------
Step:17, Action:West
State  276
Old Q Values:  [ 3241.70332626 -5807.06396197  5372.79665888  2075.00763365]
New Q values:  [ 3241.70332626 -5807.06396197  5372.79665888   868.47068878]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[  130.22545106    26.73544252 -3956.23312151   -12.17474163]
------
Step:18, Action:North
State  261
Old Q Values:  [  130.22545106    26.73544252 -3956.23312151   -12.17474163]
New Q values:  [ 1147.97617422    26.73544252 -3956.23312151   -12.17474163]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 717.58161629  301.45090469 3654.95331264 1554.80203889]
------
Step:19, Action:East
State  181
Old Q Values:  [590.77253684 148.54036549 634.09210073   3.33862213]
New Q values:  [ 590.77253684  148.54036549 1989.29430004    3.33862213]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  5787.52486582 -3827.07475774   767.35890262]
------
Step:20, Action:South
State  196
Old Q Values:  [-2469.90645144  2854.81931342  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  2753.16672303  6570.2578975    231.67262594]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3241.70332626 -5807.06396197  5372.79665888   868.47068878]
------
Step:21, Action:East
State  276
Old Q Values:  [ 3241.70332626 -5807.06396197  5372.79665888   868.47068878]
New Q values:  [ 3241.70332626 -5807.06396197  2703.64987977   868.47068878]
Reward: -1  Episode Reward:  19
xxxxx
x ..x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-1562.88756169 -6396.61506955 -5588.09647059  1850.43738738]
------
Step:22, Action:North
State  288
Old Q Values:  [-1562.88756169 -6396.61506955 -5588.09647059  1850.43738738]
New Q values:  [ 3257.68211735 -6396.61506955 -5588.09647059  1850.43738738]
Reward: 9  Episode Reward:  28
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12924.79047343  4805.56956532   790.72804752 12352.42676021]
------
Step:23, Action:North
State  208
Old Q Values:  [ 6126.97296548  5100.6189003  -4584.50430574  -822.22173445]
New Q values:  [23064.77715801  5100.6189003  -4584.50430574  -822.22173445]
Reward: 9  Episode Reward:  37
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36583.68530748  8004.27275684  -180.00807518 68695.29323938]
------
Step:24, Action:West
State  128
Old Q Values:  [32116.70833183 15794.84178348 -8652.84       70912.97607779]
New Q values:  [ 32116.70833183  15794.84178348  -8652.84       121655.51934999]
Reward: 100009  Episode Reward:  100046
xxxxx
xga x
x   x
x   x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  4376.94813427   660.86649319   563.20615877]
------
Step:1, Action:South
State  138
Old Q Values:  [ 5.45002570e+03  1.03492277e+04 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 5.45002570e+03  6.24395546e+03 -3.22965309e-01  9.55058236e+02]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
------
Step:2, Action:South
State  208
Old Q Values:  [23064.77715801  5100.6189003  -4584.50430574  -822.22173445]
New Q values:  [23064.77715801  3022.95219532 -4584.50430574  -822.22173445]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3257.68211735 -6396.61506955 -5588.09647059  1850.43738738]
------
Step:3, Action:North
State  288
Old Q Values:  [ 3257.68211735 -6396.61506955 -5588.09647059  1850.43738738]
New Q values:  [ 8221.90599434 -6396.61506955 -5588.09647059  1850.43738738]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23064.77715801  3022.95219532 -4584.50430574  -822.22173445]
------
Step:4, Action:North
State  208
Old Q Values:  [23064.77715801  3022.95219532 -4584.50430574  -822.22173445]
New Q values:  [11098.49749971  3022.95219532 -4584.50430574  -822.22173445]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.45002570e+03  6.24395546e+03 -3.22965309e-01  9.55058236e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 5.45002570e+03  6.24395546e+03 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 5.45002570e+03  5.82653143e+03 -3.22965309e-01  9.55058236e+02]
Reward: -1  Episode Reward:  15
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11098.49749971  3022.95219532 -4584.50430574  -822.22173445]
------
Step:6, Action:North
State  210
Old Q Values:  [12924.79047343  4805.56956532   790.72804752 12352.42676021]
New Q values:  [ 6917.27561895  4805.56956532   790.72804752 12352.42676021]
Reward: -1  Episode Reward:  14
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.45002570e+03  5.82653143e+03 -3.22965309e-01  9.55058236e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 5.45002570e+03  5.82653143e+03 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 5.45002570e+03  6.03574060e+03 -3.22965309e-01  9.55058236e+02]
Reward: -1  Episode Reward:  13
xxxxx
x . x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 6917.27561895  4805.56956532   790.72804752 12352.42676021]
------
Step:8, Action:West
State  210
Old Q Values:  [ 6917.27561895  4805.56956532   790.72804752 12352.42676021]
New Q values:  [6917.27561895 4805.56956532  790.72804752 8214.09428426]
Reward: 9  Episode Reward:  22
xxxxx
x . x
x.a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  5.24488560e+03  1.08924119e+04  2.17939995e+03]
------
Step:9, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 2064.67334366  568.38654082]
New Q values:  [ 169.9257398  3934.68362957 2924.1337054   568.38654082]
Reward: -1  Episode Reward:  21
xxxxx
x . x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
------
Step:10, Action:South
State  210
Old Q Values:  [6917.27561895 4805.56956532  790.72804752 8214.09428426]
New Q values:  [6917.27561895 4388.19962443  790.72804752 8214.09428426]
Reward: -1  Episode Reward:  20
xxxxx
x . x
x.  x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8221.90599434 -6396.61506955 -5588.09647059  1850.43738738]
------
Step:11, Action:North
State  288
Old Q Values:  [ 8221.90599434 -6396.61506955 -5588.09647059  1850.43738738]
New Q values:  [ 5752.39068302 -6396.61506955 -5588.09647059  1850.43738738]
Reward: -1  Episode Reward:  19
xxxxx
x . x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6917.27561895 4388.19962443  790.72804752 8214.09428426]
------
Step:12, Action:West
State  210
Old Q Values:  [6917.27561895 4388.19962443  790.72804752 8214.09428426]
New Q values:  [6917.27561895 4388.19962443  790.72804752 6552.76129389]
Reward: -1  Episode Reward:  18
xxxxx
x . x
x.a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  5.24488560e+03  1.08924119e+04  2.17939995e+03]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01  5.24488560e+03  1.08924119e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  5.24488560e+03  6.43154746e+03  2.17939995e+03]
Reward: -1  Episode Reward:  17
xxxxx
x . x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6917.27561895 4388.19962443  790.72804752 6552.76129389]
------
Step:14, Action:North
State  208
Old Q Values:  [11098.49749971  3022.95219532 -4584.50430574  -822.22173445]
New Q values:  [ 6249.52118013  3022.95219532 -4584.50430574  -822.22173445]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.45002570e+03  6.03574060e+03 -3.22965309e-01  9.55058236e+02]
------
Step:15, Action:South
State  136
Old Q Values:  [-2129.37064562  4376.94813427   660.86649319   563.20615877]
New Q values:  [-2129.37064562  3849.04362164   660.86649319   563.20615877]
Reward: -1  Episode Reward:  15
xxxxx
x g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
------
Step:16, Action:South
State  208
Old Q Values:  [ 6249.52118013  3022.95219532 -4584.50430574  -822.22173445]
New Q values:  [ 6249.52118013  2934.29808303 -4584.50430574  -822.22173445]
Reward: -1  Episode Reward:  14
xxxxx
xg. x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5752.39068302 -6396.61506955 -5588.09647059  1850.43738738]
------
Step:17, Action:North
State  288
Old Q Values:  [ 5752.39068302 -6396.61506955 -5588.09647059  1850.43738738]
New Q values:  [ 4399.22064114 -6396.61506955 -5588.09647059  1850.43738738]
Reward: -1  Episode Reward:  13
xxxxx
x g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
------
Step:18, Action:South
State  208
Old Q Values:  [ 6249.52118013  2934.29808303 -4584.50430574  -822.22173445]
New Q values:  [ 6249.52118013  2492.88542556 -4584.50430574  -822.22173445]
Reward: -1  Episode Reward:  12
xxxxx
xg. x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4399.22064114 -6396.61506955 -5588.09647059  1850.43738738]
------
Step:19, Action:North
State  288
Old Q Values:  [ 4399.22064114 -6396.61506955 -5588.09647059  1850.43738738]
New Q values:  [ 3857.95262439 -6396.61506955 -5588.09647059  1850.43738738]
Reward: -1  Episode Reward:  11
xxxxx
x . x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
------
Step:20, Action:South
State  208
Old Q Values:  [ 6249.52118013  2492.88542556 -4584.50430574  -822.22173445]
New Q values:  [ 6249.52118013  2153.93995754 -4584.50430574  -822.22173445]
Reward: -1  Episode Reward:  10
xxxxx
xg. x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3857.95262439 -6396.61506955 -5588.09647059  1850.43738738]
------
Step:21, Action:North
State  288
Old Q Values:  [ 3857.95262439 -6396.61506955 -5588.09647059  1850.43738738]
New Q values:  [ 3641.44541769 -6396.61506955 -5588.09647059  1850.43738738]
Reward: -1  Episode Reward:  9
xxxxx
x . x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
------
Step:22, Action:South
State  208
Old Q Values:  [ 6249.52118013  2153.93995754 -4584.50430574  -822.22173445]
New Q values:  [ 6249.52118013  1953.40960832 -4584.50430574  -822.22173445]
Reward: -1  Episode Reward:  8
xxxxx
x . x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3641.44541769 -6396.61506955 -5588.09647059  1850.43738738]
------
Step:23, Action:North
State  288
Old Q Values:  [ 3641.44541769 -6396.61506955 -5588.09647059  1850.43738738]
New Q values:  [-2669.16547888 -6396.61506955 -5588.09647059  1850.43738738]
Reward: -10001  Episode Reward:  -9993
xxxxx
x . x
x. gx
x.. x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   200.70966403 -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094   200.70966403 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   682.47215562 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 590.77253684  148.54036549 1989.29430004    3.33862213]
------
Step:2, Action:East
State  189
Old Q Values:  [ 337.36081627  489.19212398 1933.89334095  154.04646645]
New Q values:  [ 337.36081627  489.19212398 1902.37198857  154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 3744.71550729 -789.02220255 1598.29363255]
------
Step:3, Action:South
State  196
Old Q Values:  [-2469.90645144  2753.16672303  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  2079.17768709  6570.2578975    231.67262594]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3241.70332626 -5807.06396197  2703.64987977   868.47068878]
------
Step:4, Action:East
State  276
Old Q Values:  [ 3241.70332626 -5807.06396197  2703.64987977   868.47068878]
New Q values:  [ 3241.70332626 -5807.06396197  1641.99116812   868.47068878]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2669.16547888 -6396.61506955 -5588.09647059  1850.43738738]
------
Step:5, Action:West
State  288
Old Q Values:  [-2669.16547888 -6396.61506955 -5588.09647059  1850.43738738]
New Q values:  [-2669.16547888 -6396.61506955 -5588.09647059  1712.08595283]
Reward: -1  Episode Reward:  35
xxxxx
xg .x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3241.70332626 -5807.06396197  1641.99116812   868.47068878]
------
Step:6, Action:North
State  276
Old Q Values:  [ 3241.70332626 -5807.06396197  1641.99116812   868.47068878]
New Q values:  [ 3267.15869975 -5807.06396197  1641.99116812   868.47068878]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xga.x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  2079.17768709  6570.2578975    231.67262594]
------
Step:7, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  5.46892503e+03  0.00000000e+00]
New Q values:  [-2.7887208e-01 -2.0061023e+02  4.2681527e+03  0.0000000e+00]
Reward: 9  Episode Reward:  43
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6917.27561895 4388.19962443  790.72804752 6552.76129389]
------
Step:8, Action:North
State  218
Old Q Values:  [ 463.45720114 2755.30078316    0.         2598.71043955]
New Q values:  [2001.50506071 2755.30078316    0.         2598.71043955]
Reward: 9  Episode Reward:  52
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.45002570e+03  6.03574060e+03 -3.22965309e-01  9.55058236e+02]
------
Step:9, Action:South
State  130
Old Q Values:  [36583.68530748  8004.27275684  -180.00807518 68695.29323938]
New Q values:  [36583.68530748  5276.29178842  -180.00807518 68695.29323938]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6917.27561895 4388.19962443  790.72804752 6552.76129389]
------
Step:10, Action:North
State  216
Old Q Values:  [ 1191.26218595  6996.21455978 -8220.10378799  2122.55739604]
New Q values:  [ 2286.62705463  6996.21455978 -8220.10378799  2122.55739604]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.45002570e+03  6.03574060e+03 -3.22965309e-01  9.55058236e+02]
------
Step:11, Action:South
State  136
Old Q Values:  [-2129.37064562  3849.04362164   660.86649319   563.20615877]
New Q values:  [-2129.37064562  3637.88181659   660.86649319   563.20615877]
Reward: -1  Episode Reward:  49
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2286.62705463  6996.21455978 -8220.10378799  2122.55739604]
------
Step:12, Action:South
State  216
Old Q Values:  [ 2286.62705463  6996.21455978 -8220.10378799  2122.55739604]
New Q values:  [ 2286.62705463  3311.51160976 -8220.10378799  2122.55739604]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2669.16547888 -6396.61506955 -5588.09647059  1712.08595283]
------
Step:13, Action:West
State  288
Old Q Values:  [-2669.16547888 -6396.61506955 -5588.09647059  1712.08595283]
New Q values:  [-2669.16547888 -6396.61506955 -5588.09647059  1662.23335923]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3259.99659367 1113.04903879 1625.15849924 2362.42458356]
------
Step:14, Action:North
State  273
Old Q Values:  [3259.99659367 1113.04903879 1625.15849924 2362.42458356]
New Q values:  [2426.81328966 1113.04903879 1625.15849924 2362.42458356]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 3744.71550729 -789.02220255 1598.29363255]
------
Step:15, Action:South
State  201
Old Q Values:  [ 613.33320563 3744.71550729 -789.02220255 1598.29363255]
New Q values:  [ 613.33320563 2225.33018981 -789.02220255 1598.29363255]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2426.81328966 1113.04903879 1625.15849924 2362.42458356]
------
Step:16, Action:North
State  273
Old Q Values:  [2426.81328966 1113.04903879 1625.15849924 2362.42458356]
New Q values:  [1637.72437281 1113.04903879 1625.15849924 2362.42458356]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 2225.33018981 -789.02220255 1598.29363255]
------
Step:17, Action:South
State  201
Old Q Values:  [ 613.33320563 2225.33018981 -789.02220255 1598.29363255]
New Q values:  [ 613.33320563 1598.25945099 -789.02220255 1598.29363255]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1113.04903879 1625.15849924 2362.42458356]
------
Step:18, Action:West
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  3165.93616513 14081.54520312]
New Q values:  [ 3845.01082957 -8521.23367799  3165.93616513 78085.27050823]
Reward: 100009  Episode Reward:  100052
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.45002570e+03  6.03574060e+03 -3.22965309e-01  9.55058236e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 5.45002570e+03  6.03574060e+03 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 5.45002570e+03  4.49487893e+03 -3.22965309e-01  9.55058236e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6917.27561895 4388.19962443  790.72804752 6552.76129389]
------
Step:2, Action:North
State  210
Old Q Values:  [6917.27561895 4388.19962443  790.72804752 6552.76129389]
New Q values:  [4401.31795881 4388.19962443  790.72804752 6552.76129389]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.45002570e+03  4.49487893e+03 -3.22965309e-01  9.55058236e+02]
------
Step:3, Action:North
State  138
Old Q Values:  [ 5.45002570e+03  4.49487893e+03 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 3.63441799e+03  4.49487893e+03 -3.22965309e-01  9.55058236e+02]
Reward: -301  Episode Reward:  -293
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.63441799e+03  4.49487893e+03 -3.22965309e-01  9.55058236e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 3.63441799e+03  4.49487893e+03 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 3.63441799e+03  3.76317996e+03 -3.22965309e-01  9.55058236e+02]
Reward: -1  Episode Reward:  -294
xxxxx
x.. x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4401.31795881 4388.19962443  790.72804752 6552.76129389]
------
Step:5, Action:West
State  208
Old Q Values:  [ 6249.52118013  1953.40960832 -4584.50430574  -822.22173445]
New Q values:  [ 6249.52118013  1953.40960832 -4584.50430574  2134.51399906]
Reward: 9  Episode Reward:  -285
xxxxx
x.. x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 8.19334231e+03 4.48001054e+03 2.45392999e+03]
------
Step:6, Action:South
State  194
Old Q Values:  [-6.00000000e-01  5.24488560e+03  6.43154746e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.55289354e+04  6.43154746e+03  2.17939995e+03]
Reward: 9  Episode Reward:  -276
xxxxx
x.. x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3845.01082957 -8521.23367799  3165.93616513 78085.27050823]
------
Step:7, Action:North
State  272
Old Q Values:  [ 3845.01082957 -8521.23367799  3165.93616513 78085.27050823]
New Q values:  [ 3995.40702467 -8521.23367799  3165.93616513 78085.27050823]
Reward: -1  Episode Reward:  -277
xxxxx
x.. x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 8.19334231e+03 4.48001054e+03 2.45392999e+03]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 8.19334231e+03 4.48001054e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 2.67023181e+04 4.48001054e+03 2.45392999e+03]
Reward: -1  Episode Reward:  -278
xxxxx
x.. x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  3165.93616513 78085.27050823]
------
Step:9, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  3165.93616513 78085.27050823]
New Q values:  [ 3995.40702467 -8521.23367799  3165.93616513 43686.76063027]
Reward: 9  Episode Reward:  -269
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41490.84142326 12764.58618105  3851.09595999  1875.31501677]
------
Step:10, Action:North
State  260
Old Q Values:  [-3085.53741883 -5704.51612281  3130.20179729 -5679.36893145]
New Q values:  [ 1085.31627907 -5704.51612281  3130.20179729 -5679.36893145]
Reward: 9  Episode Reward:  -260
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[  471.29065272  2441.21181368  7713.770822   -4966.32149798]
------
Step:11, Action:East
State  177
Old Q Values:  [88883.84400489  5488.43861631 27912.76462132     0.        ]
New Q values:  [88883.84400489  5488.43861631 19175.2012714      0.        ]
Reward: -1  Episode Reward:  -261
xxxxx
x.g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.67023181e+04 4.48001054e+03 2.45392999e+03]
------
Step:12, Action:South
State  193
Old Q Values:  [-5922.26708831  5787.52486582 -3827.07475774   767.35890262]
New Q values:  [-5922.26708831  3023.13732139 -3827.07475774   767.35890262]
Reward: -1  Episode Reward:  -262
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1113.04903879 1625.15849924 2362.42458356]
------
Step:13, Action:West
State  273
Old Q Values:  [1637.72437281 1113.04903879 1625.15849924 2362.42458356]
New Q values:  [ 1637.72437281  1113.04903879  1625.15849924 13391.6222604 ]
Reward: -1  Episode Reward:  -263
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41490.84142326 12764.58618105  3851.09595999  1875.31501677]
------
Step:14, Action:North
State  257
Old Q Values:  [41490.84142326 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [43260.88977077 12764.58618105  3851.09595999  1875.31501677]
Reward: -1  Episode Reward:  -264
xxxxx
x.. x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631 19175.2012714      0.        ]
------
Step:15, Action:North
State  181
Old Q Values:  [ 590.77253684  148.54036549 1989.29430004    3.33862213]
New Q values:  [ 768.80369186  148.54036549 1989.29430004    3.33862213]
Reward: 9  Episode Reward:  -255
xxxxx
xa. x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1756.9822571   238.35800069    0.        ]
------
Step:16, Action:South
State  103
Old Q Values:  [ 221.30610858 1756.9822571   238.35800069    0.        ]
New Q values:  [ 221.30610858 1798.67889663  238.35800069    0.        ]
Reward: -1  Episode Reward:  -256
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 717.58161629  301.45090469 3654.95331264 1554.80203889]
------
Step:17, Action:East
State  183
Old Q Values:  [ 717.58161629  301.45090469 3654.95331264 1554.80203889]
New Q values:  [ 717.58161629  301.45090469 9120.06194307 1554.80203889]
Reward: -1  Episode Reward:  -257
xxxxx
x . x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  6.43154746e+03  2.17939995e+03]
------
Step:18, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.67023181e+04 4.48001054e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 2.67023181e+04 3.66626057e+03 2.45392999e+03]
Reward: -1  Episode Reward:  -258
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6249.52118013  1953.40960832 -4584.50430574  2134.51399906]
------
Step:19, Action:North
State  208
Old Q Values:  [ 6249.52118013  1953.40960832 -4584.50430574  2134.51399906]
New Q values:  [23107.79644387  1953.40960832 -4584.50430574  2134.51399906]
Reward: -1  Episode Reward:  -259
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[36583.68530748  5276.29178842  -180.00807518 68695.29323938]
------
Step:20, Action:West
State  130
Old Q Values:  [36583.68530748  5276.29178842  -180.00807518 68695.29323938]
New Q values:  [ 36583.68530748   5276.29178842   -180.00807518 108243.03318512]
Reward: 100009  Episode Reward:  99750
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2669.16547888 -6396.61506955 -5588.09647059  1662.23335923]
------
Step:1, Action:West
State  288
Old Q Values:  [-2669.16547888 -6396.61506955 -5588.09647059  1662.23335923]
New Q values:  [-2669.16547888 -6396.61506955 -5588.09647059 13776.32153277]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  3165.93616513 43686.76063027]
------
Step:2, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  3165.93616513 43686.76063027]
New Q values:  [ 3995.40702467 -8521.23367799  3165.93616513 12419.16479129]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x...x
xg  x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4401.31795881 4388.19962443  790.72804752 6552.76129389]
------
Step:1, Action:West
State  210
Old Q Values:  [4401.31795881 4388.19962443  790.72804752 6552.76129389]
New Q values:  [ 4401.31795881  4388.19962443   790.72804752 10285.18513557]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  6.43154746e+03  2.17939995e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.55289354e+04  6.43154746e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.55289354e+04  5.65757452e+03  2.17939995e+03]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4401.31795881  4388.19962443   790.72804752 10285.18513557]
------
Step:3, Action:West
State  210
Old Q Values:  [ 4401.31795881  4388.19962443   790.72804752 10285.18513557]
New Q values:  [ 4401.31795881  4388.19962443   790.72804752 11772.15467224]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x.a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  5.65757452e+03  2.17939995e+03]
------
Step:4, Action:East
State  195
Old Q Values:  [   38.85388605 15377.4345861  19015.79464603  1169.39963074]
New Q values:  [   38.85388605 15377.4345861  11137.36426008  1169.39963074]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x. ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4401.31795881  4388.19962443   790.72804752 11772.15467224]
------
Step:5, Action:West
State  208
Old Q Values:  [23107.79644387  1953.40960832 -4584.50430574  2134.51399906]
New Q values:  [23107.79644387  1953.40960832 -4584.50430574  1760.14679604]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x.agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3023.13732139 -3827.07475774   767.35890262]
------
Step:6, Action:South
State  195
Old Q Values:  [   38.85388605 15377.4345861  11137.36426008  1169.39963074]
New Q values:  [   38.85388605 10173.86051256 11137.36426008  1169.39963074]
Reward: 9  Episode Reward:  14
xxxxx
x...x
x.  x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1113.04903879  1625.15849924 13391.6222604 ]
------
Step:7, Action:West
State  273
Old Q Values:  [ 1637.72437281  1113.04903879  1625.15849924 13391.6222604 ]
New Q values:  [1637.72437281 1113.04903879 1625.15849924 5700.44175643]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1147.97617422    26.73544252 -3956.23312151   -12.17474163]
------
Step:8, Action:North
State  261
Old Q Values:  [ 1147.97617422    26.73544252 -3956.23312151   -12.17474163]
New Q values:  [ 2.71297437e+04  2.67354425e+01 -3.95623312e+03 -1.21747416e+01]
Reward: 9  Episode Reward:  22
xxxxx
x..gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631 19175.2012714      0.        ]
------
Step:9, Action:North
State  181
Old Q Values:  [ 768.80369186  148.54036549 1989.29430004    3.33862213]
New Q values:  [ 852.52514573  148.54036549 1989.29430004    3.33862213]
Reward: 9  Episode Reward:  31
xxxxx
xa..x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1798.67889663  238.35800069    0.        ]
------
Step:10, Action:South
State  103
Old Q Values:  [ 221.30610858 1798.67889663  238.35800069    0.        ]
New Q values:  [ 221.30610858 3454.89014157  238.35800069    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 717.58161629  301.45090469 9120.06194307 1554.80203889]
------
Step:11, Action:East
State  183
Old Q Values:  [ 717.58161629  301.45090469 9120.06194307 1554.80203889]
New Q values:  [  717.58161629   301.45090469 11306.10539524  1554.80203889]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  5.65757452e+03  2.17939995e+03]
------
Step:12, Action:East
State  195
Old Q Values:  [   38.85388605 10173.86051256 11137.36426008  1169.39963074]
New Q values:  [   38.85388605 10173.86051256  7985.99210571  1169.39963074]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4401.31795881  4388.19962443   790.72804752 11772.15467224]
------
Step:13, Action:West
State  208
Old Q Values:  [23107.79644387  1953.40960832 -4584.50430574  1760.14679604]
New Q values:  [23107.79644387  1953.40960832 -4584.50430574  1610.39991483]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3023.13732139 -3827.07475774   767.35890262]
------
Step:14, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.67023181e+04 3.66626057e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.16604748e+04 3.66626057e+03 2.45392999e+03]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3267.15869975 -5807.06396197  1641.99116812   868.47068878]
------
Step:15, Action:East
State  273
Old Q Values:  [1637.72437281 1113.04903879 1625.15849924 5700.44175643]
New Q values:  [1637.72437281 1113.04903879 4788.35985953 5700.44175643]
Reward: 9  Episode Reward:  35
xxxxx
x ..x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2669.16547888 -6396.61506955 -5588.09647059 13776.32153277]
------
Step:16, Action:West
State  288
Old Q Values:  [-2669.16547888 -6396.61506955 -5588.09647059 13776.32153277]
New Q values:  [-2669.16547888 -6396.61506955 -5588.09647059  7220.06114004]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x   x
x a x
xxxxx
Step:17, Action:South
State  273
Old Q Values:  [1637.72437281 1113.04903879 4788.35985953 5700.44175643]
New Q values:  [1637.72437281 1974.75214244 4788.35985953 5700.44175643]
Reward: -301  Episode Reward:  -267
xxxxx
x ..x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 4788.35985953 5700.44175643]
------
Step:18, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  3165.93616513 12419.16479129]
New Q values:  [ 3995.40702467 -8521.23367799  3165.93616513 13105.98901786]
Reward: -1  Episode Reward:  -268
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2.71297437e+04  2.67354425e+01 -3.95623312e+03 -1.21747416e+01]
------
Step:19, Action:North
State  261
Old Q Values:  [ 2.71297437e+04  2.67354425e+01 -3.95623312e+03 -1.21747416e+01]
New Q values:  [11448.08575847    26.73544252 -3956.23312151   -12.17474163]
Reward: -1  Episode Reward:  -269
xxxxx
x ..x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 852.52514573  148.54036549 1989.29430004    3.33862213]
------
Step:20, Action:East
State  183
Old Q Values:  [  717.58161629   301.45090469 11306.10539524  1554.80203889]
New Q values:  [ 717.58161629  301.45090469 7574.00031186 1554.80203889]
Reward: -1  Episode Reward:  -270
xxxxx
x ..x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[   38.85388605 10173.86051256  7985.99210571  1169.39963074]
------
Step:21, Action:South
State  195
Old Q Values:  [   38.85388605 10173.86051256  7985.99210571  1169.39963074]
New Q values:  [  38.85388605 5779.07673195 7985.99210571 1169.39963074]
Reward: -1  Episode Reward:  -271
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 4788.35985953 5700.44175643]
------
Step:22, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  3165.93616513 13105.98901786]
New Q values:  [ 3995.40702467 -8521.23367799  3165.93616513  8676.22133469]
Reward: -1  Episode Reward:  -272
xxxxx
x ..x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[11448.08575847    26.73544252 -3956.23312151   -12.17474163]
------
Step:23, Action:North
State  260
Old Q Values:  [ 1085.31627907 -5704.51612281  3130.20179729 -5679.36893145]
New Q values:  [ 7860.71685247 -5704.51612281  3130.20179729 -5679.36893145]
Reward: -1  Episode Reward:  -273
xxxxx
x ..x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 24757.30113613     0.        ]
------
Step:24, Action:East
State  183
Old Q Values:  [ 717.58161629  301.45090469 7574.00031186 1554.80203889]
New Q values:  [  717.58161629   301.45090469 10687.68074276  1554.80203889]
Reward: -1  Episode Reward:  -274
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  5.65757452e+03  2.17939995e+03]
------
Step:25, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.16604748e+04 3.66626057e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.16604748e+04 8.39824316e+03 2.45392999e+03]
Reward: -1  Episode Reward:  -275
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23107.79644387  1953.40960832 -4584.50430574  1610.39991483]
------
Step:26, Action:North
State  208
Old Q Values:  [23107.79644387  1953.40960832 -4584.50430574  1610.39991483]
New Q values:  [41721.42853308  1953.40960832 -4584.50430574  1610.39991483]
Reward: 9  Episode Reward:  -266
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36583.68530748   5276.29178842   -180.00807518 108243.03318512]
------
Step:27, Action:West
State  130
Old Q Values:  [ 36583.68530748   5276.29178842   -180.00807518 108243.03318512]
New Q values:  [ 36583.68530748   5276.29178842   -180.00807518 124062.12916341]
Reward: 100009  Episode Reward:  99743
xxxxx
x a x
x g x
x   x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2669.16547888 -6396.61506955 -5588.09647059  7220.06114004]
------
Step:1, Action:West
State  288
Old Q Values:  [-2669.16547888 -6396.61506955 -5588.09647059  7220.06114004]
New Q values:  [-2669.16547888 -6396.61506955 -5588.09647059  5496.29085642]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  3165.93616513  8676.22133469]
------
Step:2, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  3165.93616513  8676.22133469]
New Q values:  [ 3995.40702467 -8521.23367799  3165.93616513  6910.31426142]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[11448.08575847    26.73544252 -3956.23312151   -12.17474163]
------
Step:3, Action:North
State  261
Old Q Values:  [11448.08575847    26.73544252 -3956.23312151   -12.17474163]
New Q values:  [ 5181.4225934     26.73544252 -3956.23312151   -12.17474163]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 852.52514573  148.54036549 1989.29430004    3.33862213]
------
Step:4, Action:East
State  177
Old Q Values:  [88883.84400489  5488.43861631 19175.2012714      0.        ]
New Q values:  [88883.84400489  5488.43861631  8582.42170498     0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x. .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  3023.13732139 -3827.07475774   767.35890262]
------
Step:5, Action:South
State  196
Old Q Values:  [-2469.90645144  2079.17768709  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  1811.21868476  6570.2578975    231.67262594]
Reward: -1  Episode Reward:  35
xxxxx
x. .x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3267.15869975 -5807.06396197  1641.99116812   868.47068878]
------
Step:6, Action:East
State  276
Old Q Values:  [ 3267.15869975 -5807.06396197  1641.99116812   868.47068878]
New Q values:  [ 3267.15869975 -5807.06396197  2305.08372417   868.47068878]
Reward: -1  Episode Reward:  34
xxxxx
x. .x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[-2669.16547888 -6396.61506955 -5588.09647059  5496.29085642]
------
Step:7, Action:North
State  288
Old Q Values:  [-2669.16547888 -6396.61506955 -5588.09647059  5496.29085642]
New Q values:  [ 2469.38021012 -6396.61506955 -5588.09647059  5496.29085642]
Reward: 9  Episode Reward:  43
xxxxx
x. .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4401.31795881  4388.19962443   790.72804752 11772.15467224]
------
Step:8, Action:West
State  210
Old Q Values:  [ 4401.31795881  4388.19962443   790.72804752 11772.15467224]
New Q values:  [ 4401.31795881  4388.19962443   790.72804752 12366.94248691]
Reward: -1  Episode Reward:  42
xxxxx
x. .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  5.65757452e+03  2.17939995e+03]
------
Step:9, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.16604748e+04 8.39824316e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.16604748e+04 1.58751258e+04 2.45392999e+03]
Reward: -1  Episode Reward:  41
xxxxx
x. .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[41721.42853308  1953.40960832 -4584.50430574  1610.39991483]
------
Step:10, Action:North
State  208
Old Q Values:  [41721.42853308  1953.40960832 -4584.50430574  1610.39991483]
New Q values:  [53190.62721823  1953.40960832 -4584.50430574  1610.39991483]
Reward: 9  Episode Reward:  50
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 32116.70833183  15794.84178348  -8652.84       121655.51934999]
------
Step:11, Action:North
State  128
Old Q Values:  [ 32116.70833183  15794.84178348  -8652.84       121655.51934999]
New Q values:  [ 43162.73913773  15794.84178348  -8652.84       121655.51934999]
Reward: -10301  Episode Reward:  -10251
xxxxx
x. gx
x   x
x   x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1632.98656698  403.77554135 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3454.89014157  238.35800069    0.        ]
New Q values:  [ 221.30610858 4593.66027946  238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  717.58161629   301.45090469 10687.68074276  1554.80203889]
------
Step:2, Action:East
State  181
Old Q Values:  [ 852.52514573  148.54036549 1989.29430004    3.33862213]
New Q values:  [ 852.52514573  148.54036549 1280.60580978    3.33862213]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 1598.25945099 -789.02220255 1598.29363255]
------
Step:3, Action:West
State  200
Old Q Values:  [ 169.9257398  3934.68362957 2924.1337054   568.38654082]
New Q values:  [ 169.9257398  3934.68362957 2924.1337054   610.93635926]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 852.52514573  148.54036549 1280.60580978    3.33862213]
------
Step:4, Action:North
State  180
Old Q Values:  [  471.29065272  2441.21181368  7713.770822   -4966.32149798]
New Q values:  [ 2101.49583535  2441.21181368  7713.770822   -4966.32149798]
Reward: -1  Episode Reward:  16
xxxxx
xa..x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        6378.59858087 2846.56389321 -180.6       ]
------
Step:5, Action:East
State  108
Old Q Values:  [-8463.16477134  1779.08434045   980.97051902     0.        ]
New Q values:  [-8463.16477134  1779.08434045  1096.09124309     0.        ]
Reward: 9  Episode Reward:  25
xxxxx
xga.x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2327.67678494    65.73076831]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  9204.5590114    632.53805907]
New Q values:  [ -281.736      -1150.91067548  4816.17759213   632.53805907]
Reward: 9  Episode Reward:  34
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.63441799e+03  3.76317996e+03 -3.22965309e-01  9.55058236e+02]
------
Step:7, Action:South
State  136
Old Q Values:  [-2129.37064562  3637.88181659   660.86649319   563.20615877]
New Q values:  [-2129.37064562  2448.00620957   660.86649319   563.20615877]
Reward: -1  Episode Reward:  33
xxxxx
xg  x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2286.62705463  3311.51160976 -8220.10378799  2122.55739604]
------
Step:8, Action:South
State  216
Old Q Values:  [ 2286.62705463  3311.51160976 -8220.10378799  2122.55739604]
New Q values:  [ 2286.62705463  2978.89190083 -8220.10378799  2122.55739604]
Reward: 9  Episode Reward:  42
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2469.38021012 -6396.61506955 -5588.09647059  5496.29085642]
------
Step:9, Action:West
State  288
Old Q Values:  [ 2469.38021012 -6396.61506955 -5588.09647059  5496.29085642]
New Q values:  [ 2469.38021012 -6396.61506955 -5588.09647059  4277.01062099]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  3165.93616513  6910.31426142]
------
Step:10, Action:West
State  273
Old Q Values:  [1637.72437281 1974.75214244 4788.35985953 5700.44175643]
New Q values:  [ 1637.72437281  1974.75214244  4788.35985953 75263.8436338 ]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1632.98656698  403.77554135 -120.29354603]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094   682.47215562 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   662.57060518 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  9
xxxxx
x .gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 852.52514573  148.54036549 1280.60580978    3.33862213]
------
Step:2, Action:East
State  181
Old Q Values:  [ 852.52514573  148.54036549 1280.60580978    3.33862213]
New Q values:  [852.52514573 148.54036549 997.13041368   3.33862213]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 1598.25945099 -789.02220255 1598.29363255]
------
Step:3, Action:West
State  201
Old Q Values:  [ 613.33320563 1598.25945099 -789.02220255 1598.29363255]
New Q values:  [ 613.33320563 1598.25945099 -789.02220255 1209.42904959]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627  489.19212398 1902.37198857  154.04646645]
------
Step:4, Action:East
State  189
Old Q Values:  [ 337.36081627  489.19212398 1902.37198857  154.04646645]
New Q values:  [ 337.36081627  489.19212398 1940.7538843   154.04646645]
Reward: -1  Episode Reward:  16
xxxxx
x g.x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 2924.1337054   610.93635926]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.16604748e+04 1.58751258e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.74268421e+03 1.58751258e+04 2.45392999e+03]
Reward: 9  Episode Reward:  25
xxxxx
xg..x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  3165.93616513  6910.31426142]
------
Step:6, Action:West
State  276
Old Q Values:  [ 3267.15869975 -5807.06396197  2305.08372417   868.47068878]
New Q values:  [ 3267.15869975 -5807.06396197  2305.08372417  2711.00333125]
Reward: 9  Episode Reward:  34
xxxxx
x ..x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 7860.71685247 -5704.51612281  3130.20179729 -5679.36893145]
------
Step:7, Action:East
State  261
Old Q Values:  [ 5181.4225934     26.73544252 -3956.23312151   -12.17474163]
New Q values:  [5181.4225934    26.73544252 -602.94563868  -12.17474163]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3267.15869975 -5807.06396197  2305.08372417  2711.00333125]
------
Step:8, Action:West
State  276
Old Q Values:  [ 3267.15869975 -5807.06396197  2305.08372417  2711.00333125]
New Q values:  [ 3267.15869975 -5807.06396197  2305.08372417  3442.01638824]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 7860.71685247 -5704.51612281  3130.20179729 -5679.36893145]
------
Step:9, Action:East
State  261
Old Q Values:  [5181.4225934    26.73544252 -602.94563868  -12.17474163]
New Q values:  [5181.4225934    26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3267.15869975 -5807.06396197  2305.08372417  3442.01638824]
------
Step:10, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.46883995e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 2.14136276e+03]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5181.4225934    26.73544252  790.826661    -12.17474163]
------
Step:11, Action:North
State  261
Old Q Values:  [5181.4225934    26.73544252  790.826661    -12.17474163]
New Q values:  [5278.27326019   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  717.58161629   301.45090469 10687.68074276  1554.80203889]
------
Step:12, Action:East
State  183
Old Q Values:  [  717.58161629   301.45090469 10687.68074276  1554.80203889]
New Q values:  [  717.58161629   301.45090469 11933.15291511  1554.80203889]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  5.65757452e+03  2.17939995e+03]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.55289354e+04  5.65757452e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.55289354e+04  5.97251256e+03  2.17939995e+03]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4401.31795881  4388.19962443   790.72804752 12366.94248691]
------
Step:14, Action:West
State  208
Old Q Values:  [53190.62721823  1953.40960832 -4584.50430574  1610.39991483]
New Q values:  [53190.62721823  1953.40960832 -4584.50430574  5406.09771317]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.74268421e+03 1.58751258e+04 2.45392999e+03]
------
Step:15, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.74268421e+03 1.58751258e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.74268421e+03 2.23066385e+04 2.45392999e+03]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[53190.62721823  1953.40960832 -4584.50430574  5406.09771317]
------
Step:16, Action:North
State  208
Old Q Values:  [53190.62721823  1953.40960832 -4584.50430574  5406.09771317]
New Q values:  [58500.28963631  1953.40960832 -4584.50430574  5406.09771317]
Reward: 9  Episode Reward:  34
xxxxx
x .ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36583.68530748   5276.29178842   -180.00807518 124062.12916341]
------
Step:17, Action:West
State  136
Old Q Values:  [-2129.37064562  2448.00620957   660.86649319   563.20615877]
New Q values:  [-2129.37064562  2448.00620957   660.86649319   231.00690689]
Reward: 9  Episode Reward:  43
xxxxx
x agx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NE
[0.         0.         0.         1.08147795]
------
Step:18, Action:West
State  127
Old Q Values:  [0.00000000e+00 1.67014986e+00 8.95487000e+02 1.70709492e+03]
New Q values:  [   0.            1.67014986  895.48700012 1172.13393898]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1632.98656698  403.77554135 -120.29354603]
------
Step:19, Action:South
State  111
Old Q Values:  [-177.44732869 1632.98656698  403.77554135 -120.29354603]
New Q values:  [-177.44732869 1234.82079208  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627  489.19212398 1940.7538843   154.04646645]
------
Step:20, Action:South
State  191
Old Q Values:  [3.06655861e+00 1.80251852e+03 3.21633155e+03 0.00000000e+00]
New Q values:  [3.06655861e+00 2.30388938e+03 3.21633155e+03 0.00000000e+00]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5278.27326019   26.73544252  790.826661    -12.17474163]
------
Step:21, Action:North
State  257
Old Q Values:  [43260.88977077 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [47563.50689931 12764.58618105  3851.09595999  1875.31501677]
Reward: -1  Episode Reward:  39
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[ 82228.67666629  16101.90751562 100865.83663667      0.        ]
------
Step:22, Action:East
State  191
Old Q Values:  [3.06655861e+00 2.30388938e+03 3.21633155e+03 0.00000000e+00]
New Q values:  [   3.06655861 2303.8893844  2029.22435297    0.        ]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:SW
[   0.         2477.63911564 1418.80979599    0.        ]
------
Step:23, Action:East
State  204
Old Q Values:  [   0.         3157.19864976 3881.10435421  441.58769553]
New Q values:  [   0.         3157.19864976 2445.50931193  441.58769553]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2286.62705463  2978.89190083 -8220.10378799  2122.55739604]
------
Step:24, Action:South
State  208
Old Q Values:  [58500.28963631  1953.40960832 -4584.50430574  5406.09771317]
New Q values:  [58500.28963631 62069.86702963 -4584.50430574  5406.09771317]
Reward: 100009  Episode Reward:  100046
xxxxx
x   x
x  gx
x  ax
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4816.17759213   632.53805907]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2327.67678494    65.73076831]
New Q values:  [-9594.56523706 -8069.05606225  1670.87257685    65.73076831]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2448.00620957   660.86649319   231.00690689]
------
Step:2, Action:South
State  138
Old Q Values:  [ 3.63441799e+03  3.76317996e+03 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 3.63441799e+03  2.40433955e+03 -3.22965309e-01  9.55058236e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2286.62705463  2978.89190083 -8220.10378799  2122.55739604]
------
Step:3, Action:South
State  216
Old Q Values:  [ 2286.62705463  2978.89190083 -8220.10378799  2122.55739604]
New Q values:  [ 2286.62705463  2480.05994663 -8220.10378799  2122.55739604]
Reward: 9  Episode Reward:  27
xxxxx
x.  x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2469.38021012 -6396.61506955 -5588.09647059  4277.01062099]
------
Step:4, Action:West
State  288
Old Q Values:  [ 2469.38021012 -6396.61506955 -5588.09647059  4277.01062099]
New Q values:  [ 2469.38021012 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -9991  Episode Reward:  -9964
xxxxx
x.  x
x . x
x.g x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[852.52514573 148.54036549 997.13041368   3.33862213]
------
Step:1, Action:North
State  181
Old Q Values:  [852.52514573 148.54036549 997.13041368   3.33862213]
New Q values:  [545.18123985 148.54036549 997.13041368   3.33862213]
Reward: 9  Episode Reward:  9
xxxxx
xag.x
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   662.57060518 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1234.82079208  403.77554135 -120.29354603]
New Q values:  [-177.44732869  792.46744094  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[545.18123985 148.54036549 997.13041368   3.33862213]
------
Step:3, Action:North
State  181
Old Q Values:  [545.18123985 148.54036549 997.13041368   3.33862213]
New Q values:  [1595.57057978  148.54036549  997.13041368    3.33862213]
Reward: -1  Episode Reward:  7
xxxxx
xa..x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 4593.66027946  238.35800069    0.        ]
------
Step:4, Action:South
State  109
Old Q Values:  [ -241.10880094   662.57060518 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   743.09941601 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  6
xxxxx
x .gx
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1595.57057978  148.54036549  997.13041368    3.33862213]
------
Step:5, Action:North
State  181
Old Q Values:  [1595.57057978  148.54036549  997.13041368    3.33862213]
New Q values:  [860.55805671 148.54036549 997.13041368   3.33862213]
Reward: -1  Episode Reward:  5
xxxxx
xag.x
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   743.09941601 -2165.66138672  -180.6       ]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869  792.46744094  403.77554135 -120.29354603]
New Q values:  [-177.44732869  615.52610048  403.77554135 -120.29354603]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[860.55805671 148.54036549 997.13041368   3.33862213]
------
Step:7, Action:North
State  181
Old Q Values:  [860.55805671 148.54036549 997.13041368   3.33862213]
New Q values:  [566.55304749 148.54036549 997.13041368   3.33862213]
Reward: -1  Episode Reward:  3
xxxxx
xag.x
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   743.09941601 -2165.66138672  -180.6       ]
------
Step:8, Action:South
State  109
Old Q Values:  [ -241.10880094   743.09941601 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   595.77889051 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  2
xxxxx
x .gx
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[566.55304749 148.54036549 997.13041368   3.33862213]
------
Step:9, Action:East
State  181
Old Q Values:  [566.55304749 148.54036549 997.13041368   3.33862213]
New Q values:  [566.55304749 148.54036549 877.73000077   3.33862213]
Reward: -1  Episode Reward:  1
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 1598.25945099 -789.02220255 1209.42904959]
------
Step:10, Action:South
State  193
Old Q Values:  [-5922.26708831  3023.13732139 -3827.07475774   767.35890262]
New Q values:  [-5922.26708831 23793.8080187  -3827.07475774   767.35890262]
Reward: 9  Episode Reward:  10
xxxxx
x .gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  4788.35985953 75263.8436338 ]
------
Step:11, Action:West
State  276
Old Q Values:  [ 3267.15869975 -5807.06396197  2305.08372417  3442.01638824]
New Q values:  [ 3267.15869975 -5807.06396197  2305.08372417  2965.68853335]
Reward: 9  Episode Reward:  19
xxxxx
x g.x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5278.27326019   26.73544252  790.826661    -12.17474163]
------
Step:12, Action:North
State  261
Old Q Values:  [5278.27326019   26.73544252  790.826661    -12.17474163]
New Q values:  [2374.02830431   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  18
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[566.55304749 148.54036549 877.73000077   3.33862213]
------
Step:13, Action:North
State  181
Old Q Values:  [566.55304749 148.54036549 877.73000077   3.33862213]
New Q values:  [404.75488615 148.54036549 877.73000077   3.33862213]
Reward: -1  Episode Reward:  17
xxxxx
xag.x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   595.77889051 -2165.66138672  -180.6       ]
------
Step:14, Action:South
State  109
Old Q Values:  [ -241.10880094   595.77889051 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094   501.03055643 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  16
xxxxx
x .gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[404.75488615 148.54036549 877.73000077   3.33862213]
------
Step:15, Action:East
State  181
Old Q Values:  [404.75488615 148.54036549 877.73000077   3.33862213]
New Q values:  [4.04754886e+02 1.48540365e+02 7.48863441e+03 3.33862213e+00]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 23793.8080187  -3827.07475774   767.35890262]
------
Step:16, Action:South
State  199
Old Q Values:  [  14.86214194 1482.85185902 7748.13858789 1915.70494401]
New Q values:  [  14.86214194 1549.78616729 7748.13858789 1915.70494401]
Reward: -1  Episode Reward:  14
xxxxx
x ..x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.19081808e+03 2.14136276e+03]
------
Step:17, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 2.14136276e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.56815359e+03]
Reward: -1  Episode Reward:  13
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2374.02830431   26.73544252  790.826661    -12.17474163]
------
Step:18, Action:North
State  261
Old Q Values:  [2374.02830431   26.73544252  790.826661    -12.17474163]
New Q values:  [3195.6016435    26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  12
xxxxx
x ..x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.48540365e+02 7.48863441e+03 3.33862213e+00]
------
Step:19, Action:North
State  183
Old Q Values:  [  717.58161629   301.45090469 11933.15291511  1554.80203889]
New Q values:  [ 1664.53073035   301.45090469 11933.15291511  1554.80203889]
Reward: -1  Episode Reward:  11
xxxxx
xa..x
x  .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 4593.66027946  238.35800069    0.        ]
------
Step:20, Action:South
State  102
Old Q Values:  [-180.6        6378.59858087 2846.56389321 -180.6       ]
New Q values:  [-180.6        9978.02977319 2846.56389321 -180.6       ]
Reward: -1  Episode Reward:  10
xxxxx
x ..x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 24757.30113613     0.        ]
------
Step:21, Action:East
State  180
Old Q Values:  [ 2101.49583535  2441.21181368  7713.770822   -4966.32149798]
New Q values:  [ 2101.49583535  2441.21181368  5055.98569805 -4966.32149798]
Reward: -1  Episode Reward:  9
xxxxx
x ..x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1811.21868476  6570.2578975    231.67262594]
------
Step:22, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.74268421e+03 2.23066385e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.74268421e+03 2.75490155e+04 2.45392999e+03]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[58500.28963631 62069.86702963 -4584.50430574  5406.09771317]
------
Step:23, Action:South
State  210
Old Q Values:  [ 4401.31795881  4388.19962443   790.72804752 12366.94248691]
New Q values:  [ 4401.31795881  2501.49391281   790.72804752 12366.94248691]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2469.38021012 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:24, Action:North
State  288
Old Q Values:  [ 2469.38021012 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [ 4697.23483012 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -1  Episode Reward:  26
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4401.31795881  2501.49391281   790.72804752 12366.94248691]
------
Step:25, Action:West
State  208
Old Q Values:  [58500.28963631 62069.86702963 -4584.50430574  5406.09771317]
New Q values:  [58500.28963631 62069.86702963 -4584.50430574 10426.54373735]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.74268421e+03 2.75490155e+04 2.45392999e+03]
------
Step:26, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.74268421e+03 2.75490155e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.74268421e+03 2.96399663e+04 2.45392999e+03]
Reward: -1  Episode Reward:  24
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[58500.28963631 62069.86702963 -4584.50430574 10426.54373735]
------
Step:27, Action:South
State  208
Old Q Values:  [58500.28963631 62069.86702963 -4584.50430574 10426.54373735]
New Q values:  [58500.28963631 26236.51726089 -4584.50430574 10426.54373735]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4697.23483012 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:28, Action:North
State  288
Old Q Values:  [ 4697.23483012 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [ 5588.37667812 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 4401.31795881  2501.49391281   790.72804752 12366.94248691]
------
Step:29, Action:West
State  210
Old Q Values:  [ 4401.31795881  2501.49391281   790.72804752 12366.94248691]
New Q values:  [ 4401.31795881  2501.49391281   790.72804752 12604.85761277]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  5.97251256e+03  2.17939995e+03]
------
Step:30, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.74268421e+03 2.96399663e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.74268421e+03 2.94054734e+04 2.45392999e+03]
Reward: -1  Episode Reward:  20
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[58500.28963631 26236.51726089 -4584.50430574 10426.54373735]
------
Step:31, Action:North
State  210
Old Q Values:  [ 4401.31795881  2501.49391281   790.72804752 12604.85761277]
New Q values:  [38984.56593255  2501.49391281   790.72804752 12604.85761277]
Reward: 9  Episode Reward:  29
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 36583.68530748   5276.29178842   -180.00807518 124062.12916341]
------
Step:32, Action:West
State  130
Old Q Values:  [ 36583.68530748   5276.29178842   -180.00807518 124062.12916341]
New Q values:  [ 36583.68530748   5276.29178842   -180.00807518 149038.46617938]
Reward: 100009  Episode Reward:  100038
xxxxx
x a x
x   x
x  gx
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   294.84238464]
------
Step:1, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1670.87257685    65.73076831]
New Q values:  [-9594.56523706 -8069.05606225  1670.87257685   182.00147425]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   501.03055643 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  108
Old Q Values:  [-8463.16477134  1779.08434045  1096.09124309     0.        ]
New Q values:  [-8463.16477134  2233.8294456   1096.09124309     0.        ]
Reward: 9  Episode Reward:  18
xxxxx
xg  x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 2101.49583535  2441.21181368  5055.98569805 -4966.32149798]
------
Step:3, Action:East
State  188
Old Q Values:  [-6523.78898263  2790.67422621  1895.68698353     0.        ]
New Q values:  [-6523.78898263  2790.67422621  1944.07988228     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 2924.1337054   610.93635926]
------
Step:4, Action:South
State  196
Old Q Values:  [-2469.90645144  1811.21868476  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  1710.03508383  6570.2578975    231.67262594]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3267.15869975 -5807.06396197  2305.08372417  2965.68853335]
------
Step:5, Action:West
State  276
Old Q Values:  [ 3267.15869975 -5807.06396197  2305.08372417  2965.68853335]
New Q values:  [ 3267.15869975 -5807.06396197  2305.08372417  2150.35590639]
Reward: 9  Episode Reward:  45
xxxxx
x g x
x  .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3195.6016435    26.73544252  790.826661    -12.17474163]
------
Step:6, Action:North
State  261
Old Q Values:  [3195.6016435    26.73544252  790.826661    -12.17474163]
New Q values:  [3524.23097917   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.48540365e+02 7.48863441e+03 3.33862213e+00]
------
Step:7, Action:North
State  180
Old Q Values:  [ 2101.49583535  2441.21181368  5055.98569805 -4966.32149798]
New Q values:  [ 1934.87827965  2441.21181368  5055.98569805 -4966.32149798]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 3649.59981836 1540.97816408 -180.6       ]
------
Step:8, Action:East
State  111
Old Q Values:  [-177.44732869  615.52610048  403.77554135 -120.29354603]
New Q values:  [-177.44732869  615.52610048  431.25952337 -120.29354603]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 901.16435612 399.8615296 ]
------
Step:9, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 3562.34358094 1772.94838375]
New Q values:  [   0.         1166.51141701 2158.73929524 1772.94838375]
Reward: -1  Episode Reward:  41
xxxxx
x gax
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2448.00620957   660.86649319   231.00690689]
------
Step:10, Action:South
State  136
Old Q Values:  [-2129.37064562  2448.00620957   660.86649319   231.00690689]
New Q values:  [-2129.37064562  1728.62046782   660.86649319   231.00690689]
Reward: 9  Episode Reward:  50
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2286.62705463  2480.05994663 -8220.10378799  2122.55739604]
------
Step:11, Action:South
State  208
Old Q Values:  [58500.28963631 26236.51726089 -4584.50430574 10426.54373735]
New Q values:  [58500.28963631 72176.51990779 -4584.50430574 10426.54373735]
Reward: 100009  Episode Reward:  100059
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5588.37667812 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:1, Action:North
State  288
Old Q Values:  [ 5588.37667812 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [13936.12045101 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[38984.56593255  2501.49391281   790.72804752 12604.85761277]
------
Step:2, Action:North
State  210
Old Q Values:  [38984.56593255  2501.49391281   790.72804752 12604.85761277]
New Q values:  [16689.55177088  2501.49391281   790.72804752 12604.85761277]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.63441799e+03  2.40433955e+03 -3.22965309e-01  9.55058236e+02]
------
Step:3, Action:North
State  130
Old Q Values:  [ 36583.68530748   5276.29178842   -180.00807518 149038.46617938]
New Q values:  [ 59164.41397681   5276.29178842   -180.00807518 149038.46617938]
Reward: -301  Episode Reward:  -283
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 59164.41397681   5276.29178842   -180.00807518 149038.46617938]
------
Step:4, Action:West
State  138
Old Q Values:  [ 3.63441799e+03  2.40433955e+03 -3.22965309e-01  9.55058236e+02]
New Q values:  [ 3.63441799e+03  2.40433955e+03 -3.22965309e-01  1.83227657e+03]
Reward: 9  Episode Reward:  -274
xxxxx
x.a x
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4816.17759213   632.53805907]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4816.17759213   632.53805907]
New Q values:  [ -281.736      -1150.91067548  3016.19643471   632.53805907]
Reward: -1  Episode Reward:  -275
xxxxx
x. ax
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 3.63441799e+03  2.40433955e+03 -3.22965309e-01  1.83227657e+03]
------
Step:6, Action:North
State  138
Old Q Values:  [ 3.63441799e+03  2.40433955e+03 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 2.36349260e+03  2.40433955e+03 -3.22965309e-01  1.83227657e+03]
Reward: -301  Episode Reward:  -576
xxxxx
x. ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.36349260e+03  2.40433955e+03 -3.22965309e-01  1.83227657e+03]
------
Step:7, Action:South
State  138
Old Q Values:  [ 2.36349260e+03  2.40433955e+03 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 2.36349260e+03  2.26140918e+04 -3.22965309e-01  1.83227657e+03]
Reward: -1  Episode Reward:  -577
xxxxx
x.  x
xg.ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[58500.28963631 72176.51990779 -4584.50430574 10426.54373735]
------
Step:8, Action:South
State  208
Old Q Values:  [58500.28963631 72176.51990779 -4584.50430574 10426.54373735]
New Q values:  [58500.28963631 33050.84409842 -4584.50430574 10426.54373735]
Reward: -1  Episode Reward:  -578
xxxxx
xg  x
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13936.12045101 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:9, Action:North
State  288
Old Q Values:  [13936.12045101 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [23123.9350713  -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -1  Episode Reward:  -579
xxxxx
x.g x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[58500.28963631 33050.84409842 -4584.50430574 10426.54373735]
------
Step:10, Action:North
State  208
Old Q Values:  [58500.28963631 33050.84409842 -4584.50430574 10426.54373735]
New Q values:  [23918.10199487 33050.84409842 -4584.50430574 10426.54373735]
Reward: -1  Episode Reward:  -580
xxxxx
xg ax
x.. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  1728.62046782   660.86649319   231.00690689]
------
Step:11, Action:South
State  136
Old Q Values:  [-2129.37064562  1728.62046782   660.86649319   231.00690689]
New Q values:  [-2129.37064562 10606.10141665   660.86649319   231.00690689]
Reward: -1  Episode Reward:  -581
xxxxx
x.g x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23918.10199487 33050.84409842 -4584.50430574 10426.54373735]
------
Step:12, Action:South
State  208
Old Q Values:  [23918.10199487 33050.84409842 -4584.50430574 10426.54373735]
New Q values:  [23918.10199487 20156.91816076 -4584.50430574 10426.54373735]
Reward: -1  Episode Reward:  -582
xxxxx
x. gx
x.. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23123.9350713  -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:13, Action:North
State  288
Old Q Values:  [23123.9350713  -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [10424.40462698 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -10001  Episode Reward:  -10583
xxxxx
x.  x
x..gx
x . x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 24757.30113613     0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [ 1664.53073035   301.45090469 11933.15291511  1554.80203889]
New Q values:  [ 1664.53073035   301.45090469 12437.34178406  1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  5.97251256e+03  2.17939995e+03]
------
Step:2, Action:East
State  195
Old Q Values:  [  38.85388605 5779.07673195 7985.99210571 1169.39963074]
New Q values:  [  38.85388605 5779.07673195 8206.66237355 1169.39963074]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16689.55177088  2501.49391281   790.72804752 12604.85761277]
------
Step:3, Action:North
State  210
Old Q Values:  [16689.55177088  2501.49391281   790.72804752 12604.85761277]
New Q values:  [51392.76056217  2501.49391281   790.72804752 12604.85761277]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 59164.41397681   5276.29178842   -180.00807518 149038.46617938]
------
Step:4, Action:West
State  130
Old Q Values:  [ 59164.41397681   5276.29178842   -180.00807518 149038.46617938]
New Q values:  [59164.41397681  5276.29178842  -180.00807518 99029.00098578]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 131360.71504674]
------
Step:5, Action:West
State  126
Old Q Values:  [  0.         331.64678262 901.16435612 399.8615296 ]
New Q values:  [  0.         331.64678262 901.16435612 350.00244198]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  615.52610048  431.25952337 -120.29354603]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869  615.52610048  431.25952337 -120.29354603]
New Q values:  [-177.44732869  827.83660548  431.25952337 -120.29354603]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627  489.19212398 1940.7538843   154.04646645]
------
Step:7, Action:South
State  189
Old Q Values:  [ 337.36081627  489.19212398 1940.7538843   154.04646645]
New Q values:  [ 337.36081627 1252.34614334 1940.7538843   154.04646645]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3524.23097917   26.73544252  790.826661    -12.17474163]
------
Step:8, Action:North
State  260
Old Q Values:  [ 7860.71685247 -5704.51612281  3130.20179729 -5679.36893145]
New Q values:  [ 3980.88900885 -5704.51612281  3130.20179729 -5679.36893145]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  2790.67422621  1944.07988228     0.        ]
------
Step:9, Action:South
State  189
Old Q Values:  [ 337.36081627 1252.34614334 1940.7538843   154.04646645]
New Q values:  [ 337.36081627 1557.60775109 1940.7538843   154.04646645]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3524.23097917   26.73544252  790.826661    -12.17474163]
------
Step:10, Action:North
State  261
Old Q Values:  [3524.23097917   26.73544252  790.826661    -12.17474163]
New Q values:  [1991.31855696   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  40
xxxxx
x  gx
xa  x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1557.60775109 1940.7538843   154.04646645]
------
Step:11, Action:East
State  189
Old Q Values:  [ 337.36081627 1557.60775109 1940.7538843   154.04646645]
New Q values:  [ 337.36081627 1557.60775109 1255.17938902  154.04646645]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 1598.25945099 -789.02220255 1209.42904959]
------
Step:12, Action:South
State  204
Old Q Values:  [   0.         3157.19864976 2445.50931193  441.58769553]
New Q values:  [   0.         2248.42706983 2445.50931193  441.58769553]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3267.15869975 -5807.06396197  2305.08372417  2150.35590639]
------
Step:13, Action:East
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  3165.93616513  6910.31426142]
New Q values:  [ 3995.40702467 -8521.23367799 64399.09585415  6910.31426142]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.36349260e+03  2.26140918e+04 -3.22965309e-01  1.83227657e+03]
------
Step:1, Action:South
State  136
Old Q Values:  [-2129.37064562 10606.10141665   660.86649319   231.00690689]
New Q values:  [-2129.37064562  4991.85855065   660.86649319   231.00690689]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2286.62705463  2480.05994663 -8220.10378799  2122.55739604]
------
Step:2, Action:South
State  208
Old Q Values:  [23918.10199487 20156.91816076 -4584.50430574 10426.54373735]
New Q values:  [23918.10199487 11195.4886524  -4584.50430574 10426.54373735]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10424.40462698 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:3, Action:North
State  288
Old Q Values:  [10424.40462698 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [11344.59244925 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23918.10199487 11195.4886524  -4584.50430574 10426.54373735]
------
Step:4, Action:North
State  208
Old Q Values:  [23918.10199487 11195.4886524  -4584.50430574 10426.54373735]
New Q values:  [ 5064.19836314 11195.4886524  -4584.50430574 10426.54373735]
Reward: -10001  Episode Reward:  -9984
xxxxx
x..gx
x.  x
x.. x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11344.59244925 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:1, Action:North
State  288
Old Q Values:  [11344.59244925 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [ 1901.88357542 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -9991  Episode Reward:  -9991
xxxxx
x.. x
x..gx
x.. x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5064.19836314 11195.4886524  -4584.50430574 10426.54373735]
------
Step:1, Action:South
State  208
Old Q Values:  [ 5064.19836314 11195.4886524  -4584.50430574 10426.54373735]
New Q values:  [ 5064.19836314  5054.16053359 -4584.50430574 10426.54373735]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1901.88357542 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:2, Action:North
State  288
Old Q Values:  [ 1901.88357542 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [16177.98159882 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[51392.76056217  2501.49391281   790.72804752 12604.85761277]
------
Step:3, Action:North
State  208
Old Q Values:  [ 5064.19836314  5054.16053359 -4584.50430574 10426.54373735]
New Q values:  [ 8815.3068834   5054.16053359 -4584.50430574 10426.54373735]
Reward: 9  Episode Reward:  17
xxxxx
x .ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.36349260e+03  2.26140918e+04 -3.22965309e-01  1.83227657e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 2.36349260e+03  2.26140918e+04 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 2.36349260e+03  2.44628649e+04 -3.22965309e-01  1.83227657e+03]
Reward: -1  Episode Reward:  16
xxxxx
x . x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[51392.76056217  2501.49391281   790.72804752 12604.85761277]
------
Step:5, Action:North
State  208
Old Q Values:  [ 8815.3068834   5054.16053359 -4584.50430574 10426.54373735]
New Q values:  [10864.38221921  5054.16053359 -4584.50430574 10426.54373735]
Reward: -1  Episode Reward:  15
xxxxx
x .ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.36349260e+03  2.44628649e+04 -3.22965309e-01  1.83227657e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 2.36349260e+03  2.44628649e+04 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 2.36349260e+03  2.52023741e+04 -3.22965309e-01  1.83227657e+03]
Reward: -1  Episode Reward:  14
xxxxx
x . x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[51392.76056217  2501.49391281   790.72804752 12604.85761277]
------
Step:7, Action:North
State  210
Old Q Values:  [51392.76056217  2501.49391281   790.72804752 12604.85761277]
New Q values:  [28117.2164618   2501.49391281   790.72804752 12604.85761277]
Reward: -1  Episode Reward:  13
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.36349260e+03  2.52023741e+04 -3.22965309e-01  1.83227657e+03]
------
Step:8, Action:South
State  138
Old Q Values:  [ 2.36349260e+03  2.52023741e+04 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 2.36349260e+03  1.85155146e+04 -3.22965309e-01  1.83227657e+03]
Reward: -1  Episode Reward:  12
xxxxx
x . x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28117.2164618   2501.49391281   790.72804752 12604.85761277]
------
Step:9, Action:North
State  208
Old Q Values:  [10864.38221921  5054.16053359 -4584.50430574 10426.54373735]
New Q values:  [ 9899.80726402  5054.16053359 -4584.50430574 10426.54373735]
Reward: -1  Episode Reward:  11
xxxxx
x .ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.36349260e+03  1.85155146e+04 -3.22965309e-01  1.83227657e+03]
------
Step:10, Action:North
State  138
Old Q Values:  [ 2.36349260e+03  1.85155146e+04 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 6.31945141e+03  1.85155146e+04 -3.22965309e-01  1.83227657e+03]
Reward: -301  Episode Reward:  -290
xxxxx
x .ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.31945141e+03  1.85155146e+04 -3.22965309e-01  1.83227657e+03]
------
Step:11, Action:South
State  138
Old Q Values:  [ 6.31945141e+03  1.85155146e+04 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 6.31945141e+03  4.53356896e+03 -3.22965309e-01  1.83227657e+03]
Reward: -10001  Episode Reward:  -10291
xxxxx
x . x
x..gx
x.. x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16177.98159882 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:1, Action:North
State  288
Old Q Values:  [16177.98159882 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [ 9604.55576073 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9899.80726402  5054.16053359 -4584.50430574 10426.54373735]
------
Step:2, Action:West
State  208
Old Q Values:  [ 9899.80726402  5054.16053359 -4584.50430574 10426.54373735]
New Q values:  [ 9899.80726402  5054.16053359 -4584.50430574  6997.65951961]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x.g x
x.. x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3016.19643471   632.53805907]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3016.19643471   632.53805907]
New Q values:  [ -281.736      -1150.91067548  3107.71399819   632.53805907]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 6.31945141e+03  4.53356896e+03 -3.22965309e-01  1.83227657e+03]
------
Step:2, Action:North
State  138
Old Q Values:  [ 6.31945141e+03  4.53356896e+03 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 4.24301599e+03  4.53356896e+03 -3.22965309e-01  1.83227657e+03]
Reward: -301  Episode Reward:  -292
xxxxx
x. ax
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.24301599e+03  4.53356896e+03 -3.22965309e-01  1.83227657e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 4.24301599e+03  4.53356896e+03 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 4.24301599e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
Reward: -9991  Episode Reward:  -10283
xxxxx
x.  x
x. gx
x...x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9604.55576073 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:1, Action:North
State  288
Old Q Values:  [ 9604.55576073 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [ 6817.1644835  -6396.61506955 -5588.09647059 -2210.70147318]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9899.80726402  5054.16053359 -4584.50430574  6997.65951961]
------
Step:2, Action:North
State  208
Old Q Values:  [ 9899.80726402  5054.16053359 -4584.50430574  6997.65951961]
New Q values:  [ -537.1195292   5054.16053359 -4584.50430574  6997.65951961]
Reward: -9991  Episode Reward:  -9982
xxxxx
x .gx
x.. x
x.. x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  827.83660548  431.25952337 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 4593.66027946  238.35800069    0.        ]
New Q values:  [ 221.30610858 5574.066647    238.35800069    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1664.53073035   301.45090469 12437.34178406  1554.80203889]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243 24757.30113613     0.        ]
New Q values:  [    0.         -5536.05678243 11182.7662635      0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -2.0061023e+02  4.2681527e+03  0.0000000e+00]
------
Step:3, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.55289354e+04  5.97251256e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.55289354e+04  1.08295700e+04  2.17939995e+03]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28117.2164618   2501.49391281   790.72804752 12604.85761277]
------
Step:4, Action:North
State  218
Old Q Values:  [2001.50506071 2755.30078316    0.         2598.71043955]
New Q values:  [2078.90682129 2755.30078316    0.         2598.71043955]
Reward: 9  Episode Reward:  26
xxxxx
x .ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.24301599e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
------
Step:5, Action:North
State  138
Old Q Values:  [ 4.24301599e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 2.78951119e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
Reward: -301  Episode Reward:  -275
xxxxx
x .ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.78951119e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
------
Step:6, Action:North
State  138
Old Q Values:  [ 2.78951119e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 1.77205784e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
Reward: -301  Episode Reward:  -576
xxxxx
x .ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.77205784e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
------
Step:7, Action:West
State  138
Old Q Values:  [ 1.77205784e+03 -3.43715443e+03 -3.22965309e-01  1.83227657e+03]
New Q values:  [ 1.77205784e+03 -3.43715443e+03 -3.22965309e-01  1.67062483e+03]
Reward: 9  Episode Reward:  -567
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3107.71399819   632.53805907]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3107.71399819   632.53805907]
New Q values:  [ -281.736      -1150.91067548  1774.10294981   632.53805907]
Reward: -1  Episode Reward:  -568
xxxxx
x  ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.77205784e+03 -3.43715443e+03 -3.22965309e-01  1.67062483e+03]
------
Step:9, Action:North
State  138
Old Q Values:  [ 1.77205784e+03 -3.43715443e+03 -3.22965309e-01  1.67062483e+03]
New Q values:  [ 1.05984048e+03 -3.43715443e+03 -3.22965309e-01  1.67062483e+03]
Reward: -301  Episode Reward:  -869
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.05984048e+03 -3.43715443e+03 -3.22965309e-01  1.67062483e+03]
------
Step:10, Action:West
State  136
Old Q Values:  [-2129.37064562  4991.85855065   660.86649319   231.00690689]
New Q values:  [-2129.37064562  4991.85855065   660.86649319   593.06453581]
Reward: -1  Episode Reward:  -870
xxxxx
xga x
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1670.87257685   182.00147425]
------
Step:11, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1670.87257685   182.00147425]
New Q values:  [-9594.56523706 -8069.05606225  2165.30659593   182.00147425]
Reward: -1  Episode Reward:  -871
xxxxx
x gax
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  4991.85855065   660.86649319   593.06453581]
------
Step:12, Action:South
State  138
Old Q Values:  [ 1.05984048e+03 -3.43715443e+03 -3.22965309e-01  1.67062483e+03]
New Q values:  [ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  1.67062483e+03]
Reward: -1  Episode Reward:  -872
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2286.62705463  2480.05994663 -8220.10378799  2122.55739604]
------
Step:13, Action:South
State  216
Old Q Values:  [ 2286.62705463  2480.05994663 -8220.10378799  2122.55739604]
New Q values:  [ 2286.62705463  3042.5733237  -8220.10378799  2122.55739604]
Reward: 9  Episode Reward:  -863
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6817.1644835  -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:14, Action:North
State  288
Old Q Values:  [ 6817.1644835  -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [ 3552.85602835 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -1  Episode Reward:  -864
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2078.90682129 2755.30078316    0.         2598.71043955]
------
Step:15, Action:South
State  216
Old Q Values:  [ 2286.62705463  3042.5733237  -8220.10378799  2122.55739604]
New Q values:  [ 2286.62705463  2282.28613798 -8220.10378799  2122.55739604]
Reward: -1  Episode Reward:  -865
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3552.85602835 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:16, Action:North
State  288
Old Q Values:  [ 3552.85602835 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 -2210.70147318]
Reward: -1  Episode Reward:  -866
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2286.62705463  2282.28613798 -8220.10378799  2122.55739604]
------
Step:17, Action:North
State  216
Old Q Values:  [ 2286.62705463  2282.28613798 -8220.10378799  2122.55739604]
New Q values:  [ 2411.60838705  2282.28613798 -8220.10378799  2122.55739604]
Reward: -1  Episode Reward:  -867
xxxxx
x gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  4991.85855065   660.86649319   593.06453581]
------
Step:18, Action:South
State  136
Old Q Values:  [-2129.37064562  4991.85855065   660.86649319   593.06453581]
New Q values:  [-2129.37064562  2719.62593637   660.86649319   593.06453581]
Reward: -1  Episode Reward:  -868
xxxxx
x  gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2411.60838705  2282.28613798 -8220.10378799  2122.55739604]
------
Step:19, Action:South
State  216
Old Q Values:  [ 2411.60838705  2282.28613798 -8220.10378799  2122.55739604]
New Q values:  [ 2411.60838705  1544.27361351 -8220.10378799  2122.55739604]
Reward: -1  Episode Reward:  -869
xxxxx
x   x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 -2210.70147318]
------
Step:20, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 -2210.70147318]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 18440.84816697]
Reward: 9  Episode Reward:  -860
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799 64399.09585415  6910.31426142]
------
Step:21, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  4788.35985953 75263.8436338 ]
New Q values:  [ 1637.72437281  1974.75214244  7446.9983939  75263.8436338 ]
Reward: -1  Episode Reward:  -861
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 18440.84816697]
------
Step:22, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 18440.84816697]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 29954.89235693]
Reward: -1  Episode Reward:  -862
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  7446.9983939  75263.8436338 ]
------
Step:23, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  7446.9983939  75263.8436338 ]
New Q values:  [  1637.72437281   1974.75214244   7446.9983939  104379.98952331]
Reward: 100009  Episode Reward:  99147
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.74268421e+03 2.94054734e+04 2.45392999e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 2924.1337054   610.93635926]
New Q values:  [ 169.9257398  3934.68362957 1898.53599827  610.93635926]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2411.60838705  1544.27361351 -8220.10378799  2122.55739604]
------
Step:2, Action:North
State  210
Old Q Values:  [28117.2164618   2501.49391281   790.72804752 12604.85761277]
New Q values:  [11753.47403319  2501.49391281   790.72804752 12604.85761277]
Reward: 9  Episode Reward:  18
xxxxx
x .ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  1.67062483e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  1.67062483e+03]
New Q values:  [ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  1.20588082e+03]
Reward: 9  Episode Reward:  27
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1774.10294981   632.53805907]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1774.10294981   632.53805907]
New Q values:  [ -281.736      -1150.91067548  1070.80542479   632.53805907]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  1.20588082e+03]
------
Step:5, Action:West
State  138
Old Q Values:  [ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  1.20588082e+03]
New Q values:  [ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  8.02993954e+02]
Reward: -1  Episode Reward:  25
xxxxx
x a x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1070.80542479   632.53805907]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1070.80542479   632.53805907]
New Q values:  [ -281.736      -1150.91067548   745.67431529   632.53805907]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  8.02993954e+02]
------
Step:7, Action:North
State  138
Old Q Values:  [ 1.05984048e+03 -6.31443789e+02 -3.22965309e-01  8.02993954e+02]
New Q values:  [ 5.61288339e+02 -6.31443789e+02 -3.22965309e-01  8.02993954e+02]
Reward: -301  Episode Reward:  -277
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.61288339e+02 -6.31443789e+02 -3.22965309e-01  8.02993954e+02]
------
Step:8, Action:West
State  138
Old Q Values:  [ 5.61288339e+02 -6.31443789e+02 -3.22965309e-01  8.02993954e+02]
New Q values:  [ 5.61288339e+02 -6.31443789e+02 -3.22965309e-01  5.44299876e+02]
Reward: -1  Episode Reward:  -278
xxxxx
x a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   745.67431529   632.53805907]
------
Step:9, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2165.30659593   182.00147425]
New Q values:  [-9594.56523706 -8069.05606225  1681.41041929   182.00147425]
Reward: -1  Episode Reward:  -279
xxxxx
x gax
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2719.62593637   660.86649319   593.06453581]
------
Step:10, Action:South
State  138
Old Q Values:  [ 5.61288339e+02 -6.31443789e+02 -3.22965309e-01  5.44299876e+02]
New Q values:  [ 5.61288339e+02  4.70305000e+02 -3.22965309e-01  5.44299876e+02]
Reward: -1  Episode Reward:  -280
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2411.60838705  1544.27361351 -8220.10378799  2122.55739604]
------
Step:11, Action:North
State  218
Old Q Values:  [2078.90682129 2755.30078316    0.         2598.71043955]
New Q values:  [ 999.34923028 2755.30078316    0.         2598.71043955]
Reward: -1  Episode Reward:  -281
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 5.61288339e+02  4.70305000e+02 -3.22965309e-01  5.44299876e+02]
------
Step:12, Action:North
State  138
Old Q Values:  [ 5.61288339e+02  4.70305000e+02 -3.22965309e-01  5.44299876e+02]
New Q values:  [ 2.12301837e+02  4.70305000e+02 -3.22965309e-01  5.44299876e+02]
Reward: -301  Episode Reward:  -582
xxxxx
x  ax
x.  x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.70305000e+02 -3.22965309e-01  5.44299876e+02]
------
Step:13, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  4.70305000e+02 -3.22965309e-01  5.44299876e+02]
New Q values:  [ 2.12301837e+02  4.70305000e+02 -3.22965309e-01  4.40822245e+02]
Reward: -1  Episode Reward:  -583
xxxxx
x a x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   745.67431529   632.53805907]
------
Step:14, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   611.86548126   740.11029907]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261   740.11029907]
Reward: -1  Episode Reward:  -584
xxxxx
x  ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.70305000e+02 -3.22965309e-01  4.40822245e+02]
------
Step:15, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  4.70305000e+02 -3.22965309e-01  4.40822245e+02]
New Q values:  [ 2.12301837e+02  1.01411224e+03 -3.22965309e-01  4.40822245e+02]
Reward: -1  Episode Reward:  -585
xxxxx
x   x
x. ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 999.34923028 2755.30078316    0.         2598.71043955]
------
Step:16, Action:South
State  216
Old Q Values:  [ 2411.60838705  1544.27361351 -8220.10378799  2122.55739604]
New Q values:  [ 2411.60838705  9609.57715248 -8220.10378799  2122.55739604]
Reward: 9  Episode Reward:  -576
xxxxx
x   x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 29954.89235693]
------
Step:17, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 29954.89235693]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 43301.35379977]
Reward: 9  Episode Reward:  -567
xxxxx
x   x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  1637.72437281   1974.75214244   7446.9983939  104379.98952331]
------
Step:18, Action:West
State  273
Old Q Values:  [  1637.72437281   1974.75214244   7446.9983939  104379.98952331]
New Q values:  [ 1637.72437281  1974.75214244  7446.9983939  56026.44787912]
Reward: 9  Episode Reward:  -558
xxxxx
x  gx
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[47563.50689931 12764.58618105  3851.09595999  1875.31501677]
------
Step:19, Action:North
State  257
Old Q Values:  [47563.50689931 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [105695.95596119  12764.58618105   3851.09595999   1875.31501677]
Reward: 100009  Episode Reward:  99451
xxxxx
x   x
xa gx
x   x
xxxxx
Episode # 1300
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   745.67431529   632.53805907]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1681.41041929   182.00147425]
New Q values:  [-9594.56523706 -8069.05606225  1493.85194863   182.00147425]
Reward: 9  Episode Reward:  9
xxxxx
x.gax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2719.62593637   660.86649319   593.06453581]
------
Step:2, Action:South
State  136
Old Q Values:  [-2129.37064562  2719.62593637   660.86649319   593.06453581]
New Q values:  [-2129.37064562  3976.12352029   660.86649319   593.06453581]
Reward: 9  Episode Reward:  18
xxxxx
x. gx
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2411.60838705  9609.57715248 -8220.10378799  2122.55739604]
------
Step:3, Action:South
State  208
Old Q Values:  [ -537.1195292   5054.16053359 -4584.50430574  6997.65951961]
New Q values:  [ -537.1195292  15017.47035336 -4584.50430574  6997.65951961]
Reward: 9  Episode Reward:  27
xxxxx
x.  x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 43301.35379977]
------
Step:4, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 43301.35379977]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 34133.87588364]
Reward: 9  Episode Reward:  36
xxxxx
x.  x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  7446.9983939  56026.44787912]
------
Step:5, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  7446.9983939  56026.44787912]
New Q values:  [ 1637.72437281  1974.75214244  7446.9983939  54124.76594   ]
Reward: 9  Episode Reward:  45
xxxxx
x.  x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[105695.95596119  12764.58618105   3851.09595999   1875.31501677]
------
Step:6, Action:North
State  257
Old Q Values:  [105695.95596119  12764.58618105   3851.09595999   1875.31501677]
New Q values:  [72543.53337548 12764.58618105  3851.09595999  1875.31501677]
Reward: 9  Episode Reward:  54
xxxxx
x.  x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[ 82228.67666629  16101.90751562 100865.83663667      0.        ]
------
Step:7, Action:East
State  179
Old Q Values:  [ 82228.67666629  16101.90751562 100865.83663667      0.        ]
New Q values:  [82228.67666629 16101.90751562 48004.41527268     0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x.  x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  2.55289354e+04  1.08295700e+04  2.17939995e+03]
------
Step:8, Action:East
State  195
Old Q Values:  [  38.85388605 5779.07673195 8206.66237355 1169.39963074]
New Q values:  [  38.85388605 5779.07673195 7063.52223325 1169.39963074]
Reward: -1  Episode Reward:  52
xxxxx
x.  x
x  ax
x  gx
xxxxx
Step:9, Action:South
State  210
Old Q Values:  [11753.47403319  2501.49391281   790.72804752 12604.85761277]
New Q values:  [11753.47403319 -1218.54515104   790.72804752 12604.85761277]
Reward: -10001  Episode Reward:  -9949
xxxxx
x.  x
x  ax
x g x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.48540365e+02 7.48863441e+03 3.33862213e+00]
------
Step:1, Action:East
State  180
Old Q Values:  [ 1934.87827965  2441.21181368  5055.98569805 -4966.32149798]
New Q values:  [ 1934.87827965  2441.21181368  3998.87164847 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1710.03508383  6570.2578975    231.67262594]
------
Step:2, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 1898.53599827  610.93635926]
New Q values:  [ 169.9257398  3934.68362957 3647.68754505  610.93635926]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2411.60838705  9609.57715248 -8220.10378799  2122.55739604]
------
Step:3, Action:South
State  216
Old Q Values:  [ 2411.60838705  9609.57715248 -8220.10378799  2122.55739604]
New Q values:  [ 2411.60838705 14089.39362609 -8220.10378799  2122.55739604]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 34133.87588364]
------
Step:4, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 34133.87588364]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 32978.6791097 ]
Reward: 9  Episode Reward:  36
xxxxx
x.g.x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799 64399.09585415  6910.31426142]
------
Step:5, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  7446.9983939  54124.76594   ]
New Q values:  [ 1637.72437281  1974.75214244 12871.80309047 54124.76594   ]
Reward: -1  Episode Reward:  35
xxxxx
x. gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 32978.6791097 ]
------
Step:6, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 32978.6791097 ]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 29428.30142588]
Reward: -1  Episode Reward:  34
xxxxx
x. .x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 12871.80309047 54124.76594   ]
------
Step:7, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 12871.80309047 54124.76594   ]
New Q values:  [ 1637.72437281  1974.75214244 12871.80309047 43418.36638864]
Reward: 9  Episode Reward:  43
xxxxx
x. .x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[72543.53337548 12764.58618105  3851.09595999  1875.31501677]
------
Step:8, Action:North
State  257
Old Q Values:  [72543.53337548 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [55681.96655166 12764.58618105  3851.09595999  1875.31501677]
Reward: -1  Episode Reward:  42
xxxxx
x. .x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631  8582.42170498     0.        ]
------
Step:9, Action:North
State  183
Old Q Values:  [ 1664.53073035   301.45090469 12437.34178406  1554.80203889]
New Q values:  [ 2343.43228624   301.45090469 12437.34178406  1554.80203889]
Reward: 9  Episode Reward:  51
xxxxx
xa .x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 5574.066647    238.35800069    0.        ]
------
Step:10, Action:South
State  103
Old Q Values:  [ 221.30610858 5574.066647    238.35800069    0.        ]
New Q values:  [ 221.30610858 5960.22919402  238.35800069    0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x  .x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2343.43228624   301.45090469 12437.34178406  1554.80203889]
------
Step:11, Action:East
State  181
Old Q Values:  [4.04754886e+02 1.48540365e+02 7.48863441e+03 3.33862213e+00]
New Q values:  [  404.75488615   148.54036549 -1034.06886838     3.33862213]
Reward: -10001  Episode Reward:  -9951
xxxxx
x  .x
x g x
x   x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 23793.8080187  -3827.07475774   767.35890262]
------
Step:1, Action:South
State  193
Old Q Values:  [-5922.26708831 23793.8080187  -3827.07475774   767.35890262]
New Q values:  [-5922.26708831 22548.43312407 -3827.07475774   767.35890262]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 12871.80309047 43418.36638864]
------
Step:2, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 12871.80309047 43418.36638864]
New Q values:  [ 1637.72437281  1974.75214244 12871.80309047 17970.14212255]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1991.31855696   26.73544252  790.826661    -12.17474163]
------
Step:3, Action:North
State  261
Old Q Values:  [1991.31855696   26.73544252  790.826661    -12.17474163]
New Q values:  [4533.129958     26.73544252  790.826661    -12.17474163]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2343.43228624   301.45090469 12437.34178406  1554.80203889]
------
Step:4, Action:East
State  181
Old Q Values:  [  404.75488615   148.54036549 -1034.06886838     3.33862213]
New Q values:  [4.04754886e+02 1.48540365e+02 6.35030239e+03 3.33862213e+00]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 22548.43312407 -3827.07475774   767.35890262]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.74268421e+03 2.94054734e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.20162024e+04 2.94054734e+04 2.45392999e+03]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799 64399.09585415  6910.31426142]
------
Step:6, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 12871.80309047 17970.14212255]
New Q values:  [ 1637.72437281  1974.75214244 13982.61166395 17970.14212255]
Reward: 9  Episode Reward:  34
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 29428.30142588]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 29428.30142588]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 17161.76320712]
Reward: -1  Episode Reward:  33
xxxxx
x..gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 13982.61166395 17970.14212255]
------
Step:8, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 13982.61166395 17970.14212255]
New Q values:  [ 1637.72437281  1974.75214244 13982.61166395  8547.39583642]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4533.129958     26.73544252  790.826661    -12.17474163]
------
Step:9, Action:North
State  261
Old Q Values:  [4533.129958     26.73544252  790.826661    -12.17474163]
New Q values:  [3717.74270016   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  31
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.48540365e+02 6.35030239e+03 3.33862213e+00]
------
Step:10, Action:North
State  180
Old Q Values:  [ 1934.87827965  2441.21181368  3998.87164847 -4966.32149798]
New Q values:  [ 3772.76024382  2441.21181368  3998.87164847 -4966.32149798]
Reward: 9  Episode Reward:  40
xxxxx
xa..x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        9978.02977319 2846.56389321 -180.6       ]
------
Step:11, Action:East
State  100
Old Q Values:  [   0.         4228.78310806  443.65288404    0.        ]
New Q values:  [   0.         4228.78310806  493.77732793    0.        ]
Reward: 9  Episode Reward:  49
xxxxx
xga.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[   0.         1036.38724771    0.            0.        ]
------
Step:12, Action:South
State  118
Old Q Values:  [ 617.06804554 9204.39250305    0.          503.49427758]
New Q values:  [ 617.06804554 5652.23437047    0.          503.49427758]
Reward: -1  Episode Reward:  48
xxxxx
x  .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1710.03508383  6570.2578975    231.67262594]
------
Step:13, Action:East
State  196
Old Q Values:  [-2469.90645144  1710.03508383  6570.2578975    231.67262594]
New Q values:  [-2469.90645144  1710.03508383  7132.74426501   231.67262594]
Reward: -1  Episode Reward:  47
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ -537.1195292  15017.47035336 -4584.50430574  6997.65951961]
------
Step:14, Action:South
State  208
Old Q Values:  [ -537.1195292  15017.47035336 -4584.50430574  6997.65951961]
New Q values:  [ -537.1195292  11154.91710348 -4584.50430574  6997.65951961]
Reward: -1  Episode Reward:  46
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 17161.76320712]
------
Step:15, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 17161.76320712]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059  7844.25289277]
Reward: -1  Episode Reward:  45
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3267.15869975 -5807.06396197  2305.08372417  2150.35590639]
------
Step:16, Action:North
State  276
Old Q Values:  [ 3267.15869975 -5807.06396197  2305.08372417  2150.35590639]
New Q values:  [ 2586.70928896 -5807.06396197  2305.08372417  2150.35590639]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -2.0061023e+02  4.2681527e+03  0.0000000e+00]
------
Step:17, Action:East
State  199
Old Q Values:  [  14.86214194 1549.78616729 7748.13858789 1915.70494401]
New Q values:  [  14.86214194 1549.78616729 6880.11271899 1915.70494401]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11753.47403319 -1218.54515104   790.72804752 12604.85761277]
------
Step:18, Action:West
State  210
Old Q Values:  [11753.47403319 -1218.54515104   790.72804752 12604.85761277]
New Q values:  [11753.47403319 -1218.54515104   790.72804752  6321.78885416]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.7887208e-01 -2.0061023e+02  4.2681527e+03  0.0000000e+00]
------
Step:19, Action:East
State  198
Old Q Values:  [-2.7887208e-01 -2.0061023e+02  4.2681527e+03  0.0000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  5.23270329e+03  0.00000000e+00]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11753.47403319 -1218.54515104   790.72804752  6321.78885416]
------
Step:20, Action:North
State  208
Old Q Values:  [ -537.1195292  11154.91710348 -4584.50430574  6997.65951961]
New Q values:  [89499.25248405 11154.91710348 -4584.50430574  6997.65951961]
Reward: 100009  Episode Reward:  100050
xxxxx
x  ax
xg  x
x   x
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 13982.61166395  8547.39583642]
------
Step:1, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 13982.61166395  8547.39583642]
New Q values:  [ 1637.72437281  1974.75214244 13982.61166395  4539.68114462]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3717.74270016   26.73544252  790.826661    -12.17474163]
------
Step:2, Action:North
State  261
Old Q Values:  [3717.74270016   26.73544252  790.826661    -12.17474163]
New Q values:  [3397.58779702   26.73544252  790.826661    -12.17474163]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.48540365e+02 6.35030239e+03 3.33862213e+00]
------
Step:3, Action:North
State  180
Old Q Values:  [ 3772.76024382  2441.21181368  3998.87164847 -4966.32149798]
New Q values:  [ 4507.91302948  2441.21181368  3998.87164847 -4966.32149798]
Reward: 9  Episode Reward:  27
xxxxx
xa..x
xg..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        9978.02977319 2846.56389321 -180.6       ]
------
Step:4, Action:East
State  103
Old Q Values:  [ 221.30610858 5960.22919402  238.35800069    0.        ]
New Q values:  [ 221.30610858 5960.22919402 1796.41351142    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x a.x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[ 617.06804554 5652.23437047    0.          503.49427758]
------
Step:5, Action:North
State  118
Old Q Values:  [ 617.06804554 5652.23437047    0.          503.49427758]
New Q values:  [1761.89752936 5652.23437047    0.          503.49427758]
Reward: -301  Episode Reward:  -265
xxxxx
x a.x
xg..x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 5652.23437047    0.          503.49427758]
------
Step:6, Action:South
State  118
Old Q Values:  [1761.89752936 5652.23437047    0.          503.49427758]
New Q values:  [1761.89752936 3836.1047348     0.          503.49427758]
Reward: 9  Episode Reward:  -256
xxxxx
x  .x
x a.x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  5.23270329e+03  0.00000000e+00]
------
Step:7, Action:East
State  196
Old Q Values:  [-2469.90645144  1710.03508383  7132.74426501   231.67262594]
New Q values:  [-2469.90645144  1710.03508383 29708.27345122   231.67262594]
Reward: 9  Episode Reward:  -247
xxxxx
x  .x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[89499.25248405 11154.91710348 -4584.50430574  6997.65951961]
------
Step:8, Action:North
State  208
Old Q Values:  [89499.25248405 11154.91710348 -4584.50430574  6997.65951961]
New Q values:  [132301.75679862  11154.91710348  -4584.50430574   6997.65951961]
Reward: 100009  Episode Reward:  99762
xxxxx
xg ax
x   x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 22548.43312407 -3827.07475774   767.35890262]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.20162024e+04 2.94054734e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.81316097e+04 2.94054734e+04 2.45392999e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799 64399.09585415  6910.31426142]
------
Step:2, Action:East
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799 64399.09585415  6910.31426142]
New Q values:  [ 3995.40702467 -8521.23367799 28118.31420949  6910.31426142]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059  7844.25289277]
------
Step:3, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059  7844.25289277]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059 11572.59541996]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799 28118.31420949  6910.31426142]
------
Step:4, Action:East
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799 28118.31420949  6910.31426142]
New Q values:  [ 3995.40702467 -8521.23367799 14718.50430978  6910.31426142]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059 11572.59541996]
------
Step:5, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059 11572.59541996]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059  9043.98946092]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
xg .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799 14718.50430978  6910.31426142]
------
Step:6, Action:East
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799 14718.50430978  6910.31426142]
New Q values:  [ 3995.40702467 -8521.23367799  8599.99856219  6910.31426142]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x. .x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059  9043.98946092]
------
Step:7, Action:West
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059  9043.98946092]
New Q values:  [ 2106.53052773 -6396.61506955 -5588.09647059   196.99535302]
Reward: -10001  Episode Reward:  -9987
xxxxx
x.. x
x. .x
x.g x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   385.23769261   740.11029907]
------
Step:1, Action:West
State  121
Old Q Values:  [    0.             0.         -8209.41191864   294.84238464]
New Q values:  [    0.             0.         -8209.41191864   273.64612079]
Reward: 9  Episode Reward:  9
xxxxx
xa gx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   501.03055643 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  109
Old Q Values:  [ -241.10880094   501.03055643 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  2110.90293953 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.48540365e+02 6.35030239e+03 3.33862213e+00]
------
Step:3, Action:East
State  188
Old Q Values:  [-6523.78898263  2790.67422621  1944.07988228     0.        ]
New Q values:  [-6523.78898263  2790.67422621  1963.43704178     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 3647.68754505  610.93635926]
------
Step:4, Action:South
State  196
Old Q Values:  [-2469.90645144  1710.03508383 29708.27345122   231.67262594]
New Q values:  [-2469.90645144  1465.42682022 29708.27345122   231.67262594]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 2586.70928896 -5807.06396197  2305.08372417  2150.35590639]
------
Step:5, Action:North
State  276
Old Q Values:  [ 2586.70928896 -5807.06396197  2305.08372417  2150.35590639]
New Q values:  [ 2603.89470219 -5807.06396197  2305.08372417  2150.35590639]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  5.23270329e+03  0.00000000e+00]
------
Step:6, Action:East
State  196
Old Q Values:  [-2469.90645144  1465.42682022 29708.27345122   231.67262594]
New Q values:  [-2469.90645144  1465.42682022 16109.52746831   231.67262594]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2411.60838705 14089.39362609 -8220.10378799  2122.55739604]
------
Step:7, Action:South
State  208
Old Q Values:  [132301.75679862  11154.91710348  -4584.50430574   6997.65951961]
New Q values:  [132301.75679862   5099.32599971  -4584.50430574   6997.65951961]
Reward: 9  Episode Reward:  43
xxxxx
xg .x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2106.53052773 -6396.61506955 -5588.09647059   196.99535302]
------
Step:8, Action:North
State  288
Old Q Values:  [ 2106.53052773 -6396.61506955 -5588.09647059   196.99535302]
New Q values:  [40532.53925068 -6396.61506955 -5588.09647059   196.99535302]
Reward: -1  Episode Reward:  42
xxxxx
x g.x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[132301.75679862   5099.32599971  -4584.50430574   6997.65951961]
------
Step:9, Action:North
State  216
Old Q Values:  [ 2411.60838705 14089.39362609 -8220.10378799  2122.55739604]
New Q values:  [ 1274.27702535 14089.39362609 -8220.10378799  2122.55739604]
Reward: 9  Episode Reward:  51
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.01411224e+03 -3.22965309e-01  4.40822245e+02]
------
Step:10, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  1.01411224e+03 -3.22965309e-01  4.40822245e+02]
New Q values:  [ 2.12301837e+02  1.23163513e+03 -3.22965309e-01  4.40822245e+02]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 999.34923028 2755.30078316    0.         2598.71043955]
------
Step:11, Action:South
State  216
Old Q Values:  [ 1274.27702535 14089.39362609 -8220.10378799  2122.55739604]
New Q values:  [ 1274.27702535 17794.91922564 -8220.10378799  2122.55739604]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[40532.53925068 -6396.61506955 -5588.09647059   196.99535302]
------
Step:12, Action:North
State  288
Old Q Values:  [40532.53925068 -6396.61506955 -5588.09647059   196.99535302]
New Q values:  [21550.89146796 -6396.61506955 -5588.09647059   196.99535302]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1274.27702535 17794.91922564 -8220.10378799  2122.55739604]
------
Step:13, Action:South
State  216
Old Q Values:  [ 1274.27702535 17794.91922564 -8220.10378799  2122.55739604]
New Q values:  [ 1274.27702535 13582.63513064 -8220.10378799  2122.55739604]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21550.89146796 -6396.61506955 -5588.09647059   196.99535302]
------
Step:14, Action:North
State  288
Old Q Values:  [21550.89146796 -6396.61506955 -5588.09647059   196.99535302]
New Q values:  [12694.54712638 -6396.61506955 -5588.09647059   196.99535302]
Reward: -1  Episode Reward:  46
xxxxx
x g x
x  ax
x.  x
xxxxx
Step:15, Action:East
State  216
Old Q Values:  [ 1274.27702535 13582.63513064 -8220.10378799  2122.55739604]
New Q values:  [ 1274.27702535 13582.63513064   606.149024    2122.55739604]
Reward: -301  Episode Reward:  -255
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1274.27702535 13582.63513064   606.149024    2122.55739604]
------
Step:16, Action:South
State  216
Old Q Values:  [ 1274.27702535 13582.63513064   606.149024    2122.55739604]
New Q values:  [1274.27702535 9240.81819017  606.149024   2122.55739604]
Reward: -1  Episode Reward:  -256
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12694.54712638 -6396.61506955 -5588.09647059   196.99535302]
------
Step:17, Action:North
State  288
Old Q Values:  [12694.54712638 -6396.61506955 -5588.09647059   196.99535302]
New Q values:  [ 7849.4643076  -6396.61506955 -5588.09647059   196.99535302]
Reward: -1  Episode Reward:  -257
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1274.27702535 9240.81819017  606.149024   2122.55739604]
------
Step:18, Action:South
State  216
Old Q Values:  [1274.27702535 9240.81819017  606.149024   2122.55739604]
New Q values:  [1274.27702535 6050.56656835  606.149024   2122.55739604]
Reward: -1  Episode Reward:  -258
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7849.4643076  -6396.61506955 -5588.09647059   196.99535302]
------
Step:19, Action:North
State  288
Old Q Values:  [ 7849.4643076  -6396.61506955 -5588.09647059   196.99535302]
New Q values:  [ 4954.35569355 -6396.61506955 -5588.09647059   196.99535302]
Reward: -1  Episode Reward:  -259
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1274.27702535 6050.56656835  606.149024   2122.55739604]
------
Step:20, Action:South
State  216
Old Q Values:  [1274.27702535 6050.56656835  606.149024   2122.55739604]
New Q values:  [1274.27702535 3905.9333354   606.149024   2122.55739604]
Reward: -1  Episode Reward:  -260
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4954.35569355 -6396.61506955 -5588.09647059   196.99535302]
------
Step:21, Action:West
State  288
Old Q Values:  [ 4954.35569355 -6396.61506955 -5588.09647059   196.99535302]
New Q values:  [ 4954.35569355 -6396.61506955 -5588.09647059  4272.9816404 ]
Reward: -1  Episode Reward:  -261
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 13982.61166395  4539.68114462]
------
Step:22, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 13982.61166395  4539.68114462]
New Q values:  [ 1637.72437281  1974.75214244 13982.61166395 78525.86242334]
Reward: 100009  Episode Reward:  99748
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4954.35569355 -6396.61506955 -5588.09647059  4272.9816404 ]
------
Step:1, Action:North
State  288
Old Q Values:  [ 4954.35569355 -6396.61506955 -5588.09647059  4272.9816404 ]
New Q values:  [ 5513.18448737 -6396.61506955 -5588.09647059  4272.9816404 ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11753.47403319 -1218.54515104   790.72804752  6321.78885416]
------
Step:2, Action:North
State  210
Old Q Values:  [11753.47403319 -1218.54515104   790.72804752  6321.78885416]
New Q values:  [ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.23163513e+03 -3.22965309e-01  4.40822245e+02]
------
Step:3, Action:South
State  130
Old Q Values:  [59164.41397681  5276.29178842  -180.00807518 99029.00098578]
New Q values:  [59164.41397681 41800.44375495  -180.00807518 99029.00098578]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[132301.75679862   5099.32599971  -4584.50430574   6997.65951961]
------
Step:4, Action:North
State  208
Old Q Values:  [132301.75679862   5099.32599971  -4584.50430574   6997.65951961]
New Q values:  [53289.59325814  5099.32599971 -4584.50430574  6997.65951961]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.23163513e+03 -3.22965309e-01  4.40822245e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  1.23163513e+03 -3.22965309e-01  4.40822245e+02]
New Q values:  [ 2.12301837e+02  1.23163513e+03 -3.22965309e-01  4.03761988e+02]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   385.23769261   740.11029907]
------
Step:6, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261   740.11029907]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261   685.71535638]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1280.9041225   496.66968841 -252.78192178]
------
Step:7, Action:South
State  107
Old Q Values:  [-252.35169558 1280.9041225   496.66968841 -252.78192178]
New Q values:  [-252.35169558 4248.96418422  496.66968841 -252.78192178]
Reward: 9  Episode Reward:  43
xxxxx
x   x
xa. x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2343.43228624   301.45090469 12437.34178406  1554.80203889]
------
Step:8, Action:East
State  187
Old Q Values:  [ 320.07341842    0.         1824.39604684    0.        ]
New Q values:  [ 320.07341842    0.         1283.40404137    0.        ]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:9, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.55289354e+04  1.08295700e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.55289354e+04  6.22776464e+03  2.17939995e+03]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
------
Step:10, Action:West
State  218
Old Q Values:  [ 999.34923028 2755.30078316    0.         2598.71043955]
New Q values:  [ 999.34923028 2755.30078316    0.         1587.12979845]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:11, Action:East
State  203
Old Q Values:  [3.60604218e+00 2.47391199e+03 1.49157236e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 2.47391199e+03 1.42261918e+03 4.59156348e+03]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 999.34923028 2755.30078316    0.         1587.12979845]
------
Step:12, Action:West
State  218
Old Q Values:  [ 999.34923028 2755.30078316    0.         1587.12979845]
New Q values:  [ 999.34923028 2755.30078316    0.         1182.49754201]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:13, Action:East
State  194
Old Q Values:  [-6.00000000e-01  2.55289354e+04  6.22776464e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  2.55289354e+04  4.38704251e+03  2.17939995e+03]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
------
Step:14, Action:West
State  216
Old Q Values:  [1274.27702535 3905.9333354   606.149024   2122.55739604]
New Q values:  [1274.27702535 3905.9333354   606.149024   2028.82804729]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 3647.68754505  610.93635926]
------
Step:15, Action:South
State  194
Old Q Values:  [-6.00000000e-01  2.55289354e+04  4.38704251e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  1.27909737e+04  4.38704251e+03  2.17939995e+03]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  8599.99856219  6910.31426142]
------
Step:16, Action:East
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  8599.99856219  6910.31426142]
New Q values:  [ 3995.40702467 -8521.23367799  5093.35477109  6910.31426142]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5513.18448737 -6396.61506955 -5588.09647059  4272.9816404 ]
------
Step:17, Action:North
State  288
Old Q Values:  [ 5513.18448737 -6396.61506955 -5588.09647059  4272.9816404 ]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059  4272.9816404 ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
------
Step:18, Action:West
State  218
Old Q Values:  [ 999.34923028 2755.30078316    0.         1182.49754201]
New Q values:  [ 999.34923028 2755.30078316    0.         1020.64463944]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:19, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.27909737e+04  4.38704251e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  1.27909737e+04  3.65075366e+03  2.17939995e+03]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
------
Step:20, Action:West
State  218
Old Q Values:  [ 999.34923028 2755.30078316    0.         1020.64463944]
New Q values:  [ 999.34923028 2755.30078316    0.          955.90347841]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:21, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.27909737e+04  3.65075366e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  1.27909737e+04  3.35623812e+03  2.17939995e+03]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
------
Step:22, Action:West
State  218
Old Q Values:  [ 999.34923028 2755.30078316    0.          955.90347841]
New Q values:  [ 999.34923028 2755.30078316    0.          930.00701399]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:23, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 3647.68754505  610.93635926]
New Q values:  [ 169.9257398  3934.68362957 2630.25501864  610.93635926]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1274.27702535 3905.9333354   606.149024   2028.82804729]
------
Step:24, Action:South
State  216
Old Q Values:  [1274.27702535 3905.9333354   606.149024   2028.82804729]
New Q values:  [1274.27702535 2843.66782628  606.149024   2028.82804729]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059  4272.9816404 ]
------
Step:25, Action:West
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059  4272.9816404 ]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059 25266.35138316]
Reward: -1  Episode Reward:  35
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 13982.61166395 78525.86242334]
------
Step:26, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 13982.61166395 78525.86242334]
New Q values:  [  1637.72437281   1974.75214244  13982.61166395 108120.33493483]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.23163513e+03 -3.22965309e-01  4.03761988e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  1.23163513e+03 -3.22965309e-01  4.03761988e+02]
New Q values:  [ 2.12301837e+02  2.39459071e+03 -3.22965309e-01  4.03761988e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
------
Step:2, Action:West
State  216
Old Q Values:  [1274.27702535 2843.66782628  606.149024   2028.82804729]
New Q values:  [1274.27702535 2843.66782628  606.149024   9638.57324358]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.81316097e+04 2.94054734e+04 2.45392999e+03]
------
Step:3, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 2630.25501864  610.93635926]
New Q values:  [ 169.9257398  3934.68362957 3943.07398053  610.93635926]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1274.27702535 2843.66782628  606.149024   9638.57324358]
------
Step:4, Action:West
State  216
Old Q Values:  [1274.27702535 2843.66782628  606.149024   9638.57324358]
New Q values:  [ 1274.27702535  2843.66782628   606.149024   12676.4713221 ]
Reward: -1  Episode Reward:  16
xxxxx
x.. x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.81316097e+04 2.94054734e+04 2.45392999e+03]
------
Step:5, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.27909737e+04  3.35623812e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  1.27909737e+04  3.23843190e+03  2.17939995e+03]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
------
Step:6, Action:West
State  210
Old Q Values:  [ 5076.28015197 -1218.54515104   790.72804752  6321.78885416]
New Q values:  [ 5076.28015197 -1218.54515104   790.72804752  6365.40765947]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.27909737e+04  3.23843190e+03  2.17939995e+03]
------
Step:7, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.81316097e+04 2.94054734e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.81316097e+04 1.55645308e+04 2.45392999e+03]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1274.27702535  2843.66782628   606.149024   12676.4713221 ]
------
Step:8, Action:South
State  208
Old Q Values:  [53289.59325814  5099.32599971 -4584.50430574  6997.65951961]
New Q values:  [53289.59325814  9625.03581483 -4584.50430574  6997.65951961]
Reward: 9  Episode Reward:  22
xxxxx
x.. x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059 25266.35138316]
------
Step:9, Action:West
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059 25266.35138316]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059 12185.03483169]
Reward: 9  Episode Reward:  31
xxxxx
x.. x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  5093.35477109  6910.31426142]
------
Step:10, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  5093.35477109  6910.31426142]
New Q values:  [ 3995.40702467 -8521.23367799  5093.35477109 19474.11567006]
Reward: 9  Episode Reward:  40
xxxxx
x.g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[55681.96655166 12764.58618105  3851.09595999  1875.31501677]
------
Step:11, Action:North
State  257
Old Q Values:  [55681.96655166 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [48937.33982213 12764.58618105  3851.09595999  1875.31501677]
Reward: -1  Episode Reward:  39
xxxxx
x..gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[88883.84400489  5488.43861631  8582.42170498     0.        ]
------
Step:12, Action:North
State  177
Old Q Values:  [88883.84400489  5488.43861631  8582.42170498     0.        ]
New Q values:  [60476.05138135  5488.43861631  8582.42170498     0.        ]
Reward: 9  Episode Reward:  48
xxxxx
xag x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:NE
[    0.         83057.04593133     0.             0.        ]
------
Step:13, Action:South
State  101
Old Q Values:  [  0.         523.74497759   0.           0.        ]
New Q values:  [   0.       2113.988708    0.          0.      ]
Reward: -1  Episode Reward:  47
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.48540365e+02 6.35030239e+03 3.33862213e+00]
------
Step:14, Action:East
State  177
Old Q Values:  [60476.05138135  5488.43861631  8582.42170498     0.        ]
New Q values:  [60476.05138135  5488.43861631 11871.85160192     0.        ]
Reward: -1  Episode Reward:  46
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.81316097e+04 1.55645308e+04 2.45392999e+03]
------
Step:15, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.81316097e+04 1.55645308e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.70942786e+04 1.55645308e+04 2.45392999e+03]
Reward: -1  Episode Reward:  45
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  5093.35477109 19474.11567006]
------
Step:16, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  5093.35477109 19474.11567006]
New Q values:  [ 3995.40702467 -8521.23367799  5093.35477109  8983.31297068]
Reward: -1  Episode Reward:  44
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3980.88900885 -5704.51612281  3130.20179729 -5679.36893145]
------
Step:17, Action:East
State  260
Old Q Values:  [ 3980.88900885 -5704.51612281  3130.20179729 -5679.36893145]
New Q values:  [ 3980.88900885 -5704.51612281  3946.47461012 -5679.36893145]
Reward: -1  Episode Reward:  43
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  5093.35477109  8983.31297068]
------
Step:18, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  5093.35477109  8983.31297068]
New Q values:  [ 3995.40702467 -8521.23367799  5093.35477109 18273.92713491]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[48937.33982213 12764.58618105  3851.09595999  1875.31501677]
------
Step:19, Action:North
State  261
Old Q Values:  [3397.58779702   26.73544252  790.826661    -12.17474163]
New Q values:  [3263.52583577   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  41
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.48540365e+02 6.35030239e+03 3.33862213e+00]
------
Step:20, Action:East
State  177
Old Q Values:  [60476.05138135  5488.43861631 11871.85160192     0.        ]
New Q values:  [60476.05138135  5488.43861631  9876.42421904     0.        ]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.70942786e+04 1.55645308e+04 2.45392999e+03]
------
Step:21, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.70942786e+04 1.55645308e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.23192896e+04 1.55645308e+04 2.45392999e+03]
Reward: -1  Episode Reward:  39
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  5093.35477109 18273.92713491]
------
Step:22, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  5093.35477109 18273.92713491]
New Q values:  [ 3995.40702467 -8521.23367799  5093.35477109  8288.0286047 ]
Reward: -1  Episode Reward:  38
xxxxx
x . x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3263.52583577   26.73544252  790.826661    -12.17474163]
------
Step:23, Action:North
State  261
Old Q Values:  [3263.52583577   26.73544252  790.826661    -12.17474163]
New Q values:  [5036.01286953   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  37
xxxxx
x . x
xa  x
x  gx
xxxxx
Step:24, Action:South
State  181
Old Q Values:  [4.04754886e+02 1.48540365e+02 6.35030239e+03 3.33862213e+00]
New Q values:  [4.04754886e+02 1.56962001e+03 6.35030239e+03 3.33862213e+00]
Reward: -1  Episode Reward:  36
xxxxx
x . x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5036.01286953   26.73544252  790.826661    -12.17474163]
------
Step:25, Action:North
State  261
Old Q Values:  [5036.01286953   26.73544252  790.826661    -12.17474163]
New Q values:  [3918.89586477   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  35
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.56962001e+03 6.35030239e+03 3.33862213e+00]
------
Step:26, Action:South
State  177
Old Q Values:  [60476.05138135  5488.43861631  9876.42421904     0.        ]
New Q values:  [60476.05138135 16875.97739316  9876.42421904     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[48937.33982213 12764.58618105  3851.09595999  1875.31501677]
------
Step:27, Action:North
State  261
Old Q Values:  [3918.89586477   26.73544252  790.826661    -12.17474163]
New Q values:  [3472.04906287   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  33
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.56962001e+03 6.35030239e+03 3.33862213e+00]
------
Step:28, Action:East
State  181
Old Q Values:  [4.04754886e+02 1.56962001e+03 6.35030239e+03 3.33862213e+00]
New Q values:  [4.04754886e+02 1.56962001e+03 9.30405089e+03 3.33862213e+00]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 22548.43312407 -3827.07475774   767.35890262]
------
Step:29, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.23192896e+04 1.55645308e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 7.41352441e+03 1.55645308e+04 2.45392999e+03]
Reward: -1  Episode Reward:  31
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  5093.35477109  8288.0286047 ]
------
Step:30, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  5093.35477109  8288.0286047 ]
New Q values:  [ 3995.40702467 -8521.23367799  5093.35477109  4356.22616074]
Reward: -1  Episode Reward:  30
xxxxx
x . x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3472.04906287   26.73544252  790.826661    -12.17474163]
------
Step:31, Action:North
State  261
Old Q Values:  [3472.04906287   26.73544252  790.826661    -12.17474163]
New Q values:  [4179.4348931    26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  29
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.56962001e+03 9.30405089e+03 3.33862213e+00]
------
Step:32, Action:South
State  183
Old Q Values:  [ 2343.43228624   301.45090469 12437.34178406  1554.80203889]
New Q values:  [ 2343.43228624  1373.81082981 12437.34178406  1554.80203889]
Reward: -1  Episode Reward:  28
xxxxx
x . x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4179.4348931    26.73544252  790.826661    -12.17474163]
------
Step:33, Action:North
State  261
Old Q Values:  [4179.4348931    26.73544252  790.826661    -12.17474163]
New Q values:  [5402.37649246   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  27
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2343.43228624  1373.81082981 12437.34178406  1554.80203889]
------
Step:34, Action:East
State  181
Old Q Values:  [4.04754886e+02 1.56962001e+03 9.30405089e+03 3.33862213e+00]
New Q values:  [4.04754886e+02 1.56962001e+03 1.04855503e+04 3.33862213e+00]
Reward: -1  Episode Reward:  26
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 22548.43312407 -3827.07475774   767.35890262]
------
Step:35, Action:South
State  192
Old Q Values:  [3.89777037e-01 7.41352441e+03 1.55645308e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 4.49281620e+03 1.55645308e+04 2.45392999e+03]
Reward: -1  Episode Reward:  25
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  5093.35477109  4356.22616074]
------
Step:36, Action:East
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  5093.35477109  4356.22616074]
New Q values:  [ 3995.40702467 -8521.23367799  5692.25235794  4356.22616074]
Reward: -1  Episode Reward:  24
xxxxx
x . x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059 12185.03483169]
------
Step:37, Action:West
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059 12185.03483169]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059  6581.08964006]
Reward: -1  Episode Reward:  23
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  5692.25235794  4356.22616074]
------
Step:38, Action:East
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  5692.25235794  4356.22616074]
New Q values:  [ 3995.40702467 -8521.23367799  4250.62783519  4356.22616074]
Reward: -1  Episode Reward:  22
xxxxx
x . x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059  6581.08964006]
------
Step:39, Action:West
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059  6581.08964006]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059  3938.70370424]
Reward: -1  Episode Reward:  21
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3995.40702467 -8521.23367799  4250.62783519  4356.22616074]
------
Step:40, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  4250.62783519  4356.22616074]
New Q values:  [ 3995.40702467 -8521.23367799  4250.62783519 16423.09241093]
Reward: -1  Episode Reward:  20
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[48937.33982213 12764.58618105  3851.09595999  1875.31501677]
------
Step:41, Action:North
State  261
Old Q Values:  [5402.37649246   26.73544252  790.826661    -12.17474163]
New Q values:  [5306.01568533   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  19
xxxxx
x . x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.56962001e+03 1.04855503e+04 3.33862213e+00]
------
Step:42, Action:South
State  177
Old Q Values:  [60476.05138135 16875.97739316  9876.42421904     0.        ]
New Q values:  [60476.05138135 21430.9929039   9876.42421904     0.        ]
Reward: -1  Episode Reward:  18
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[48937.33982213 12764.58618105  3851.09595999  1875.31501677]
------
Step:43, Action:North
State  260
Old Q Values:  [ 3980.88900885 -5704.51612281  3946.47461012 -5679.36893145]
New Q values:  [ 2944.12951238 -5704.51612281  3946.47461012 -5679.36893145]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  2441.21181368  3998.87164847 -4966.32149798]
------
Step:44, Action:East
State  180
Old Q Values:  [ 4507.91302948  2441.21181368  3998.87164847 -4966.32149798]
New Q values:  [ 4507.91302948  2441.21181368  6268.30788824 -4966.32149798]
Reward: -1  Episode Reward:  16
xxxxx
x . x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.49281620e+03 1.55645308e+04 2.45392999e+03]
------
Step:45, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.49281620e+03 1.55645308e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 4.49281620e+03 2.22120903e+04 2.45392999e+03]
Reward: -1  Episode Reward:  15
xxxxx
x . x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[53289.59325814  9625.03581483 -4584.50430574  6997.65951961]
------
Step:46, Action:North
State  210
Old Q Values:  [ 5076.28015197 -1218.54515104   790.72804752  6365.40765947]
New Q values:  [31738.61235652 -1218.54515104   790.72804752  6365.40765947]
Reward: -1  Episode Reward:  14
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59164.41397681 41800.44375495  -180.00807518 99029.00098578]
------
Step:47, Action:West
State  130
Old Q Values:  [59164.41397681 41800.44375495  -180.00807518 99029.00098578]
New Q values:  [ 59164.41397681  41800.44375495   -180.00807518 120376.51628367]
Reward: 100009  Episode Reward:  100023
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059  3938.70370424]
------
Step:1, Action:West
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059  3938.70370424]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059 34016.98196215]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[  1637.72437281   1974.75214244  13982.61166395 108120.33493483]
------
Step:2, Action:West
State  273
Old Q Values:  [  1637.72437281   1974.75214244  13982.61166395 108120.33493483]
New Q values:  [ 1637.72437281  1974.75214244 13982.61166395 44845.33867953]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.. x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5306.01568533   26.73544252  790.826661    -12.17474163]
------
Step:3, Action:North
State  261
Old Q Values:  [5306.01568533   26.73544252  790.826661    -12.17474163]
New Q values:  [5273.47136248   26.73544252  790.826661    -12.17474163]
Reward: 9  Episode Reward:  27
xxxxx
x...x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.56962001e+03 1.04855503e+04 3.33862213e+00]
------
Step:4, Action:East
State  181
Old Q Values:  [4.04754886e+02 1.56962001e+03 1.04855503e+04 3.33862213e+00]
New Q values:  [4.04754886e+02 1.56962001e+03 4.86324720e+03 3.33862213e+00]
Reward: -9991  Episode Reward:  -9964
xxxxx
x...x
x g x
x   x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5273.47136248   26.73544252  790.826661    -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [5273.47136248   26.73544252  790.826661    -12.17474163]
New Q values:  [3573.76270576   26.73544252  790.826661    -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.04754886e+02 1.56962001e+03 4.86324720e+03 3.33862213e+00]
------
Step:2, Action:East
State  181
Old Q Values:  [4.04754886e+02 1.56962001e+03 4.86324720e+03 3.33862213e+00]
New Q values:  [4.04754886e+02 1.56962001e+03 8.71522882e+03 3.33862213e+00]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 22548.43312407 -3827.07475774   767.35890262]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 22548.43312407 -3827.07475774   767.35890262]
New Q values:  [-5922.26708831 22478.37485349 -3827.07475774   767.35890262]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 13982.61166395 44845.33867953]
------
Step:4, Action:West
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  4250.62783519 16423.09241093]
New Q values:  [ 3995.40702467 -8521.23367799  4250.62783519  7640.7657761 ]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3573.76270576   26.73544252  790.826661    -12.17474163]
------
Step:5, Action:North
State  261
Old Q Values:  [3573.76270576   26.73544252  790.826661    -12.17474163]
New Q values:  [ 1.95717205e+04  2.67354425e+01  7.90826661e+02 -1.21747416e+01]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   9876.42421904     0.        ]
------
Step:6, Action:North
State  181
Old Q Values:  [4.04754886e+02 1.56962001e+03 8.71522882e+03 3.33862213e+00]
New Q values:  [4.15652936e+02 1.56962001e+03 8.71522882e+03 3.33862213e+00]
Reward: 9  Episode Reward:  34
xxxxx
xa. x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869  827.83660548  431.25952337 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869  827.83660548  431.25952337 -120.29354603]
New Q values:  [-177.44732869 4061.73717741  431.25952337 -120.29354603]
Reward: -1  Episode Reward:  33
xxxxx
x . x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2343.43228624  1373.81082981 12437.34178406  1554.80203889]
------
Step:8, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243 11182.7662635      0.        ]
New Q values:  [    0.         -5536.05678243  8309.7986232      0.        ]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  1.27909737e+04  3.23843190e+03  2.17939995e+03]
------
Step:9, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.49281620e+03 2.22120903e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.57769489e+03 2.22120903e+04 2.45392999e+03]
Reward: -1  Episode Reward:  31
xxxxx
x . x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 2603.89470219 -5807.06396197  2305.08372417  2150.35590639]
------
Step:10, Action:North
State  276
Old Q Values:  [ 2603.89470219 -5807.06396197  2305.08372417  2150.35590639]
New Q values:  [ 7704.58496565 -5807.06396197  2305.08372417  2150.35590639]
Reward: -1  Episode Reward:  30
xxxxx
xg. x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.57769489e+03 2.22120903e+04 2.45392999e+03]
------
Step:11, Action:East
State  204
Old Q Values:  [   0.         2248.42706983 2445.50931193  441.58769553]
New Q values:  [   0.         2248.42706983 4786.5451214   441.58769553]
Reward: 9  Episode Reward:  39
xxxxx
x g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1274.27702535  2843.66782628   606.149024   12676.4713221 ]
------
Step:12, Action:West
State  208
Old Q Values:  [53289.59325814  9625.03581483 -4584.50430574  6997.65951961]
New Q values:  [53289.59325814  9625.03581483 -4584.50430574  9541.97626389]
Reward: -1  Episode Reward:  38
xxxxx
x .gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 22478.37485349 -3827.07475774   767.35890262]
------
Step:13, Action:South
State  204
Old Q Values:  [   0.         2248.42706983 4786.5451214   441.58769553]
New Q values:  [   0.         3210.14631763 4786.5451214   441.58769553]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7704.58496565 -5807.06396197  2305.08372417  2150.35590639]
------
Step:14, Action:North
State  272
Old Q Values:  [ 3995.40702467 -8521.23367799  4250.62783519  7640.7657761 ]
New Q values:  [ 8261.18989464 -8521.23367799  4250.62783519  7640.7657761 ]
Reward: -1  Episode Reward:  36
xxxxx
xg. x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.57769489e+03 2.22120903e+04 2.45392999e+03]
------
Step:15, Action:East
State  204
Old Q Values:  [   0.         3210.14631763 4786.5451214   441.58769553]
New Q values:  [   0.         3210.14631763 5716.95944519  441.58769553]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1274.27702535  2843.66782628   606.149024   12676.4713221 ]
------
Step:16, Action:West
State  208
Old Q Values:  [53289.59325814  9625.03581483 -4584.50430574  9541.97626389]
New Q values:  [53289.59325814  9625.03581483 -4584.50430574 10479.81759033]
Reward: -1  Episode Reward:  34
xxxxx
xg. x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.57769489e+03 2.22120903e+04 2.45392999e+03]
------
Step:17, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.57769489e+03 2.22120903e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.57769489e+03 2.48711141e+04 2.45392999e+03]
Reward: -1  Episode Reward:  33
xxxxx
x . x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[53289.59325814  9625.03581483 -4584.50430574 10479.81759033]
------
Step:18, Action:North
State  208
Old Q Values:  [53289.59325814  9625.03581483 -4584.50430574 10479.81759033]
New Q values:  [57428.19218836  9625.03581483 -4584.50430574 10479.81759033]
Reward: -1  Episode Reward:  32
xxxxx
x .ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 59164.41397681  41800.44375495   -180.00807518 120376.51628367]
------
Step:19, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  2.39459071e+03 -3.22965309e-01  4.03761988e+02]
New Q values:  [ 2.12301837e+02  2.39459071e+03 -3.22965309e-01  4.37254102e+02]
Reward: 9  Episode Reward:  41
xxxxx
x a x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 901.16435612 350.00244198]
------
Step:20, Action:East
State  126
Old Q Values:  [  0.         331.64678262 901.16435612 350.00244198]
New Q values:  [   0.          331.64678262 1078.2429548   350.00244198]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.39459071e+03 -3.22965309e-01  4.37254102e+02]
------
Step:21, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  2.39459071e+03 -3.22965309e-01  4.37254102e+02]
New Q values:  [ 2.12301837e+02  1.78382652e+03 -3.22965309e-01  4.37254102e+02]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 999.34923028 2755.30078316    0.          930.00701399]
------
Step:22, Action:South
State  210
Old Q Values:  [31738.61235652 -1218.54515104   790.72804752  6365.40765947]
New Q values:  [31738.61235652 69723.07652823   790.72804752  6365.40765947]
Reward: 100009  Episode Reward:  100048
xxxxx
x   x
x   x
xg ax
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059 34016.98196215]
------
Step:1, Action:West
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059 34016.98196215]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059 16090.54975325]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 8261.18989464 -8521.23367799  4250.62783519  7640.7657761 ]
------
Step:2, Action:North
State  272
Old Q Values:  [ 8261.18989464 -8521.23367799  4250.62783519  7640.7657761 ]
New Q values:  [10771.210185   -8521.23367799  4250.62783519  7640.7657761 ]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.a.x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.57769489e+03 2.48711141e+04 2.45392999e+03]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 2.57769489e+03 2.48711141e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.57769489e+03 2.71823033e+04 2.45392999e+03]
Reward: 9  Episode Reward:  27
xxxxx
xg. x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[57428.19218836  9625.03581483 -4584.50430574 10479.81759033]
------
Step:4, Action:North
State  208
Old Q Values:  [57428.19218836  9625.03581483 -4584.50430574 10479.81759033]
New Q values:  [24163.51393143  9625.03581483 -4584.50430574 10479.81759033]
Reward: -1  Episode Reward:  26
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3976.12352029   660.86649319   593.06453581]
------
Step:5, Action:South
State  130
Old Q Values:  [ 59164.41397681  41800.44375495   -180.00807518 120376.51628367]
New Q values:  [ 59164.41397681  23968.63168141   -180.00807518 120376.51628367]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x.gax
x.  x
xxxxx
Step:6, Action:West
State  210
Old Q Values:  [31738.61235652 69723.07652823   790.72804752  6365.40765947]
New Q values:  [31738.61235652 69723.07652823   790.72804752 17462.48602225]
Reward: -10001  Episode Reward:  -9976
xxxxx
x.. x
x. ax
x.g x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1.95717205e+04  2.67354425e+01  7.90826661e+02 -1.21747416e+01]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1.95717205e+04  2.67354425e+01  7.90826661e+02 -1.21747416e+01]
New Q values:  [10448.65684416    26.73544252   790.826661     -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 1.56962001e+03 8.71522882e+03 3.33862213e+00]
------
Step:2, Action:East
State  183
Old Q Values:  [ 2343.43228624  1373.81082981 12437.34178406  1554.80203889]
New Q values:  [2343.43228624 1373.81082981 7099.3933836  1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 5779.07673195 7063.52223325 1169.39963074]
------
Step:3, Action:East
State  193
Old Q Values:  [-5922.26708831 22478.37485349 -3827.07475774   767.35890262]
New Q values:  [-5922.26708831 22478.37485349  -276.37572367   767.35890262]
Reward: -9991  Episode Reward:  -9973
xxxxx
x...x
x  gx
x . x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 5779.07673195 7063.52223325 1169.39963074]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  1.27909737e+04  3.23843190e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  1.27909737e+04  2.22176957e+04  2.17939995e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[31738.61235652 69723.07652823   790.72804752 17462.48602225]
------
Step:2, Action:South
State  208
Old Q Values:  [24163.51393143  9625.03581483 -4584.50430574 10479.81759033]
New Q values:  [24163.51393143  8676.57925191 -4584.50430574 10479.81759033]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059 16090.54975325]
------
Step:3, Action:West
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059 16090.54975325]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059  9672.9829568 ]
Reward: 9  Episode Reward:  17
xxxxx
x...x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10771.210185   -8521.23367799  4250.62783519  7640.7657761 ]
------
Step:4, Action:North
State  272
Old Q Values:  [10771.210185   -8521.23367799  4250.62783519  7640.7657761 ]
New Q values:  [ 6462.57506181 -8521.23367799  4250.62783519  7640.7657761 ]
Reward: -10001  Episode Reward:  -9984
xxxxx
x...x
x.g x
x.  x
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   273.64612079]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261   685.71535638]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261  1498.20729577]
Reward: 9  Episode Reward:  9
xxxxx
xa  x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4061.73717741  431.25952337 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 4061.73717741  431.25952337 -120.29354603]
New Q values:  [-177.44732869 2097.37719629  431.25952337 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1557.60775109 1255.17938902  154.04646645]
------
Step:3, Action:South
State  181
Old Q Values:  [4.15652936e+02 1.56962001e+03 8.71522882e+03 3.33862213e+00]
New Q values:  [4.15652936e+02 3.76784506e+03 8.71522882e+03 3.33862213e+00]
Reward: 9  Episode Reward:  27
xxxxx
x g x
x ..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[10448.65684416    26.73544252   790.826661     -12.17474163]
------
Step:4, Action:North
State  261
Old Q Values:  [10448.65684416    26.73544252   790.826661     -12.17474163]
New Q values:  [6793.43138314   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  26
xxxxx
x  gx
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 3.76784506e+03 8.71522882e+03 3.33862213e+00]
------
Step:5, Action:East
State  189
Old Q Values:  [ 337.36081627 1557.60775109 1255.17938902  154.04646645]
New Q values:  [ 337.36081627 1557.60775109 5340.3299961   154.04646645]
Reward: 9  Episode Reward:  35
xxxxx
x g x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[-2469.90645144  1465.42682022 16109.52746831   231.67262594]
------
Step:6, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 3943.07398053  610.93635926]
New Q values:  [ 169.9257398  3934.68362957 5385.57098884  610.93635926]
Reward: 9  Episode Reward:  44
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1274.27702535  2843.66782628   606.149024   12676.4713221 ]
------
Step:7, Action:West
State  216
Old Q Values:  [ 1274.27702535  2843.66782628   606.149024   12676.4713221 ]
New Q values:  [1274.27702535 2843.66782628  606.149024   6685.65982549]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 5385.57098884  610.93635926]
------
Step:8, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 5385.57098884  610.93635926]
New Q values:  [ 169.9257398  3934.68362957 4159.32634319  610.93635926]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1274.27702535 2843.66782628  606.149024   6685.65982549]
------
Step:9, Action:West
State  216
Old Q Values:  [1274.27702535 2843.66782628  606.149024   6685.65982549]
New Q values:  [1274.27702535 2843.66782628  606.149024   3921.46183315]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 4159.32634319  610.93635926]
------
Step:10, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 4159.32634319  610.93635926]
New Q values:  [ 169.9257398  3934.68362957 2839.56908722  610.93635926]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1274.27702535 2843.66782628  606.149024   3921.46183315]
------
Step:11, Action:West
State  216
Old Q Values:  [1274.27702535 2843.66782628  606.149024   3921.46183315]
New Q values:  [1274.27702535 2843.66782628  606.149024   2748.38982213]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 2839.56908722  610.93635926]
------
Step:12, Action:South
State  204
Old Q Values:  [   0.         3210.14631763 5716.95944519  441.58769553]
New Q values:  [   0.         3600.83401675 5716.95944519  441.58769553]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7704.58496565 -5807.06396197  2305.08372417  2150.35590639]
------
Step:13, Action:East
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  4250.62783519  7640.7657761 ]
New Q values:  [ 6462.57506181 -8521.23367799 64607.54602112  7640.7657761 ]
Reward: 100009  Episode Reward:  100057
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   745.67431529   632.53805907]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   745.67431529   632.53805907]
New Q values:  [ -281.736      -1150.91067548   838.81768154   632.53805907]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.78382652e+03 -3.22965309e-01  4.37254102e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  1.78382652e+03 -3.22965309e-01  4.37254102e+02]
New Q values:  [ 2.12301837e+02  7.96798479e+03 -3.22965309e-01  4.37254102e+02]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
xg.ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24163.51393143  8676.57925191 -4584.50430574 10479.81759033]
------
Step:3, Action:North
State  210
Old Q Values:  [31738.61235652 69723.07652823   790.72804752 17462.48602225]
New Q values:  [15085.24037861 69723.07652823   790.72804752 17462.48602225]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  7.96798479e+03 -3.22965309e-01  4.37254102e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  7.96798479e+03 -3.22965309e-01  4.37254102e+02]
New Q values:  [ 2.12301837e+02  2.41035169e+04 -3.22965309e-01  4.37254102e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[15085.24037861 69723.07652823   790.72804752 17462.48602225]
------
Step:5, Action:South
State  208
Old Q Values:  [24163.51393143  8676.57925191 -4584.50430574 10479.81759033]
New Q values:  [24163.51393143  6377.9265878  -4584.50430574 10479.81759033]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059  9672.9829568 ]
------
Step:6, Action:West
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059  9672.9829568 ]
New Q values:  [ 4101.2104512  -6396.61506955 -5588.09647059 17250.85698906]
Reward: -10001  Episode Reward:  -9976
xxxxx
x.  x
x.. x
x.g x
xxxxx
xxxxx
x...x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 2944.12951238 -5704.51612281  3946.47461012 -5679.36893145]
------
Step:1, Action:East
State  260
Old Q Values:  [ 2944.12951238 -5704.51612281  3946.47461012 -5679.36893145]
New Q values:  [ 2944.12951238 -5704.51612281 20966.25365038 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 64607.54602112  7640.7657761 ]
------
Step:2, Action:East
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 64607.54602112  7640.7657761 ]
New Q values:  [ 6462.57506181 -8521.23367799 31023.67550516  7640.7657761 ]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x ..x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4101.2104512  -6396.61506955 -5588.09647059 17250.85698906]
------
Step:3, Action:North
State  288
Old Q Values:  [ 4101.2104512  -6396.61506955 -5588.09647059 17250.85698906]
New Q values:  [ 8894.93835991 -6396.61506955 -5588.09647059 17250.85698906]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24163.51393143  6377.9265878  -4584.50430574 10479.81759033]
------
Step:4, Action:North
State  208
Old Q Values:  [24163.51393143  6377.9265878  -4584.50430574 10479.81759033]
New Q values:  [46167.46137757  6377.9265878  -4584.50430574 10479.81759033]
Reward: 9  Episode Reward:  36
xxxxx
x.gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 43162.73913773  15794.84178348  -8652.84       121655.51934999]
------
Step:5, Action:North
State  128
Old Q Values:  [ 43162.73913773  15794.84178348  -8652.84       121655.51934999]
New Q values:  [ 47581.15146009  15794.84178348  -8652.84       121655.51934999]
Reward: -10301  Episode Reward:  -10265
xxxxx
x..gx
x . x
x   x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[46167.46137757  6377.9265878  -4584.50430574 10479.81759033]
------
Step:1, Action:North
State  216
Old Q Values:  [1274.27702535 2843.66782628  606.149024   2748.38982213]
New Q values:  [1707.94786623 2843.66782628  606.149024   2748.38982213]
Reward: 9  Episode Reward:  9
xxxxx
xg ax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3976.12352029   660.86649319   593.06453581]
------
Step:2, Action:South
State  136
Old Q Values:  [-2129.37064562  3976.12352029   660.86649319   593.06453581]
New Q values:  [-2129.37064562 15440.08782139   660.86649319   593.06453581]
Reward: -1  Episode Reward:  8
xxxxx
x.g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[46167.46137757  6377.9265878  -4584.50430574 10479.81759033]
------
Step:3, Action:North
State  216
Old Q Values:  [1707.94786623 2843.66782628  606.149024   2748.38982213]
New Q values:  [7913.63420843 2843.66782628  606.149024   2748.38982213]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.41035169e+04 -3.22965309e-01  4.37254102e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  2.41035169e+04 -3.22965309e-01  4.37254102e+02]
New Q values:  [ 2.12301837e+02  1.74910452e+04 -3.22965309e-01  4.37254102e+02]
Reward: -10001  Episode Reward:  -9994
xxxxx
x.  x
x..gx
x...x
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8894.93835991 -6396.61506955 -5588.09647059 17250.85698906]
------
Step:1, Action:West
State  288
Old Q Values:  [ 8894.93835991 -6396.61506955 -5588.09647059 17250.85698906]
New Q values:  [ 8894.93835991 -6396.61506955 -5588.09647059 16212.84544717]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 31023.67550516  7640.7657761 ]
------
Step:2, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 13982.61166395 44845.33867953]
New Q values:  [ 1637.72437281  1974.75214244 10456.29829973 44845.33867953]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8894.93835991 -6396.61506955 -5588.09647059 16212.84544717]
------
Step:3, Action:West
State  288
Old Q Values:  [ 8894.93835991 -6396.61506955 -5588.09647059 16212.84544717]
New Q values:  [ 8894.93835991 -6396.61506955 -5588.09647059 19938.13978273]
Reward: -1  Episode Reward:  7
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 10456.29829973 44845.33867953]
------
Step:4, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 10456.29829973 44845.33867953]
New Q values:  [ 1637.72437281  1974.75214244 10456.29829973 19981.56488675]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6793.43138314   26.73544252  790.826661    -12.17474163]
------
Step:5, Action:North
State  261
Old Q Values:  [6793.43138314   26.73544252  790.826661    -12.17474163]
New Q values:  [4852.59056833   26.73544252  790.826661    -12.17474163]
Reward: 9  Episode Reward:  25
xxxxx
x...x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1373.81082981 7099.3933836  1554.80203889]
------
Step:6, Action:East
State  181
Old Q Values:  [4.15652936e+02 3.76784506e+03 8.71522882e+03 3.33862213e+00]
New Q values:  [4.15652936e+02 3.76784506e+03 1.02350040e+04 3.33862213e+00]
Reward: 9  Episode Reward:  34
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 22478.37485349  -276.37572367   767.35890262]
------
Step:7, Action:South
State  195
Old Q Values:  [  38.85388605 5779.07673195 7063.52223325 1169.39963074]
New Q values:  [  38.85388605 8305.50015881 7063.52223325 1169.39963074]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 10456.29829973 19981.56488675]
------
Step:8, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 31023.67550516  7640.7657761 ]
New Q values:  [ 6462.57506181 -8521.23367799 31023.67550516  4511.48348094]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4852.59056833   26.73544252  790.826661    -12.17474163]
------
Step:9, Action:North
State  261
Old Q Values:  [4852.59056833   26.73544252  790.826661    -12.17474163]
New Q values:  [5010.93742234   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  31
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 3.76784506e+03 1.02350040e+04 3.33862213e+00]
------
Step:10, Action:South
State  181
Old Q Values:  [4.15652936e+02 3.76784506e+03 1.02350040e+04 3.33862213e+00]
New Q values:  [4.15652936e+02 3.00981925e+03 1.02350040e+04 3.33862213e+00]
Reward: -1  Episode Reward:  30
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5010.93742234   26.73544252  790.826661    -12.17474163]
------
Step:11, Action:North
State  261
Old Q Values:  [5010.93742234   26.73544252  790.826661    -12.17474163]
New Q values:  [5074.27616394   26.73544252  790.826661    -12.17474163]
Reward: -1  Episode Reward:  29
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 3.00981925e+03 1.02350040e+04 3.33862213e+00]
------
Step:12, Action:South
State  180
Old Q Values:  [ 4507.91302948  2441.21181368  6268.30788824 -4966.32149798]
New Q values:  [ 4507.91302948  7265.76082059  6268.30788824 -4966.32149798]
Reward: -1  Episode Reward:  28
xxxxx
x...x
xg  x
xa  x
xxxxx
Step:13, Action:South
State  261
Old Q Values:  [5074.27616394   26.73544252  790.826661    -12.17474163]
New Q values:  [5074.27616394 1352.37702619  790.826661    -12.17474163]
Reward: -301  Episode Reward:  -273
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5074.27616394 1352.37702619  790.826661    -12.17474163]
------
Step:14, Action:North
State  261
Old Q Values:  [5074.27616394 1352.37702619  790.826661    -12.17474163]
New Q values:  [5099.61166058 1352.37702619  790.826661    -12.17474163]
Reward: -1  Episode Reward:  -274
xxxxx
x...x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 3.00981925e+03 1.02350040e+04 3.33862213e+00]
------
Step:15, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039   9876.42421904     0.        ]
New Q values:  [60476.05138135 21430.9929039  10693.48214366     0.        ]
Reward: -1  Episode Reward:  -275
xxxxx
x..gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 22478.37485349  -276.37572367   767.35890262]
------
Step:16, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.57769489e+03 2.71823033e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.03375806e+04 2.71823033e+04 2.45392999e+03]
Reward: -1  Episode Reward:  -276
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 31023.67550516  4511.48348094]
------
Step:17, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 10456.29829973 19981.56488675]
New Q values:  [ 1637.72437281  1974.75214244 10163.36125471 19981.56488675]
Reward: -1  Episode Reward:  -277
xxxxx
x..gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8894.93835991 -6396.61506955 -5588.09647059 19938.13978273]
------
Step:18, Action:West
State  288
Old Q Values:  [ 8894.93835991 -6396.61506955 -5588.09647059 19938.13978273]
New Q values:  [ 8894.93835991 -6396.61506955 -5588.09647059 17281.75856464]
Reward: -1  Episode Reward:  -278
xxxxx
x.g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 31023.67550516  4511.48348094]
------
Step:19, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 10163.36125471 19981.56488675]
New Q values:  [ 1637.72437281  1974.75214244  9249.27207128 19981.56488675]
Reward: -1  Episode Reward:  -279
xxxxx
x..gx
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8894.93835991 -6396.61506955 -5588.09647059 17281.75856464]
------
Step:20, Action:West
State  288
Old Q Values:  [ 8894.93835991 -6396.61506955 -5588.09647059 17281.75856464]
New Q values:  [ 8894.93835991 -6396.61506955 -5588.09647059 12906.57289188]
Reward: -1  Episode Reward:  -280
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  9249.27207128 19981.56488675]
------
Step:21, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 31023.67550516  4511.48348094]
New Q values:  [ 6462.57506181 -8521.23367799 31023.67550516  3333.87689055]
Reward: -1  Episode Reward:  -281
xxxxx
x...x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5099.61166058 1352.37702619  790.826661    -12.17474163]
------
Step:22, Action:North
State  261
Old Q Values:  [5099.61166058 1352.37702619  790.826661    -12.17474163]
New Q values:  [4169.06267931 1352.37702619  790.826661    -12.17474163]
Reward: -1  Episode Reward:  -282
xxxxx
x...x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1373.81082981 7099.3933836  1554.80203889]
------
Step:23, Action:East
State  183
Old Q Values:  [2343.43228624 1373.81082981 7099.3933836  1554.80203889]
New Q values:  [2343.43228624 1373.81082981 5330.80740108 1554.80203889]
Reward: -1  Episode Reward:  -283
xxxxx
x...x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 8305.50015881 7063.52223325 1169.39963074]
------
Step:24, Action:South
State  193
Old Q Values:  [-5922.26708831 22478.37485349  -276.37572367   767.35890262]
New Q values:  [-5922.26708831 14985.21940742  -276.37572367   767.35890262]
Reward: -1  Episode Reward:  -284
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  9249.27207128 19981.56488675]
------
Step:25, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  9249.27207128 19981.56488675]
New Q values:  [1637.72437281 1974.75214244 9249.27207128 9242.7447585 ]
Reward: -1  Episode Reward:  -285
xxxxx
x...x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4169.06267931 1352.37702619  790.826661    -12.17474163]
------
Step:26, Action:North
State  261
Old Q Values:  [4169.06267931 1352.37702619  790.826661    -12.17474163]
New Q values:  [3266.26729205 1352.37702619  790.826661    -12.17474163]
Reward: -1  Episode Reward:  -286
xxxxx
x...x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1373.81082981 5330.80740108 1554.80203889]
------
Step:27, Action:East
State  183
Old Q Values:  [2343.43228624 1373.81082981 5330.80740108 1554.80203889]
New Q values:  [2343.43228624 1373.81082981 4623.37300807 1554.80203889]
Reward: -1  Episode Reward:  -287
xxxxx
x...x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 8305.50015881 7063.52223325 1169.39963074]
------
Step:28, Action:South
State  193
Old Q Values:  [-5922.26708831 14985.21940742  -276.37572367   767.35890262]
New Q values:  [-5922.26708831  8768.26938435  -276.37572367   767.35890262]
Reward: -1  Episode Reward:  -288
xxxxx
x...x
x  gx
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 9249.27207128 9242.7447585 ]
------
Step:29, Action:East
State  273
Old Q Values:  [1637.72437281 1974.75214244 9249.27207128 9242.7447585 ]
New Q values:  [1637.72437281 1974.75214244 1571.08069608 9242.7447585 ]
Reward: -10001  Episode Reward:  -10289
xxxxx
x...x
x   x
x  gx
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8894.93835991 -6396.61506955 -5588.09647059 12906.57289188]
------
Step:1, Action:West
State  288
Old Q Values:  [ 8894.93835991 -6396.61506955 -5588.09647059 12906.57289188]
New Q values:  [ 8894.93835991 -6396.61506955 -5588.09647059  7940.8525843 ]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 1571.08069608 9242.7447585 ]
------
Step:2, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 31023.67550516  3333.87689055]
New Q values:  [ 6462.57506181 -8521.23367799 31023.67550516 16020.15270286]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[48937.33982213 12764.58618105  3851.09595999  1875.31501677]
------
Step:3, Action:North
State  260
Old Q Values:  [ 2944.12951238 -5704.51612281 20966.25365038 -5679.36893145]
New Q values:  [ 3362.78005113 -5704.51612281 20966.25365038 -5679.36893145]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  7265.76082059  6268.30788824 -4966.32149798]
------
Step:4, Action:South
State  180
Old Q Values:  [ 4507.91302948  7265.76082059  6268.30788824 -4966.32149798]
New Q values:  [ 4507.91302948  9195.58042335  6268.30788824 -4966.32149798]
Reward: -1  Episode Reward:  26
xxxxx
x...x
xg. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3362.78005113 -5704.51612281 20966.25365038 -5679.36893145]
------
Step:5, Action:East
State  260
Old Q Values:  [ 3362.78005113 -5704.51612281 20966.25365038 -5679.36893145]
New Q values:  [ 3362.78005113 -5704.51612281 17693.0041117  -5679.36893145]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x . x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 31023.67550516 16020.15270286]
------
Step:6, Action:East
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 31023.67550516 16020.15270286]
New Q values:  [ 6462.57506181 -8521.23367799 15077.35171004 16020.15270286]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xg. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8894.93835991 -6396.61506955 -5588.09647059  7940.8525843 ]
------
Step:7, Action:North
State  288
Old Q Values:  [ 8894.93835991 -6396.61506955 -5588.09647059  7940.8525843 ]
New Q values:  [17407.61375723 -6396.61506955 -5588.09647059  7940.8525843 ]
Reward: -1  Episode Reward:  23
xxxxx
xg..x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[46167.46137757  6377.9265878  -4584.50430574 10479.81759033]
------
Step:8, Action:North
State  208
Old Q Values:  [46167.46137757  6377.9265878  -4584.50430574 10479.81759033]
New Q values:  [54585.33943613  6377.9265878  -4584.50430574 10479.81759033]
Reward: 9  Episode Reward:  32
xxxxx
x..ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 59164.41397681  23968.63168141   -180.00807518 120376.51628367]
------
Step:9, Action:West
State  130
Old Q Values:  [ 59164.41397681  23968.63168141   -180.00807518 120376.51628367]
New Q values:  [59164.41397681 23968.63168141  -180.00807518 68915.52240283]
Reward: 9  Episode Reward:  41
xxxxx
x.a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  32608.63878402 69198.38629787]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   838.81768154   632.53805907]
New Q values:  [ -281.736      -1150.91067548   838.81768154   887.62838252]
Reward: 9  Episode Reward:  50
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2097.37719629  431.25952337 -120.29354603]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 2097.37719629  431.25952337 -120.29354603]
New Q values:  [-177.44732869 2225.36278094  431.25952337 -120.29354603]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1373.81082981 4623.37300807 1554.80203889]
------
Step:12, Action:East
State  179
Old Q Values:  [82228.67666629 16101.90751562 48004.41527268     0.        ]
New Q values:  [82228.67666629 16101.90751562 85872.47482516     0.        ]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.74910452e+04 -3.22965309e-01  4.37254102e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  1.74910452e+04 -3.22965309e-01  4.37254102e+02]
New Q values:  [ 2.12301837e+02  2.79187410e+04 -3.22965309e-01  4.37254102e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[15085.24037861 69723.07652823   790.72804752 17462.48602225]
------
Step:2, Action:South
State  208
Old Q Values:  [54585.33943613  6377.9265878  -4584.50430574 10479.81759033]
New Q values:  [54585.33943613  7772.85476229 -4584.50430574 10479.81759033]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17407.61375723 -6396.61506955 -5588.09647059  7940.8525843 ]
------
Step:3, Action:North
State  288
Old Q Values:  [17407.61375723 -6396.61506955 -5588.09647059  7940.8525843 ]
New Q values:  [23338.04733373 -6396.61506955 -5588.09647059  7940.8525843 ]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[54585.33943613  7772.85476229 -4584.50430574 10479.81759033]
------
Step:4, Action:North
State  208
Old Q Values:  [54585.33943613  7772.85476229 -4584.50430574 10479.81759033]
New Q values:  [26465.56212087  7772.85476229 -4584.50430574 10479.81759033]
Reward: -1  Episode Reward:  6
xxxxx
xg.ax
x.. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562 15440.08782139   660.86649319   593.06453581]
------
Step:5, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  2.79187410e+04 -3.22965309e-01  4.37254102e+02]
New Q values:  [ 2.12301837e+02  1.91065650e+04 -3.22965309e-01  4.37254102e+02]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26465.56212087  7772.85476229 -4584.50430574 10479.81759033]
------
Step:6, Action:North
State  210
Old Q Values:  [15085.24037861 69723.07652823   790.72804752 17462.48602225]
New Q values:  [11765.46566514 69723.07652823   790.72804752 17462.48602225]
Reward: -1  Episode Reward:  4
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.91065650e+04 -3.22965309e-01  4.37254102e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  1.91065650e+04 -3.22965309e-01  4.37254102e+02]
New Q values:  [ 2.12301837e+02  2.85589490e+04 -3.22965309e-01  4.37254102e+02]
Reward: -1  Episode Reward:  3
xxxxx
x.. x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11765.46566514 69723.07652823   790.72804752 17462.48602225]
------
Step:8, Action:South
State  210
Old Q Values:  [11765.46566514 69723.07652823   790.72804752 17462.48602225]
New Q values:  [11765.46566514 28890.04481141   790.72804752 17462.48602225]
Reward: -10001  Episode Reward:  -9998
xxxxx
x.. x
x.. x
x..gx
xxxxx
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23338.04733373 -6396.61506955 -5588.09647059  7940.8525843 ]
------
Step:1, Action:West
State  288
Old Q Values:  [23338.04733373 -6396.61506955 -5588.09647059  7940.8525843 ]
New Q values:  [23338.04733373 -6396.61506955 -5588.09647059  5954.56446127]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 1571.08069608 9242.7447585 ]
------
Step:2, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 15077.35171004 16020.15270286]
New Q values:  [ 6462.57506181 -8521.23367799 15077.35171004 21094.66302778]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[48937.33982213 12764.58618105  3851.09595999  1875.31501677]
------
Step:3, Action:North
State  260
Old Q Values:  [ 3362.78005113 -5704.51612281 17693.0041117  -5679.36893145]
New Q values:  [ 4109.18614746 -5704.51612281 17693.0041117  -5679.36893145]
Reward: 9  Episode Reward:  27
xxxxx
xg..x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  9195.58042335  6268.30788824 -4966.32149798]
------
Step:4, Action:South
State  181
Old Q Values:  [4.15652936e+02 3.00981925e+03 1.02350040e+04 3.33862213e+00]
New Q values:  [4.15652936e+02 2.18320789e+03 1.02350040e+04 3.33862213e+00]
Reward: -1  Episode Reward:  26
xxxxx
x.g.x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3266.26729205 1352.37702619  790.826661    -12.17474163]
------
Step:5, Action:North
State  261
Old Q Values:  [3266.26729205 1352.37702619  790.826661    -12.17474163]
New Q values:  [4376.40811182 1352.37702619  790.826661    -12.17474163]
Reward: -1  Episode Reward:  25
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 1.02350040e+04 3.33862213e+00]
------
Step:6, Action:South
State  183
Old Q Values:  [2343.43228624 1373.81082981 4623.37300807 1554.80203889]
New Q values:  [2343.43228624 1861.84676547 4623.37300807 1554.80203889]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x . x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4376.40811182 1352.37702619  790.826661    -12.17474163]
------
Step:7, Action:North
State  261
Old Q Values:  [4376.40811182 1352.37702619  790.826661    -12.17474163]
New Q values:  [3136.97514715 1352.37702619  790.826661    -12.17474163]
Reward: -1  Episode Reward:  23
xxxxx
x...x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1861.84676547 4623.37300807 1554.80203889]
------
Step:8, Action:East
State  181
Old Q Values:  [4.15652936e+02 2.18320789e+03 1.02350040e+04 3.33862213e+00]
New Q values:  [4.15652936e+02 2.18320789e+03 6.72988241e+03 3.33862213e+00]
Reward: 9  Episode Reward:  32
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8768.26938435  -276.37572367   767.35890262]
------
Step:9, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.03375806e+04 2.71823033e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.04628312e+04 2.71823033e+04 2.45392999e+03]
Reward: -1  Episode Reward:  31
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 15077.35171004 21094.66302778]
------
Step:10, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 15077.35171004 21094.66302778]
New Q values:  [ 6462.57506181 -8521.23367799 15077.35171004 13745.16644462]
Reward: -1  Episode Reward:  30
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4109.18614746 -5704.51612281 17693.0041117  -5679.36893145]
------
Step:11, Action:East
State  261
Old Q Values:  [3136.97514715 1352.37702619  790.826661    -12.17474163]
New Q values:  [3136.97514715 1352.37702619 4838.93617741  -12.17474163]
Reward: -1  Episode Reward:  29
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 15077.35171004 13745.16644462]
------
Step:12, Action:East
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 15077.35171004 13745.16644462]
New Q values:  [ 6462.57506181 -8521.23367799 13031.75488414 13745.16644462]
Reward: -1  Episode Reward:  28
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23338.04733373 -6396.61506955 -5588.09647059  5954.56446127]
------
Step:13, Action:North
State  288
Old Q Values:  [23338.04733373 -6396.61506955 -5588.09647059  5954.56446127]
New Q values:  [18001.63237692 -6396.61506955 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  27
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11765.46566514 28890.04481141   790.72804752 17462.48602225]
------
Step:14, Action:South
State  210
Old Q Values:  [11765.46566514 28890.04481141   790.72804752 17462.48602225]
New Q values:  [11765.46566514 16955.90763764   790.72804752 17462.48602225]
Reward: -1  Episode Reward:  26
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18001.63237692 -6396.61506955 -5588.09647059  5954.56446127]
------
Step:15, Action:North
State  288
Old Q Values:  [18001.63237692 -6396.61506955 -5588.09647059  5954.56446127]
New Q values:  [15139.72158703 -6396.61506955 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26465.56212087  7772.85476229 -4584.50430574 10479.81759033]
------
Step:16, Action:North
State  208
Old Q Values:  [26465.56212087  7772.85476229 -4584.50430574 10479.81759033]
New Q values:  [31266.2815692   7772.85476229 -4584.50430574 10479.81759033]
Reward: 9  Episode Reward:  34
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59164.41397681 23968.63168141  -180.00807518 68915.52240283]
------
Step:17, Action:West
State  130
Old Q Values:  [59164.41397681 23968.63168141  -180.00807518 68915.52240283]
New Q values:  [59164.41397681 23968.63168141  -180.00807518 66979.82347515]
Reward: 9  Episode Reward:  43
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 131360.71504674]
------
Step:18, Action:West
State  115
Old Q Values:  [  -180.6          1117.48597573  18639.04650494 131360.71504674]
New Q values:  [  -180.6          1117.48597573  18639.04650494 130233.23748005]
Reward: 100009  Episode Reward:  100052
xxxxx
xa  x
x  gx
x   x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3136.97514715 1352.37702619 4838.93617741  -12.17474163]
------
Step:1, Action:East
State  261
Old Q Values:  [3136.97514715 1352.37702619 4838.93617741  -12.17474163]
New Q values:  [3136.97514715 1352.37702619   64.52440435  -12.17474163]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. .x
x g.x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
Step:1, Action:South
State  288
Old Q Values:  [15139.72158703 -6396.61506955 -5588.09647059  5954.56446127]
New Q values:  [15139.72158703  1802.67044829 -5588.09647059  5954.56446127]
Reward: -301  Episode Reward:  -301
xxxxx
x...x
xg..x
x .ax
xxxxx
Step:2, Action:North
State  288
Old Q Values:  [15139.72158703  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [11300.03444149  1802.67044829 -5588.09647059  5954.56446127]
Reward: 9  Episode Reward:  -292
xxxxx
x...x
x..ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[11765.46566514 16955.90763764   790.72804752 17462.48602225]
------
Step:3, Action:West
State  208
Old Q Values:  [31266.2815692   7772.85476229 -4584.50430574 10479.81759033]
New Q values:  [31266.2815692   7772.85476229 -4584.50430574 12352.01802394]
Reward: 9  Episode Reward:  -283
xxxxx
x...x
xga x
x . x
xxxxx
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.04628312e+04 2.71823033e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.04628312e+04 2.02522058e+04 2.45392999e+03]
Reward: -1  Episode Reward:  -284
xxxxx
x...x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[31266.2815692   7772.85476229 -4584.50430574 12352.01802394]
------
Step:5, Action:North
State  208
Old Q Values:  [31266.2815692   7772.85476229 -4584.50430574 12352.01802394]
New Q values:  [17143.9389741   7772.85476229 -4584.50430574 12352.01802394]
Reward: 9  Episode Reward:  -275
xxxxx
x.gax
x.  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562 15440.08782139   660.86649319   593.06453581]
------
Step:6, Action:South
State  136
Old Q Values:  [-2129.37064562 15440.08782139   660.86649319   593.06453581]
New Q values:  [-2129.37064562 11318.61682078   660.86649319   593.06453581]
Reward: -1  Episode Reward:  -276
xxxxx
xg. x
x. ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[17143.9389741   7772.85476229 -4584.50430574 12352.01802394]
------
Step:7, Action:North
State  208
Old Q Values:  [17143.9389741   7772.85476229 -4584.50430574 12352.01802394]
New Q values:  [26950.92263218  7772.85476229 -4584.50430574 12352.01802394]
Reward: -1  Episode Reward:  -277
xxxxx
x..ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59164.41397681 23968.63168141  -180.00807518 66979.82347515]
------
Step:8, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  2.85589490e+04 -3.22965309e-01  4.37254102e+02]
New Q values:  [ 2.12301837e+02  2.85589490e+04 -3.22965309e-01  4.46590156e+02]
Reward: 9  Episode Reward:  -268
xxxxx
x.a x
x.g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   838.81768154   887.62838252]
------
Step:9, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261  1498.20729577]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261  1879.37217358]
Reward: 9  Episode Reward:  -259
xxxxx
xa  x
x. gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 4248.96418422  496.66968841 -252.78192178]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869 2225.36278094  431.25952337 -120.29354603]
New Q values:  [-177.44732869 2497.64411121  431.25952337 -120.29354603]
Reward: 9  Episode Reward:  -250
xxxxx
x   x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1557.60775109 5340.3299961   154.04646645]
------
Step:11, Action:South
State  189
Old Q Values:  [ 337.36081627 1557.60775109 5340.3299961   154.04646645]
New Q values:  [ 337.36081627 1563.53564458 5340.3299961   154.04646645]
Reward: -1  Episode Reward:  -251
xxxxx
x g x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3136.97514715 1352.37702619   64.52440435  -12.17474163]
------
Step:12, Action:North
State  261
Old Q Values:  [3136.97514715 1352.37702619   64.52440435  -12.17474163]
New Q values:  [2856.28905769 1352.37702619   64.52440435  -12.17474163]
Reward: -1  Episode Reward:  -252
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1563.53564458 5340.3299961   154.04646645]
------
Step:13, Action:East
State  189
Old Q Values:  [ 337.36081627 1563.53564458 5340.3299961   154.04646645]
New Q values:  [ 337.36081627 1563.53564458 2615.00983374  154.04646645]
Reward: -1  Episode Reward:  -253
xxxxx
x   x
x agx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 1598.25945099 -789.02220255 1209.42904959]
------
Step:14, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.04628312e+04 2.02522058e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.83140824e+04 2.02522058e+04 2.45392999e+03]
Reward: 100009  Episode Reward:  99756
xxxxx
x   x
x g x
x a x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
Step:1, Action:North
State  196
Old Q Values:  [-2469.90645144  1465.42682022 16109.52746831   231.67262594]
New Q values:  [ -534.40699599  1465.42682022 16109.52746831   231.67262594]
Reward: 9  Episode Reward:  9
xxxxx
xga.x
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  1493.85194863   182.00147425]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1493.85194863   182.00147425]
New Q values:  [-9594.56523706 -8069.05606225  3998.52582569   182.00147425]
Reward: 9  Episode Reward:  18
xxxxx
x gax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562 11318.61682078   660.86649319   593.06453581]
------
Step:3, Action:South
State  136
Old Q Values:  [-2129.37064562 11318.61682078   660.86649319   593.06453581]
New Q values:  [-2129.37064562  6906.93699084   660.86649319   593.06453581]
Reward: 9  Episode Reward:  27
xxxxx
xg  x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[7913.63420843 2843.66782628  606.149024   2748.38982213]
------
Step:4, Action:North
State  216
Old Q Values:  [7913.63420843 2843.66782628  606.149024   2748.38982213]
New Q values:  [11732.53837639  2843.66782628   606.149024    2748.38982213]
Reward: -1  Episode Reward:  26
xxxxx
x  ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.85589490e+04 -3.22965309e-01  4.46590156e+02]
------
Step:5, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  2.85589490e+04 -3.22965309e-01  4.46590156e+02]
New Q values:  [ 2.12301837e+02  1.49427411e+04 -3.22965309e-01  4.46590156e+02]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[11732.53837639  2843.66782628   606.149024    2748.38982213]
------
Step:6, Action:North
State  218
Old Q Values:  [ 999.34923028 2755.30078316    0.          930.00701399]
New Q values:  [4881.96202319 2755.30078316    0.          930.00701399]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.49427411e+04 -3.22965309e-01  4.46590156e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  1.49427411e+04 -3.22965309e-01  4.46590156e+02]
New Q values:  [ 2.12301837e+02  7.44108505e+03 -3.22965309e-01  4.46590156e+02]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4881.96202319 2755.30078316    0.          930.00701399]
------
Step:8, Action:North
State  216
Old Q Values:  [11732.53837639  2843.66782628   606.149024    2748.38982213]
New Q values:  [6924.74086508 2843.66782628  606.149024   2748.38982213]
Reward: -1  Episode Reward:  22
xxxxx
x  ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  7.44108505e+03 -3.22965309e-01  4.46590156e+02]
------
Step:9, Action:West
State  136
Old Q Values:  [-2129.37064562  6906.93699084   660.86649319   593.06453581]
New Q values:  [-2129.37064562  6906.93699084   660.86649319   318.71965056]
Reward: -1  Episode Reward:  21
xxxxx
x agx
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   273.64612079]
------
Step:10, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3998.52582569   182.00147425]
New Q values:  [-9594.56523706 -8069.05606225  3998.52582569   202.30477047]
Reward: -1  Episode Reward:  20
xxxxx
xag x
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6         433.68060256 -764.93196255    0.        ]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 2497.64411121  431.25952337 -120.29354603]
New Q values:  [-177.44732869 1788.9605946   431.25952337 -120.29354603]
Reward: 9  Episode Reward:  29
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1563.53564458 2615.00983374  154.04646645]
------
Step:12, Action:South
State  188
Old Q Values:  [-6523.78898263  2790.67422621  1963.43704178     0.        ]
New Q values:  [-6523.78898263  6429.570924    1963.43704178     0.        ]
Reward: 9  Episode Reward:  38
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4109.18614746 -5704.51612281 17693.0041117  -5679.36893145]
------
Step:13, Action:East
State  260
Old Q Values:  [ 4109.18614746 -5704.51612281 17693.0041117  -5679.36893145]
New Q values:  [ 4109.18614746 -5704.51612281  9393.97713438 -5679.36893145]
Reward: 9  Episode Reward:  47
xxxxx
xg  x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7704.58496565 -5807.06396197  2305.08372417  2150.35590639]
------
Step:14, Action:North
State  276
Old Q Values:  [ 7704.58496565 -5807.06396197  2305.08372417  2150.35590639]
New Q values:  [ 4796.32181982 -5807.06396197  2305.08372417  2150.35590639]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         3600.83401675 5716.95944519  441.58769553]
------
Step:15, Action:East
State  206
Old Q Values:  [   0.         2477.63911564 1418.80979599    0.        ]
New Q values:  [   0.         2477.63911564 2031.51252535    0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4881.96202319 2755.30078316    0.          930.00701399]
------
Step:16, Action:North
State  218
Old Q Values:  [4881.96202319 2755.30078316    0.          930.00701399]
New Q values:  [4184.5103238  2755.30078316    0.          930.00701399]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  7.44108505e+03 -3.22965309e-01  4.46590156e+02]
------
Step:17, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  7.44108505e+03 -3.22965309e-01  4.46590156e+02]
New Q values:  [ 2.12301837e+02  4.23118712e+03 -3.22965309e-01  4.46590156e+02]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4184.5103238  2755.30078316    0.          930.00701399]
------
Step:18, Action:North
State  216
Old Q Values:  [6924.74086508 2843.66782628  606.149024   2748.38982213]
New Q values:  [4038.65248098 2843.66782628  606.149024   2748.38982213]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.23118712e+03 -3.22965309e-01  4.46590156e+02]
------
Step:19, Action:South
State  136
Old Q Values:  [-2129.37064562  6906.93699084   660.86649319   318.71965056]
New Q values:  [-2129.37064562  3973.77054063   660.86649319   318.71965056]
Reward: -1  Episode Reward:  41
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[4038.65248098 2843.66782628  606.149024   2748.38982213]
------
Step:20, Action:North
State  216
Old Q Values:  [4038.65248098 2843.66782628  606.149024   2748.38982213]
New Q values:  [2884.21712734 2843.66782628  606.149024   2748.38982213]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.23118712e+03 -3.22965309e-01  4.46590156e+02]
------
Step:21, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  4.23118712e+03 -3.22965309e-01  4.46590156e+02]
New Q values:  [ 2.12301837e+02  2.55713998e+03 -3.22965309e-01  4.46590156e+02]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2884.21712734 2843.66782628  606.149024   2748.38982213]
------
Step:22, Action:North
State  218
Old Q Values:  [4184.5103238  2755.30078316    0.          930.00701399]
New Q values:  [2440.34612496 2755.30078316    0.          930.00701399]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.55713998e+03 -3.22965309e-01  4.46590156e+02]
------
Step:23, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  2.55713998e+03 -3.22965309e-01  4.46590156e+02]
New Q values:  [ 2.12301837e+02  1.88752113e+03 -3.22965309e-01  4.46590156e+02]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2884.21712734 2843.66782628  606.149024   2748.38982213]
------
Step:24, Action:North
State  216
Old Q Values:  [2884.21712734 2843.66782628  606.149024   2748.38982213]
New Q values:  [1719.34319057 2843.66782628  606.149024   2748.38982213]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.88752113e+03 -3.22965309e-01  4.46590156e+02]
------
Step:25, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  1.88752113e+03 -3.22965309e-01  4.46590156e+02]
New Q values:  [ 2.12301837e+02  1.58099869e+03 -3.22965309e-01  4.46590156e+02]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2440.34612496 2755.30078316    0.          930.00701399]
------
Step:26, Action:South
State  208
Old Q Values:  [26950.92263218  7772.85476229 -4584.50430574 12352.01802394]
New Q values:  [26950.92263218 66504.55223736 -4584.50430574 12352.01802394]
Reward: 100009  Episode Reward:  100044
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x.agx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   273.64612079]
------
Step:1, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3998.52582569   202.30477047]
New Q values:  [-9594.56523706 -8069.05606225  3998.52582569   719.59279005]
Reward: 9  Episode Reward:  9
xxxxx
xag x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2110.90293953 -2165.66138672  -180.6       ]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1788.9605946   431.25952337 -120.29354603]
New Q values:  [-177.44732869 1505.48718796  431.25952337 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1563.53564458 2615.00983374  154.04646645]
------
Step:3, Action:South
State  180
Old Q Values:  [ 4507.91302948  9195.58042335  6268.30788824 -4966.32149798]
New Q values:  [ 4507.91302948  6501.82530965  6268.30788824 -4966.32149798]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 4109.18614746 -5704.51612281  9393.97713438 -5679.36893145]
------
Step:4, Action:East
State  260
Old Q Values:  [ 4109.18614746 -5704.51612281  9393.97713438 -5679.36893145]
New Q values:  [ 4109.18614746 -5704.51612281  5201.8873997  -5679.36893145]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4796.32181982 -5807.06396197  2305.08372417  2150.35590639]
------
Step:5, Action:North
State  276
Old Q Values:  [ 4796.32181982 -5807.06396197  2305.08372417  2150.35590639]
New Q values:  [ 6756.78696842 -5807.06396197  2305.08372417  2150.35590639]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 16109.52746831   231.67262594]
------
Step:6, Action:East
State  206
Old Q Values:  [   0.         2477.63911564 2031.51252535    0.        ]
New Q values:  [   0.         2477.63911564 1644.59524509    0.        ]
Reward: 9  Episode Reward:  54
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2440.34612496 2755.30078316    0.          930.00701399]
------
Step:7, Action:South
State  210
Old Q Values:  [11765.46566514 16955.90763764   790.72804752 17462.48602225]
New Q values:  [11765.46566514 70177.7733875    790.72804752 17462.48602225]
Reward: 100009  Episode Reward:  100063
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 13031.75488414 13745.16644462]
------
Step:1, Action:West
State  276
Old Q Values:  [ 6756.78696842 -5807.06396197  2305.08372417  2150.35590639]
New Q values:  [ 6756.78696842 -5807.06396197  2305.08372417  1722.42907986]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2856.28905769 1352.37702619   64.52440435  -12.17474163]
------
Step:2, Action:North
State  261
Old Q Values:  [2856.28905769 1352.37702619   64.52440435  -12.17474163]
New Q values:  [3166.88034567 1352.37702619   64.52440435  -12.17474163]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 6.72988241e+03 3.33862213e+00]
------
Step:3, Action:East
State  181
Old Q Values:  [4.15652936e+02 2.18320789e+03 6.72988241e+03 3.33862213e+00]
New Q values:  [4.15652936e+02 2.18320789e+03 7.53021120e+03 3.33862213e+00]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 16109.52746831   231.67262594]
------
Step:4, Action:East
State  193
Old Q Values:  [-5922.26708831  8768.26938435  -276.37572367   767.35890262]
New Q values:  [-5922.26708831  8768.26938435 19846.21538174   767.35890262]
Reward: 9  Episode Reward:  36
xxxxx
x .gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26950.92263218 66504.55223736 -4584.50430574 12352.01802394]
------
Step:5, Action:South
State  208
Old Q Values:  [26950.92263218 66504.55223736 -4584.50430574 12352.01802394]
New Q values:  [26950.92263218 29997.23122739 -4584.50430574 12352.01802394]
Reward: 9  Episode Reward:  45
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11300.03444149  1802.67044829 -5588.09647059  5954.56446127]
------
Step:6, Action:North
State  288
Old Q Values:  [11300.03444149  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [13518.58314481  1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  44
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26950.92263218 29997.23122739 -4584.50430574 12352.01802394]
------
Step:7, Action:South
State  208
Old Q Values:  [26950.92263218 29997.23122739 -4584.50430574 12352.01802394]
New Q values:  [26950.92263218 16053.8674344  -4584.50430574 12352.01802394]
Reward: -1  Episode Reward:  43
xxxxx
x ..x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13518.58314481  1802.67044829 -5588.09647059  5954.56446127]
------
Step:8, Action:North
State  288
Old Q Values:  [13518.58314481  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [13492.11004758  1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  42
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26950.92263218 16053.8674344  -4584.50430574 12352.01802394]
------
Step:9, Action:North
State  208
Old Q Values:  [26950.92263218 16053.8674344  -4584.50430574 12352.01802394]
New Q values:  [30879.71609542 16053.8674344  -4584.50430574 12352.01802394]
Reward: 9  Episode Reward:  51
xxxxx
x .ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59164.41397681 23968.63168141  -180.00807518 66979.82347515]
------
Step:10, Action:West
State  128
Old Q Values:  [ 47581.15146009  15794.84178348  -8652.84       121655.51934999]
New Q values:  [ 47581.15146009  15794.84178348  -8652.84       141952.53665887]
Reward: 100009  Episode Reward:  100060
xxxxx
xga x
x   x
x   x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1505.48718796  431.25952337 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 5960.22919402 1796.41351142    0.        ]
New Q values:  [ 221.30610858 3776.50358003 1796.41351142    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1861.84676547 4623.37300807 1554.80203889]
------
Step:2, Action:East
State  181
Old Q Values:  [4.15652936e+02 2.18320789e+03 7.53021120e+03 3.33862213e+00]
New Q values:  [4.15652936e+02 2.18320789e+03 8.97134910e+03 3.33862213e+00]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831  8768.26938435 19846.21538174   767.35890262]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  8768.26938435 19846.21538174   767.35890262]
New Q values:  [-5922.26708831  6279.53118129 19846.21538174   767.35890262]
Reward: -1  Episode Reward:  17
xxxxx
x .gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 1571.08069608 9242.7447585 ]
------
Step:4, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.56815359e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.58272554e+03]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3166.88034567 1352.37702619   64.52440435  -12.17474163]
------
Step:5, Action:North
State  261
Old Q Values:  [3166.88034567 1352.37702619   64.52440435  -12.17474163]
New Q values:  [3957.5568671  1352.37702619   64.52440435  -12.17474163]
Reward: -1  Episode Reward:  25
xxxxx
x .gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 8.97134910e+03 3.33862213e+00]
------
Step:6, Action:East
State  181
Old Q Values:  [4.15652936e+02 2.18320789e+03 8.97134910e+03 3.33862213e+00]
New Q values:  [4.15652936e+02 2.18320789e+03 8.42079788e+03 3.33862213e+00]
Reward: -1  Episode Reward:  24
xxxxx
x g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 16109.52746831   231.67262594]
------
Step:7, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.83140824e+04 2.02522058e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.83140824e+04 1.73701971e+04 2.45392999e+03]
Reward: 9  Episode Reward:  33
xxxxx
x ..x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30879.71609542 16053.8674344  -4584.50430574 12352.01802394]
------
Step:8, Action:North
State  210
Old Q Values:  [11765.46566514 70177.7733875    790.72804752 17462.48602225]
New Q values:  [24805.5333086  70177.7733875    790.72804752 17462.48602225]
Reward: 9  Episode Reward:  42
xxxxx
x .ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59164.41397681 23968.63168141  -180.00807518 66979.82347515]
------
Step:9, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  1.58099869e+03 -3.22965309e-01  4.46590156e+02]
New Q values:  [ 2.12301837e+02  1.58099869e+03 -3.22965309e-01  5.07508949e+02]
Reward: 9  Episode Reward:  51
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 1078.2429548   350.00244198]
------
Step:10, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 2158.73929524 1772.94838375]
New Q values:  [   0.         1166.51141701 2055.02688029 1772.94838375]
Reward: -1  Episode Reward:  50
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  3973.77054063   660.86649319   318.71965056]
------
Step:11, Action:South
State  136
Old Q Values:  [-2129.37064562  3973.77054063   660.86649319   318.71965056]
New Q values:  [-2129.37064562  2442.00856414   660.86649319   318.71965056]
Reward: -1  Episode Reward:  49
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 2843.66782628  606.149024   2748.38982213]
------
Step:12, Action:South
State  208
Old Q Values:  [30879.71609542 16053.8674344  -4584.50430574 12352.01802394]
New Q values:  [30879.71609542 70474.57998803 -4584.50430574 12352.01802394]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  1.58099869e+03 -3.22965309e-01  5.07508949e+02]
------
Step:1, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  1.58099869e+03 -3.22965309e-01  5.07508949e+02]
New Q values:  [ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  5.07508949e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[24805.5333086  70177.7733875    790.72804752 17462.48602225]
------
Step:2, Action:South
State  210
Old Q Values:  [24805.5333086  70177.7733875    790.72804752 17462.48602225]
New Q values:  [24805.5333086  26124.14236927   790.72804752 17462.48602225]
Reward: -9991  Episode Reward:  -9982
xxxxx
x.. x
x.. x
x .gx
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  5.07508949e+02]
------
Step:1, Action:South
State  136
Old Q Values:  [-2129.37064562  2442.00856414   660.86649319   318.71965056]
New Q values:  [-2129.37064562  1835.30377354   660.86649319   318.71965056]
Reward: 9  Episode Reward:  9
xxxxx
xg. x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 2843.66782628  606.149024   2748.38982213]
------
Step:2, Action:South
State  208
Old Q Values:  [30879.71609542 70474.57998803 -4584.50430574 12352.01802394]
New Q values:  [30879.71609542 32242.86500949 -4584.50430574 12352.01802394]
Reward: 9  Episode Reward:  18
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13492.11004758  1802.67044829 -5588.09647059  5954.56446127]
------
Step:3, Action:North
State  288
Old Q Values:  [13492.11004758  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [15069.10352188  1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30879.71609542 32242.86500949 -4584.50430574 12352.01802394]
------
Step:4, Action:South
State  208
Old Q Values:  [30879.71609542 32242.86500949 -4584.50430574 12352.01802394]
New Q values:  [30879.71609542 17417.27706036 -4584.50430574 12352.01802394]
Reward: -1  Episode Reward:  16
xxxxx
x.g x
x . x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15069.10352188  1802.67044829 -5588.09647059  5954.56446127]
------
Step:5, Action:North
State  288
Old Q Values:  [15069.10352188  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [15290.95623738  1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[30879.71609542 17417.27706036 -4584.50430574 12352.01802394]
------
Step:6, Action:North
State  208
Old Q Values:  [30879.71609542 17417.27706036 -4584.50430574 12352.01802394]
New Q values:  [18858.62588558 17417.27706036 -4584.50430574 12352.01802394]
Reward: -1  Episode Reward:  14
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  5.07508949e+02]
------
Step:7, Action:South
State  130
Old Q Values:  [59164.41397681 23968.63168141  -180.00807518 66979.82347515]
New Q values:  [59164.41397681 17424.09538335  -180.00807518 66979.82347515]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[24805.5333086  26124.14236927   790.72804752 17462.48602225]
------
Step:8, Action:South
State  208
Old Q Values:  [18858.62588558 17417.27706036 -4584.50430574 12352.01802394]
New Q values:  [18858.62588558 11553.59769536 -4584.50430574 12352.01802394]
Reward: -1  Episode Reward:  12
xxxxx
x.. x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15290.95623738  1802.67044829 -5588.09647059  5954.56446127]
------
Step:9, Action:North
State  288
Old Q Values:  [15290.95623738  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [13953.02520573  1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  11
xxxxx
x.. x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[24805.5333086  26124.14236927   790.72804752 17462.48602225]
------
Step:10, Action:South
State  208
Old Q Values:  [18858.62588558 11553.59769536 -4584.50430574 12352.01802394]
New Q values:  [18858.62588558  8806.74663986 -4584.50430574 12352.01802394]
Reward: -1  Episode Reward:  10
xxxxx
x.. x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13953.02520573  1802.67044829 -5588.09647059  5954.56446127]
------
Step:11, Action:North
State  288
Old Q Values:  [13953.02520573  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [11238.19784797  1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  9
xxxxx
xg. x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18858.62588558  8806.74663986 -4584.50430574 12352.01802394]
------
Step:12, Action:North
State  208
Old Q Values:  [18858.62588558  8806.74663986 -4584.50430574 12352.01802394]
New Q values:  [14050.18980164  8806.74663986 -4584.50430574 12352.01802394]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  5.07508949e+02]
------
Step:13, Action:South
State  130
Old Q Values:  [59164.41397681 17424.09538335  -180.00807518 66979.82347515]
New Q values:  [59164.41397681 14806.28086412  -180.00807518 66979.82347515]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[24805.5333086  26124.14236927   790.72804752 17462.48602225]
------
Step:14, Action:South
State  210
Old Q Values:  [24805.5333086  26124.14236927   790.72804752 17462.48602225]
New Q values:  [24805.5333086  13820.5163021    790.72804752 17462.48602225]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11238.19784797  1802.67044829 -5588.09647059  5954.56446127]
------
Step:15, Action:North
State  288
Old Q Values:  [11238.19784797  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [ 8709.73607968  1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  5
xxxxx
x.. x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14050.18980164  8806.74663986 -4584.50430574 12352.01802394]
------
Step:16, Action:North
State  208
Old Q Values:  [14050.18980164  8806.74663986 -4584.50430574 12352.01802394]
New Q values:  [12126.81536807  8806.74663986 -4584.50430574 12352.01802394]
Reward: -1  Episode Reward:  4
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  5.07508949e+02]
------
Step:17, Action:South
State  136
Old Q Values:  [-2129.37064562  1835.30377354   660.86649319   318.71965056]
New Q values:  [-2129.37064562  4439.1269166    660.86649319   318.71965056]
Reward: -1  Episode Reward:  3
xxxxx
xg. x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12126.81536807  8806.74663986 -4584.50430574 12352.01802394]
------
Step:18, Action:West
State  208
Old Q Values:  [12126.81536807  8806.74663986 -4584.50430574 12352.01802394]
New Q values:  [12126.81536807  8806.74663986 -4584.50430574 25440.43192774]
Reward: 9  Episode Reward:  12
xxxxx
x.. x
xga x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.83140824e+04 1.73701971e+04 2.45392999e+03]
------
Step:19, Action:South
State  194
Old Q Values:  [-6.00000000e-01  1.27909737e+04  2.22176957e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  9.24533942e+03  2.22176957e+04  2.17939995e+03]
Reward: 9  Episode Reward:  21
xxxxx
x.. x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799 13031.75488414 13745.16644462]
------
Step:20, Action:East
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799 13031.75488414 13745.16644462]
New Q values:  [ 6462.57506181 -8521.23367799  7825.02277756 13745.16644462]
Reward: -1  Episode Reward:  20
xxxxx
x.. x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8709.73607968  1802.67044829 -5588.09647059  5954.56446127]
------
Step:21, Action:North
State  288
Old Q Values:  [ 8709.73607968  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [11115.42401019  1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  19
xxxxx
x.. x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12126.81536807  8806.74663986 -4584.50430574 25440.43192774]
------
Step:22, Action:North
State  210
Old Q Values:  [24805.5333086  13820.5163021    790.72804752 17462.48602225]
New Q values:  [30015.56036599 13820.5163021    790.72804752 17462.48602225]
Reward: -1  Episode Reward:  18
xxxxx
x..ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59164.41397681 14806.28086412  -180.00807518 66979.82347515]
------
Step:23, Action:West
State  130
Old Q Values:  [59164.41397681 14806.28086412  -180.00807518 66979.82347515]
New Q values:  [59164.41397681 14806.28086412  -180.00807518 47556.84527942]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  32608.63878402 69198.38629787]
------
Step:24, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261  1879.37217358]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261  2031.8381247 ]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 4248.96418422  496.66968841 -252.78192178]
------
Step:25, Action:South
State  107
Old Q Values:  [-252.35169558 4248.96418422  496.66968841 -252.78192178]
New Q values:  [-252.35169558 2363.40269767  496.66968841 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 189.19059642    0.         2214.72341328 -178.98      ]
------
Step:26, Action:North
State  187
Old Q Values:  [ 320.07341842    0.         1283.40404137    0.        ]
New Q values:  [ 836.45017667    0.         1283.40404137    0.        ]
Reward: -1  Episode Reward:  34
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2363.40269767  496.66968841 -252.78192178]
------
Step:27, Action:South
State  107
Old Q Values:  [-252.35169558 2363.40269767  496.66968841 -252.78192178]
New Q values:  [-252.35169558 1329.78229148  496.66968841 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 836.45017667    0.         1283.40404137    0.        ]
------
Step:28, Action:East
State  185
Old Q Values:  [ 189.19059642    0.         2214.72341328 -178.98      ]
New Q values:  [ 189.19059642    0.         1364.76720061 -178.98      ]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 1598.25945099 -789.02220255 1209.42904959]
------
Step:29, Action:South
State  203
Old Q Values:  [3.60604218e+00 2.47391199e+03 1.42261918e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 3.76178822e+03 1.42261918e+03 4.59156348e+03]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 1571.08069608 9242.7447585 ]
------
Step:30, Action:West
State  273
Old Q Values:  [1637.72437281 1974.75214244 1571.08069608 9242.7447585 ]
New Q values:  [ 1637.72437281  1974.75214244  1571.08069608 78383.69985004]
Reward: 100009  Episode Reward:  100040
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3957.5568671  1352.37702619   64.52440435  -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [3957.5568671  1352.37702619   64.52440435  -12.17474163]
New Q values:  [4114.66211052 1352.37702619   64.52440435  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 8.42079788e+03 3.33862213e+00]
------
Step:2, Action:East
State  183
Old Q Values:  [2343.43228624 1861.84676547 4623.37300807 1554.80203889]
New Q values:  [2343.43228624 1861.84676547 4346.39925087 1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 8305.50015881 7063.52223325 1169.39963074]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831  6279.53118129 19846.21538174   767.35890262]
New Q values:  [-5922.26708831 26032.32242753 19846.21538174   767.35890262]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  1571.08069608 78383.69985004]
------
Step:4, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  7825.02277756 13745.16644462]
New Q values:  [ 6462.57506181 -8521.23367799  7825.02277756  6731.86521101]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x g.x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4114.66211052 1352.37702619   64.52440435  -12.17474163]
------
Step:5, Action:North
State  260
Old Q Values:  [ 4109.18614746 -5704.51612281  5201.8873997  -5679.36893145]
New Q values:  [-2406.37794812 -5704.51612281  5201.8873997  -5679.36893145]
Reward: -10001  Episode Reward:  -9975
xxxxx
x.. x
xg .x
x  .x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  1571.08069608 78383.69985004]
------
Step:1, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  1571.08069608 78383.69985004]
New Q values:  [ 1637.72437281  1974.75214244  1571.08069608 32593.27857317]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.. x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4114.66211052 1352.37702619   64.52440435  -12.17474163]
------
Step:2, Action:North
State  261
Old Q Values:  [4114.66211052 1352.37702619   64.52440435  -12.17474163]
New Q values:  [4177.50420789 1352.37702619   64.52440435  -12.17474163]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 8.42079788e+03 3.33862213e+00]
------
Step:3, Action:East
State  181
Old Q Values:  [4.15652936e+02 2.18320789e+03 8.42079788e+03 3.33862213e+00]
New Q values:  [4.15652936e+02 2.18320789e+03 1.78679439e+04 3.33862213e+00]
Reward: -9991  Episode Reward:  -9973
xxxxx
x...x
x g x
x  .x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  7825.02277756  6731.86521101]
------
Step:1, Action:East
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  7825.02277756  6731.86521101]
New Q values:  [ 6462.57506181 -8521.23367799  6470.03631408  6731.86521101]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11115.42401019  1802.67044829 -5588.09647059  5954.56446127]
------
Step:2, Action:North
State  288
Old Q Values:  [11115.42401019  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [ 6083.6991824   1802.67044829 -5588.09647059  5954.56446127]
Reward: -9991  Episode Reward:  -9982
xxxxx
x. .x
x..gx
x.  x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12126.81536807  8806.74663986 -4584.50430574 25440.43192774]
------
Step:1, Action:West
State  208
Old Q Values:  [12126.81536807  8806.74663986 -4584.50430574 25440.43192774]
New Q values:  [12126.81536807  8806.74663986 -4584.50430574 24675.79748926]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. .x
x.g x
x...x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 1.78679439e+04 3.33862213e+00]
------
Step:1, Action:South
State  180
Old Q Values:  [ 4507.91302948  6501.82530965  6268.30788824 -4966.32149798]
New Q values:  [ 4507.91302948  4166.69634377  6268.30788824 -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-2406.37794812 -5704.51612281  5201.8873997  -5679.36893145]
------
Step:2, Action:East
State  261
Old Q Values:  [4177.50420789 1352.37702619   64.52440435  -12.17474163]
New Q values:  [4177.50420789 1352.37702619 2058.24585227  -12.17474163]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 6756.78696842 -5807.06396197  2305.08372417  1722.42907986]
------
Step:3, Action:East
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  6470.03631408  6731.86521101]
New Q values:  [ 6462.57506181 -8521.23367799  4418.52428035  6731.86521101]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  .x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6083.6991824   1802.67044829 -5588.09647059  5954.56446127]
------
Step:4, Action:North
State  288
Old Q Values:  [ 6083.6991824   1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [11443.54778276  1802.67044829 -5588.09647059  5954.56446127]
Reward: 9  Episode Reward:  36
xxxxx
x...x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[30015.56036599 13820.5163021    790.72804752 17462.48602225]
------
Step:5, Action:North
State  210
Old Q Values:  [30015.56036599 13820.5163021    790.72804752 17462.48602225]
New Q values:  [29760.94833944 13820.5163021    790.72804752 17462.48602225]
Reward: 9  Episode Reward:  45
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[59164.41397681 14806.28086412  -180.00807518 47556.84527942]
------
Step:6, Action:North
State  130
Old Q Values:  [59164.41397681 14806.28086412  -180.00807518 47556.84527942]
New Q values:  [41234.48978377 14806.28086412  -180.00807518 47556.84527942]
Reward: -301  Episode Reward:  -256
xxxxx
x..ax
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14806.28086412  -180.00807518 47556.84527942]
------
Step:7, Action:West
State  130
Old Q Values:  [41234.48978377 14806.28086412  -180.00807518 47556.84527942]
New Q values:  [41234.48978377 14806.28086412  -180.00807518 58098.10935578]
Reward: 9  Episode Reward:  -247
xxxxx
x.a x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 130233.23748005]
------
Step:8, Action:West
State  114
Old Q Values:  [ -180.6         3557.6642036  32608.63878402 69198.38629787]
New Q values:  [  -180.6          3557.6642036   32608.63878402 105368.3059805 ]
Reward: 100009  Episode Reward:  99762
xxxxx
xa  x
x g x
x   x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   838.81768154   887.62838252]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   838.81768154   887.62838252]
New Q values:  [ -281.736      -1150.91067548   838.81768154  1455.33129852]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 3649.59981836 1540.97816408 -180.6       ]
------
Step:2, Action:East
State  108
Old Q Values:  [-8463.16477134  2233.8294456   1096.09124309     0.        ]
New Q values:  [-8463.16477134  2233.8294456   1637.39424494     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
xga.x
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3998.52582569   719.59279005]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   838.81768154  1455.33129852]
New Q values:  [ -281.736      -1150.91067548  6848.26652003  1455.33129852]
Reward: 9  Episode Reward:  17
xxxxx
x  ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  5.07508949e+02]
------
Step:4, Action:South
State  136
Old Q Values:  [-2129.37064562  4439.1269166    660.86649319   318.71965056]
New Q values:  [-2129.37064562  2634.15111452   660.86649319   318.71965056]
Reward: 9  Episode Reward:  26
xxxxx
xg  x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 2843.66782628  606.149024   2748.38982213]
------
Step:5, Action:South
State  216
Old Q Values:  [1719.34319057 2843.66782628  606.149024   2748.38982213]
New Q values:  [1719.34319057 4575.93146534  606.149024   2748.38982213]
Reward: 9  Episode Reward:  35
xxxxx
x g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11443.54778276  1802.67044829 -5588.09647059  5954.56446127]
------
Step:6, Action:North
State  288
Old Q Values:  [11443.54778276  1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [ 5949.5985527   1802.67044829 -5588.09647059  5954.56446127]
Reward: -1  Episode Reward:  34
xxxxx
x  gx
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 4575.93146534  606.149024   2748.38982213]
------
Step:7, Action:South
State  216
Old Q Values:  [1719.34319057 4575.93146534  606.149024   2748.38982213]
New Q values:  [1719.34319057 3616.14192452  606.149024   2748.38982213]
Reward: -1  Episode Reward:  33
xxxxx
x  gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5949.5985527   1802.67044829 -5588.09647059  5954.56446127]
------
Step:8, Action:West
State  288
Old Q Values:  [ 5949.5985527   1802.67044829 -5588.09647059  5954.56446127]
New Q values:  [ 5949.5985527   1802.67044829 -5588.09647059  4406.78534781]
Reward: 9  Episode Reward:  42
xxxxx
x g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  4418.52428035  6731.86521101]
------
Step:9, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  4418.52428035  6731.86521101]
New Q values:  [ 6462.57506181 -8521.23367799  4418.52428035 17379.34803104]
Reward: 9  Episode Reward:  51
xxxxx
x   x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[48937.33982213 12764.58618105  3851.09595999  1875.31501677]
------
Step:10, Action:North
State  256
Old Q Values:  [47016.92797738 15850.58020692  6661.12187543   644.94785455]
New Q values:  [105043.13448859  15850.58020692   6661.12187543    644.94785455]
Reward: 90009  Episode Reward:  90060
xxxxx
x   x
xg  x
x   x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  4418.52428035 17379.34803104]
------
Step:1, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  1571.08069608 32593.27857317]
New Q values:  [ 1637.72437281  1974.75214244  1571.08069608 14295.96269163]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4177.50420789 1352.37702619 2058.24585227  -12.17474163]
------
Step:2, Action:North
State  261
Old Q Values:  [4177.50420789 1352.37702619 2058.24585227  -12.17474163]
New Q values:  [7036.78484408 1352.37702619 2058.24585227  -12.17474163]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 1.78679439e+04 3.33862213e+00]
------
Step:3, Action:East
State  180
Old Q Values:  [ 4507.91302948  4166.69634377  6268.30788824 -4966.32149798]
New Q values:  [ 4507.91302948  4166.69634377  7345.58139579 -4966.32149798]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 16109.52746831   231.67262594]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.83140824e+04 1.73701971e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.83140824e+04 1.43562181e+04 2.45392999e+03]
Reward: 9  Episode Reward:  36
xxxxx
x.g.x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12126.81536807  8806.74663986 -4584.50430574 24675.79748926]
------
Step:5, Action:West
State  208
Old Q Values:  [12126.81536807  8806.74663986 -4584.50430574 24675.79748926]
New Q values:  [12126.81536807  8806.74663986 -4584.50430574 17679.41572396]
Reward: -1  Episode Reward:  35
xxxxx
x. gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 26032.32242753 19846.21538174   767.35890262]
------
Step:6, Action:South
State  193
Old Q Values:  [-5922.26708831 26032.32242753 19846.21538174   767.35890262]
New Q values:  [-5922.26708831 11369.57439469 19846.21538174   767.35890262]
Reward: -1  Episode Reward:  34
xxxxx
x. .x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.19081808e+03 1.58272554e+03]
------
Step:7, Action:East
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  4418.52428035 17379.34803104]
New Q values:  [ 6462.57506181 -8521.23367799  3557.68927795 17379.34803104]
Reward: 9  Episode Reward:  43
xxxxx
x. .x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5949.5985527   1802.67044829 -5588.09647059  4406.78534781]
------
Step:8, Action:North
State  288
Old Q Values:  [ 5949.5985527   1802.67044829 -5588.09647059  4406.78534781]
New Q values:  [11307.52392291  1802.67044829 -5588.09647059  4406.78534781]
Reward: -1  Episode Reward:  42
xxxxx
x. .x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[29760.94833944 13820.5163021    790.72804752 17462.48602225]
------
Step:9, Action:North
State  208
Old Q Values:  [12126.81536807  8806.74663986 -4584.50430574 17679.41572396]
New Q values:  [22285.55895396  8806.74663986 -4584.50430574 17679.41572396]
Reward: 9  Episode Reward:  51
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14806.28086412  -180.00807518 58098.10935578]
------
Step:10, Action:West
State  130
Old Q Values:  [41234.48978377 14806.28086412  -180.00807518 58098.10935578]
New Q values:  [41234.48978377 14806.28086412  -180.00807518 54849.13553646]
Reward: -1  Episode Reward:  50
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 105368.3059805 ]
------
Step:11, Action:West
State  115
Old Q Values:  [  -180.6          1117.48597573  18639.04650494 130233.23748005]
New Q values:  [  -180.6          1117.48597573  18639.04650494 129782.24645338]
Reward: 100009  Episode Reward:  100059
xxxxx
xa  x
x   x
x  gx
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 3649.59981836 1540.97816408 -180.6       ]
------
Step:1, Action:South
State  110
Old Q Values:  [-239.29051573 3649.59981836 1540.97816408 -180.6       ]
New Q values:  [ -239.29051573 -2331.08565392  1540.97816408  -180.6       ]
Reward: -9991  Episode Reward:  -9991
xxxxx
x ..x
xg..x
x ..x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[7036.78484408 1352.37702619 2058.24585227  -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [7036.78484408 1352.37702619 2058.24585227  -12.17474163]
New Q values:  [8180.49709855 1352.37702619 2058.24585227  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 1.78679439e+04 3.33862213e+00]
------
Step:2, Action:East
State  181
Old Q Values:  [4.15652936e+02 2.18320789e+03 1.78679439e+04 3.33862213e+00]
New Q values:  [4.15652936e+02 2.18320789e+03 1.31004422e+04 3.33862213e+00]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11369.57439469 19846.21538174   767.35890262]
------
Step:3, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.83140824e+04 1.43562181e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.83140824e+04 1.24335549e+04 2.45392999e+03]
Reward: 9  Episode Reward:  17
xxxxx
x.g.x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22285.55895396  8806.74663986 -4584.50430574 17679.41572396]
------
Step:4, Action:North
State  208
Old Q Values:  [22285.55895396  8806.74663986 -4584.50430574 17679.41572396]
New Q values:  [25374.36424252  8806.74663986 -4584.50430574 17679.41572396]
Reward: 9  Episode Reward:  26
xxxxx
x..ax
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14806.28086412  -180.00807518 54849.13553646]
------
Step:5, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  5.07508949e+02]
New Q values:  [ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  3.91430775e+04]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 129782.24645338]
------
Step:6, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261  2031.8381247 ]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261  1269.78140627]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1505.48718796  431.25952337 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 1505.48718796  431.25952337 -120.29354603]
New Q values:  [-177.44732869 1292.7616905   431.25952337 -120.29354603]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[   3.06655861 2303.8893844  2029.22435297    0.        ]
------
Step:8, Action:South
State  189
Old Q Values:  [ 337.36081627 1563.53564458 2615.00983374  154.04646645]
New Q values:  [ 337.36081627 3078.9633874  2615.00983374  154.04646645]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[8180.49709855 1352.37702619 2058.24585227  -12.17474163]
------
Step:9, Action:North
State  261
Old Q Values:  [8180.49709855 1352.37702619 2058.24585227  -12.17474163]
New Q values:  [3962.76565474 1352.37702619 2058.24585227  -12.17474163]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[   3.06655861 2303.8893844  2029.22435297    0.        ]
------
Step:10, Action:South
State  189
Old Q Values:  [ 337.36081627 3078.9633874  2615.00983374  154.04646645]
New Q values:  [ 337.36081627 2419.81505138 2615.00983374  154.04646645]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3962.76565474 1352.37702619 2058.24585227  -12.17474163]
------
Step:11, Action:North
State  260
Old Q Values:  [-2406.37794812 -5704.51612281  5201.8873997  -5679.36893145]
New Q values:  [-5034.27990205 -5704.51612281  5201.8873997  -5679.36893145]
Reward: -10001  Episode Reward:  -9961
xxxxx
x   x
xg  x
x ..x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   385.23769261  1269.78140627]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261  1269.78140627]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261   912.24724995]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1329.78229148  496.66968841 -252.78192178]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 1292.7616905   431.25952337 -120.29354603]
New Q values:  [-177.44732869 4452.63732493  431.25952337 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 1.31004422e+04 3.33862213e+00]
------
Step:3, Action:East
State  189
Old Q Values:  [ 337.36081627 2419.81505138 2615.00983374  154.04646645]
New Q values:  [ 337.36081627 2419.81505138 1530.88176879  154.04646645]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 1598.25945099 -789.02220255 1209.42904959]
------
Step:4, Action:South
State  197
Old Q Values:  [-5833.78831344  1079.64014969 -5538.30598082   403.06255908]
New Q values:  [-5833.78831344  1394.50148356 -5538.30598082   403.06255908]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.19081808e+03 1.58272554e+03]
------
Step:5, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  1571.08069608 14295.96269163]
New Q values:  [ 1637.72437281  1974.75214244 -1973.9105447  14295.96269163]
Reward: -9991  Episode Reward:  -9955
xxxxx
x  .x
x   x
x. gx
xxxxx
xxxxx
x.a.x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   385.23769261   912.24724995]
------
Step:1, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261   912.24724995]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261   769.23358742]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1329.78229148  496.66968841 -252.78192178]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 4452.63732493  431.25952337 -120.29354603]
New Q values:  [-177.44732869 5716.5875787   431.25952337 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xag.x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 1.31004422e+04 3.33862213e+00]
------
Step:3, Action:South
State  180
Old Q Values:  [ 4507.91302948  4166.69634377  7345.58139579 -4966.32149798]
New Q values:  [ 4507.91302948  3232.64475742  7345.58139579 -4966.32149798]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
xg..x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5034.27990205 -5704.51612281  5201.8873997  -5679.36893145]
------
Step:4, Action:East
State  260
Old Q Values:  [-5034.27990205 -5704.51612281  5201.8873997  -5679.36893145]
New Q values:  [-5034.27990205 -5704.51612281  4113.1910504  -5679.36893145]
Reward: 9  Episode Reward:  36
xxxxx
xg .x
x ..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 6756.78696842 -5807.06396197  2305.08372417  1722.42907986]
------
Step:5, Action:North
State  276
Old Q Values:  [ 6756.78696842 -5807.06396197  2305.08372417  1722.42907986]
New Q values:  [ 7540.97302786 -5807.06396197  2305.08372417  1722.42907986]
Reward: 9  Episode Reward:  45
xxxxx
x g.x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 16109.52746831   231.67262594]
------
Step:6, Action:East
State  196
Old Q Values:  [ -534.40699599  1465.42682022 16109.52746831   231.67262594]
New Q values:  [ -534.40699599  1465.42682022 14061.52026008   231.67262594]
Reward: 9  Episode Reward:  54
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[25374.36424252  8806.74663986 -4584.50430574 17679.41572396]
------
Step:7, Action:North
State  208
Old Q Values:  [25374.36424252  8806.74663986 -4584.50430574 17679.41572396]
New Q values:  [86609.88635795  8806.74663986 -4584.50430574 17679.41572396]
Reward: 100009  Episode Reward:  100063
xxxxx
x  ax
xg  x
x   x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6848.26652003  1455.33129852]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  6848.26652003  1455.33129852]
New Q values:  [ -281.736      -1150.91067548 14487.62986265  1455.33129852]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  3.91430775e+04]
------
Step:2, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  3.91430775e+04]
New Q values:  [ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  2.00029200e+04]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 14487.62986265  1455.33129852]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 14487.62986265  1455.33129852]
New Q values:  [ -281.736      -1150.91067548 12301.79139247  1455.33129852]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  2.00029200e+04]
------
Step:4, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  2.16911315e+04 -3.22965309e-01  2.00029200e+04]
New Q values:  [ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  2.00029200e+04]
Reward: 9  Episode Reward:  16
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 3616.14192452  606.149024   2748.38982213]
------
Step:5, Action:South
State  216
Old Q Values:  [1719.34319057 3616.14192452  606.149024   2748.38982213]
New Q values:  [1719.34319057 4844.11394668  606.149024   2748.38982213]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11307.52392291  1802.67044829 -5588.09647059  4406.78534781]
------
Step:6, Action:North
State  288
Old Q Values:  [11307.52392291  1802.67044829 -5588.09647059  4406.78534781]
New Q values:  [ 5975.64375317  1802.67044829 -5588.09647059  4406.78534781]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 4844.11394668  606.149024   2748.38982213]
------
Step:7, Action:South
State  216
Old Q Values:  [1719.34319057 4844.11394668  606.149024   2748.38982213]
New Q values:  [1719.34319057 3729.73870462  606.149024   2748.38982213]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5975.64375317  1802.67044829 -5588.09647059  4406.78534781]
------
Step:8, Action:North
State  288
Old Q Values:  [ 5975.64375317  1802.67044829 -5588.09647059  4406.78534781]
New Q values:  [ 3508.57911265  1802.67044829 -5588.09647059  4406.78534781]
Reward: -1  Episode Reward:  22
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 3729.73870462  606.149024   2748.38982213]
------
Step:9, Action:South
State  208
Old Q Values:  [86609.88635795  8806.74663986 -4584.50430574 17679.41572396]
New Q values:  [86609.88635795  4844.13426029 -4584.50430574 17679.41572396]
Reward: -1  Episode Reward:  21
xxxxx
x.g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3508.57911265  1802.67044829 -5588.09647059  4406.78534781]
------
Step:10, Action:West
State  288
Old Q Values:  [ 3508.57911265  1802.67044829 -5588.09647059  4406.78534781]
New Q values:  [ 3508.57911265  1802.67044829 -5588.09647059  6981.91854844]
Reward: 9  Episode Reward:  30
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795 17379.34803104]
------
Step:11, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  3557.68927795 17379.34803104]
New Q values:  [ 6462.57506181 -8521.23367799  3557.68927795 38470.07955899]
Reward: 9  Episode Reward:  39
xxxxx
x.  x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[105043.13448859  15850.58020692   6661.12187543    644.94785455]
------
Step:12, Action:South
State  256
Old Q Values:  [105043.13448859  15850.58020692   6661.12187543    644.94785455]
New Q values:  [105043.13448859  37672.57242934   6661.12187543    644.94785455]
Reward: -301  Episode Reward:  -262
xxxxx
xg  x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[105043.13448859  37672.57242934   6661.12187543    644.94785455]
------
Step:13, Action:North
State  257
Old Q Values:  [48937.33982213 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [37723.15134326 12764.58618105  3851.09595999  1875.31501677]
Reward: 9  Episode Reward:  -253
xxxxx
x.g x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039  10693.48214366     0.        ]
------
Step:14, Action:North
State  176
Old Q Values:  [103770.24881615   1621.55095326 107436.54432545      0.        ]
New Q values:  [ 95513.49952646   1621.55095326 107436.54432545      0.        ]
Reward: 90009  Episode Reward:  89756
xxxxx
xg  x
x   x
x   x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  2.22176957e+04  2.17939995e+03]
------
Step:1, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.24533942e+03  2.22176957e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  9.24533942e+03  1.78207628e+04  2.17939995e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[29760.94833944 13820.5163021    790.72804752 17462.48602225]
------
Step:2, Action:North
State  210
Old Q Values:  [29760.94833944 13820.5163021    790.72804752 17462.48602225]
New Q values:  [28364.51999671 13820.5163021    790.72804752 17462.48602225]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.  x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14806.28086412  -180.00807518 54849.13553646]
------
Step:3, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  2.00029200e+04]
New Q values:  [ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  3.96170598e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 105368.3059805 ]
------
Step:4, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261   769.23358742]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261   712.02812241]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.  x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1329.78229148  496.66968841 -252.78192178]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 5716.5875787   431.25952337 -120.29354603]
New Q values:  [-177.44732869 2983.2018468   431.25952337 -120.29354603]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[   3.06655861 2303.8893844  2029.22435297    0.        ]
------
Step:6, Action:South
State  191
Old Q Values:  [   3.06655861 2303.8893844  2029.22435297    0.        ]
New Q values:  [   3.06655861 2109.78545018 2029.22435297    0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x   x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3962.76565474 1352.37702619 2058.24585227  -12.17474163]
------
Step:7, Action:North
State  261
Old Q Values:  [3962.76565474 1352.37702619 2058.24585227  -12.17474163]
New Q values:  [2310.45077731 1352.37702619 2058.24585227  -12.17474163]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 2419.81505138 1530.88176879  154.04646645]
------
Step:8, Action:South
State  189
Old Q Values:  [ 337.36081627 2419.81505138 1530.88176879  154.04646645]
New Q values:  [ 337.36081627 1660.46125375 1530.88176879  154.04646645]
Reward: -1  Episode Reward:  42
xxxxx
x  gx
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2310.45077731 1352.37702619 2058.24585227  -12.17474163]
------
Step:9, Action:North
State  261
Old Q Values:  [2310.45077731 1352.37702619 2058.24585227  -12.17474163]
New Q values:  [1421.71868705 1352.37702619 2058.24585227  -12.17474163]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1660.46125375 1530.88176879  154.04646645]
------
Step:10, Action:South
State  189
Old Q Values:  [ 337.36081627 1660.46125375 1530.88176879  154.04646645]
New Q values:  [ 337.36081627 1281.05825718 1530.88176879  154.04646645]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x g x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1421.71868705 1352.37702619 2058.24585227  -12.17474163]
------
Step:11, Action:East
State  261
Old Q Values:  [1421.71868705 1352.37702619 2058.24585227  -12.17474163]
New Q values:  [1421.71868705 1352.37702619 1785.94376459  -12.17474163]
Reward: 9  Episode Reward:  49
xxxxx
x   x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.19081808e+03 1.58272554e+03]
------
Step:12, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 -1973.9105447  14295.96269163]
New Q values:  [ 1637.72437281  1974.75214244 61310.41134665 14295.96269163]
Reward: 100009  Episode Reward:  100058
xxxxx
x  gx
x   x
x  ax
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3508.57911265  1802.67044829 -5588.09647059  6981.91854844]
------
Step:1, Action:West
State  288
Old Q Values:  [ 3508.57911265  1802.67044829 -5588.09647059  6981.91854844]
New Q values:  [ 3508.57911265  1802.67044829 -5588.09647059 14339.19128707]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795 38470.07955899]
------
Step:2, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  3557.68927795 38470.07955899]
New Q values:  [ 6462.57506181 -8521.23367799  3557.68927795 15929.21495297]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1421.71868705 1352.37702619 1785.94376459  -12.17474163]
------
Step:3, Action:East
State  257
Old Q Values:  [37723.15134326 12764.58618105  3851.09595999  1875.31501677]
New Q values:  [37723.15134326 12764.58618105  6318.60286989  1875.31501677]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x.g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795 15929.21495297]
------
Step:4, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 61310.41134665 14295.96269163]
New Q values:  [ 1637.72437281  1974.75214244 61310.41134665 17034.73047963]
Reward: -1  Episode Reward:  16
xxxxx
x. .x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37723.15134326 12764.58618105  6318.60286989  1875.31501677]
------
Step:5, Action:North
State  261
Old Q Values:  [1421.71868705 1352.37702619 1785.94376459  -12.17474163]
New Q values:  [4504.22012354 1352.37702619 1785.94376459  -12.17474163]
Reward: 9  Episode Reward:  25
xxxxx
x. .x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.18320789e+03 1.31004422e+04 3.33862213e+00]
------
Step:6, Action:South
State  181
Old Q Values:  [4.15652936e+02 2.18320789e+03 1.31004422e+04 3.33862213e+00]
New Q values:  [4.15652936e+02 2.22394919e+03 1.31004422e+04 3.33862213e+00]
Reward: -1  Episode Reward:  24
xxxxx
x.g.x
x ..x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4504.22012354 1352.37702619 1785.94376459  -12.17474163]
------
Step:7, Action:North
State  261
Old Q Values:  [4504.22012354 1352.37702619 1785.94376459  -12.17474163]
New Q values:  [5731.22069814 1352.37702619 1785.94376459  -12.17474163]
Reward: -1  Episode Reward:  23
xxxxx
x. gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.22394919e+03 1.31004422e+04 3.33862213e+00]
------
Step:8, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039  10693.48214366     0.        ]
New Q values:  [60476.05138135 21430.9929039  10236.65747199     0.        ]
Reward: 9  Episode Reward:  32
xxxxx
x. .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11369.57439469 19846.21538174   767.35890262]
------
Step:9, Action:South
State  193
Old Q Values:  [-5922.26708831 11369.57439469 19846.21538174   767.35890262]
New Q values:  [-5922.26708831 22940.35316187 19846.21538174   767.35890262]
Reward: -1  Episode Reward:  31
xxxxx
x. gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 61310.41134665 17034.73047963]
------
Step:10, Action:East
State  276
Old Q Values:  [ 7540.97302786 -5807.06396197  2305.08372417  1722.42907986]
New Q values:  [ 7540.97302786 -5807.06396197  5223.19087579  1722.42907986]
Reward: -1  Episode Reward:  30
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3508.57911265  1802.67044829 -5588.09647059 14339.19128707]
------
Step:11, Action:West
State  288
Old Q Values:  [ 3508.57911265  1802.67044829 -5588.09647059 14339.19128707]
New Q values:  [ 3508.57911265  1802.67044829 -5588.09647059  7997.36842319]
Reward: -1  Episode Reward:  29
xxxxx
xg .x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7540.97302786 -5807.06396197  5223.19087579  1722.42907986]
------
Step:12, Action:North
State  276
Old Q Values:  [ 7540.97302786 -5807.06396197  5223.19087579  1722.42907986]
New Q values:  [ 7234.24528917 -5807.06396197  5223.19087579  1722.42907986]
Reward: -1  Episode Reward:  28
xxxxx
x. .x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 14061.52026008   231.67262594]
------
Step:13, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.83140824e+04 1.24335549e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.83140824e+04 3.09617879e+04 2.45392999e+03]
Reward: 9  Episode Reward:  37
xxxxx
x. .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[86609.88635795  4844.13426029 -4584.50430574 17679.41572396]
------
Step:14, Action:North
State  208
Old Q Values:  [86609.88635795  4844.13426029 -4584.50430574 17679.41572396]
New Q values:  [51104.09520412  4844.13426029 -4584.50430574 17679.41572396]
Reward: 9  Episode Reward:  46
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14806.28086412  -180.00807518 54849.13553646]
------
Step:15, Action:West
State  130
Old Q Values:  [41234.48978377 14806.28086412  -180.00807518 54849.13553646]
New Q values:  [41234.48978377 14806.28086412  -180.00807518 53549.54600874]
Reward: -1  Episode Reward:  45
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 105368.3059805 ]
------
Step:16, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   32608.63878402 105368.3059805 ]
New Q values:  [  -180.6          3557.6642036   32608.63878402 119836.27385356]
Reward: 100009  Episode Reward:  100054
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2634.15111452   660.86649319   318.71965056]
------
Step:1, Action:South
State  136
Old Q Values:  [-2129.37064562  2634.15111452   660.86649319   318.71965056]
New Q values:  [-2129.37064562  2177.9820572    660.86649319   318.71965056]
Reward: 9  Episode Reward:  9
xxxxx
x g x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 3729.73870462  606.149024   2748.38982213]
------
Step:2, Action:South
State  208
Old Q Values:  [51104.09520412  4844.13426029 -4584.50430574 17679.41572396]
New Q values:  [51104.09520412  4342.26423107 -4584.50430574 17679.41572396]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3508.57911265  1802.67044829 -5588.09647059  7997.36842319]
------
Step:3, Action:West
State  288
Old Q Values:  [ 3508.57911265  1802.67044829 -5588.09647059  7997.36842319]
New Q values:  [ 3508.57911265  1802.67044829 -5588.09647059  7983.11185517]
Reward: 9  Episode Reward:  27
xxxxx
x . x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795 15929.21495297]
------
Step:4, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  3557.68927795 15929.21495297]
New Q values:  [ 6462.57506181 -8521.23367799  3557.68927795 17694.03138417]
Reward: 9  Episode Reward:  36
xxxxx
x . x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37723.15134326 12764.58618105  6318.60286989  1875.31501677]
------
Step:5, Action:North
State  260
Old Q Values:  [-5034.27990205 -5704.51612281  4113.1910504  -5679.36893145]
New Q values:  [-5804.63754208 -5704.51612281  4113.1910504  -5679.36893145]
Reward: -9991  Episode Reward:  -9955
xxxxx
x . x
xg. x
x   x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 61310.41134665 17034.73047963]
------
Step:1, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 61310.41134665 17034.73047963]
New Q values:  [ 1637.72437281  1974.75214244 26924.49809521 17034.73047963]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3508.57911265  1802.67044829 -5588.09647059  7983.11185517]
------
Step:2, Action:West
State  288
Old Q Values:  [ 3508.57911265  1802.67044829 -5588.09647059  7983.11185517]
New Q values:  [ 3508.57911265  1802.67044829 -5588.09647059 11269.99417063]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x...x
x.agx
xxxxx
Step:3, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 26924.49809521 17034.73047963]
New Q values:  [ 1637.72437281  1974.75214244 12846.54866665 17034.73047963]
Reward: -10001  Episode Reward:  -9993
xxxxx
x.. x
x..gx
x.a x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795 17694.03138417]
------
Step:1, Action:West
State  276
Old Q Values:  [ 7234.24528917 -5807.06396197  5223.19087579  1722.42907986]
New Q values:  [ 7234.24528917 -5807.06396197  5223.19087579  1928.32894707]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5804.63754208 -5704.51612281  4113.1910504  -5679.36893145]
------
Step:2, Action:East
State  260
Old Q Values:  [-5804.63754208 -5704.51612281  4113.1910504  -5679.36893145]
New Q values:  [-5804.63754208 -5704.51612281  6952.88583541 -5679.36893145]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795 17694.03138417]
------
Step:3, Action:North
State  276
Old Q Values:  [ 7234.24528917 -5807.06396197  5223.19087579  1928.32894707]
New Q values:  [ 7117.55419369 -5807.06396197  5223.19087579  1928.32894707]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
xga.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 14061.52026008   231.67262594]
------
Step:4, Action:East
State  192
Old Q Values:  [3.89777037e-01 6.83140824e+04 3.09617879e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 6.83140824e+04 2.77213437e+04 2.45392999e+03]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x.gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[51104.09520412  4342.26423107 -4584.50430574 17679.41572396]
------
Step:5, Action:North
State  208
Old Q Values:  [51104.09520412  4342.26423107 -4584.50430574 17679.41572396]
New Q values:  [21100.43269881  4342.26423107 -4584.50430574 17679.41572396]
Reward: 9  Episode Reward:  35
xxxxx
x gax
x.  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  2177.9820572    660.86649319   318.71965056]
------
Step:6, Action:South
State  136
Old Q Values:  [-2129.37064562  2177.9820572    660.86649319   318.71965056]
New Q values:  [-2129.37064562  7200.72263252   660.86649319   318.71965056]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x. ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21100.43269881  4342.26423107 -4584.50430574 17679.41572396]
------
Step:7, Action:West
State  208
Old Q Values:  [21100.43269881  4342.26423107 -4584.50430574 17679.41572396]
New Q values:  [21100.43269881  4342.26423107 -4584.50430574 27565.39100775]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x.a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.83140824e+04 2.77213437e+04 2.45392999e+03]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.83140824e+04 2.77213437e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 3.26332424e+04 2.77213437e+04 2.45392999e+03]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795 17694.03138417]
------
Step:9, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  3557.68927795 17694.03138417]
New Q values:  [ 6462.57506181 -8521.23367799  3557.68927795  9162.87830429]
Reward: -1  Episode Reward:  31
xxxxx
x . x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5804.63754208 -5704.51612281  6952.88583541 -5679.36893145]
------
Step:10, Action:East
State  260
Old Q Values:  [-5804.63754208 -5704.51612281  6952.88583541 -5679.36893145]
New Q values:  [-5804.63754208 -5704.51612281  5529.41782545 -5679.36893145]
Reward: -1  Episode Reward:  30
xxxxx
xg. x
x.  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795  9162.87830429]
------
Step:11, Action:West
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  3557.68927795  9162.87830429]
New Q values:  [ 6462.57506181 -8521.23367799  3557.68927795  5323.37666935]
Reward: -1  Episode Reward:  29
xxxxx
x . x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-5804.63754208 -5704.51612281  5529.41782545 -5679.36893145]
------
Step:12, Action:East
State  260
Old Q Values:  [-5804.63754208 -5704.51612281  5529.41782545 -5679.36893145]
New Q values:  [-5804.63754208 -5704.51612281  4149.93964872 -5679.36893145]
Reward: -1  Episode Reward:  28
xxxxx
xg. x
x.  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6462.57506181 -8521.23367799  3557.68927795  5323.37666935]
------
Step:13, Action:North
State  272
Old Q Values:  [ 6462.57506181 -8521.23367799  3557.68927795  5323.37666935]
New Q values:  [12374.40273656 -8521.23367799  3557.68927795  5323.37666935]
Reward: -1  Episode Reward:  27
xxxxx
x . x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.26332424e+04 2.77213437e+04 2.45392999e+03]
------
Step:14, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.26332424e+04 2.77213437e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.67650178e+04 2.77213437e+04 2.45392999e+03]
Reward: -1  Episode Reward:  26
xxxxx
x . x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[12374.40273656 -8521.23367799  3557.68927795  5323.37666935]
------
Step:15, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 12846.54866665 17034.73047963]
New Q values:  [ 1637.72437281  1974.75214244 12846.54866665  8532.6584013 ]
Reward: -1  Episode Reward:  25
xxxxx
x . x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5731.22069814 1352.37702619 1785.94376459  -12.17474163]
------
Step:16, Action:North
State  261
Old Q Values:  [5731.22069814 1352.37702619 1785.94376459  -12.17474163]
New Q values:  [3601.80805452 1352.37702619 1785.94376459  -12.17474163]
Reward: 9  Episode Reward:  34
xxxxx
x . x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1861.84676547 4346.39925087 1554.80203889]
------
Step:17, Action:East
State  183
Old Q Values:  [2343.43228624 1861.84676547 4346.39925087 1554.80203889]
New Q values:  [2343.43228624 1861.84676547 7084.18853733 1554.80203889]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  1.78207628e+04  2.17939995e+03]
------
Step:18, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.67650178e+04 2.77213437e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.67650178e+04 1.93575548e+04 2.45392999e+03]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[21100.43269881  4342.26423107 -4584.50430574 27565.39100775]
------
Step:19, Action:North
State  208
Old Q Values:  [21100.43269881  4342.26423107 -4584.50430574 27565.39100775]
New Q values:  [24504.43688214  4342.26423107 -4584.50430574 27565.39100775]
Reward: -1  Episode Reward:  31
xxxxx
x .ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14806.28086412  -180.00807518 53549.54600874]
------
Step:20, Action:West
State  130
Old Q Values:  [41234.48978377 14806.28086412  -180.00807518 53549.54600874]
New Q values:  [41234.48978377 14806.28086412  -180.00807518 60359.89233951]
Reward: 9  Episode Reward:  40
xxxxx
x a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 129782.24645338]
------
Step:21, Action:West
State  127
Old Q Values:  [   0.            1.67014986  895.48700012 1172.13393898]
New Q values:  [   0.            1.67014986  895.48700012 1363.21412963]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2983.2018468   431.25952337 -120.29354603]
------
Step:22, Action:South
State  111
Old Q Values:  [-177.44732869 2983.2018468   431.25952337 -120.29354603]
New Q values:  [-177.44732869 1651.94526936  431.25952337 -120.29354603]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1281.05825718 1530.88176879  154.04646645]
------
Step:23, Action:South
State  189
Old Q Values:  [ 337.36081627 1281.05825718 1530.88176879  154.04646645]
New Q values:  [ 337.36081627 1592.36571923 1530.88176879  154.04646645]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3601.80805452 1352.37702619 1785.94376459  -12.17474163]
------
Step:24, Action:North
State  261
Old Q Values:  [3601.80805452 1352.37702619 1785.94376459  -12.17474163]
New Q values:  [1917.83293758 1352.37702619 1785.94376459  -12.17474163]
Reward: -1  Episode Reward:  36
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1592.36571923 1530.88176879  154.04646645]
------
Step:25, Action:South
State  189
Old Q Values:  [ 337.36081627 1592.36571923 1530.88176879  154.04646645]
New Q values:  [ 337.36081627 1211.69616896 1530.88176879  154.04646645]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1917.83293758 1352.37702619 1785.94376459  -12.17474163]
------
Step:26, Action:North
State  260
Old Q Values:  [-5804.63754208 -5704.51612281  4149.93964872 -5679.36893145]
New Q values:  [ -393.58373963 -5704.51612281  4149.93964872 -5679.36893145]
Reward: -1  Episode Reward:  34
xxxxx
xg  x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NW
[-6523.78898263  6429.570924    1963.43704178     0.        ]
------
Step:27, Action:South
State  188
Old Q Values:  [-6523.78898263  6429.570924    1963.43704178     0.        ]
New Q values:  [-6523.78898263  3816.21026422  1963.43704178     0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xg  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281  4149.93964872 -5679.36893145]
------
Step:28, Action:East
State  261
Old Q Values:  [1917.83293758 1352.37702619 1785.94376459  -12.17474163]
New Q values:  [1917.83293758 1352.37702619 2849.04376394  -12.17474163]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7117.55419369 -5807.06396197  5223.19087579  1928.32894707]
------
Step:29, Action:East
State  272
Old Q Values:  [12374.40273656 -8521.23367799  3557.68927795  5323.37666935]
New Q values:  [12374.40273656 -8521.23367799 64809.47396237  5323.37666935]
Reward: 100009  Episode Reward:  100041
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2110.90293953 -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  108
Old Q Values:  [-8463.16477134  2233.8294456   1637.39424494     0.        ]
New Q values:  [-8463.16477134  3102.60619698  1637.39424494     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  3232.64475742  7345.58139579 -4966.32149798]
------
Step:2, Action:East
State  180
Old Q Values:  [ 4507.91302948  3232.64475742  7345.58139579 -4966.32149798]
New Q values:  [ 4507.91302948  3232.64475742  7162.08863634 -4966.32149798]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 14061.52026008   231.67262594]
------
Step:3, Action:East
State  200
Old Q Values:  [ 169.9257398  3934.68362957 2839.56908722  610.93635926]
New Q values:  [ 169.9257398  3934.68362957 2260.14924627  610.93635926]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
x  ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1719.34319057 3729.73870462  606.149024   2748.38982213]
------
Step:4, Action:South
State  216
Old Q Values:  [1719.34319057 3729.73870462  606.149024   2748.38982213]
New Q values:  [1719.34319057 4878.29373304  606.149024   2748.38982213]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3508.57911265  1802.67044829 -5588.09647059 11269.99417063]
------
Step:5, Action:West
State  288
Old Q Values:  [ 3508.57911265  1802.67044829 -5588.09647059 11269.99417063]
New Q values:  [ 3508.57911265  1802.67044829 -5588.09647059  6648.66392636]
Reward: 9  Episode Reward:  45
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7117.55419369 -5807.06396197  5223.19087579  1928.32894707]
------
Step:6, Action:North
State  272
Old Q Values:  [12374.40273656 -8521.23367799 64809.47396237  5323.37666935]
New Q values:  [ 6518.97208123 -8521.23367799 64809.47396237  5323.37666935]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  5.23270329e+03  0.00000000e+00]
------
Step:7, Action:East
State  199
Old Q Values:  [  14.86214194 1549.78616729 6880.11271899 1915.70494401]
New Q values:  [   14.86214194  1549.78616729 11260.80108661  1915.70494401]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28364.51999671 13820.5163021    790.72804752 17462.48602225]
------
Step:8, Action:North
State  216
Old Q Values:  [1719.34319057 4878.29373304  606.149024   2748.38982213]
New Q values:  [12578.25521027  4878.29373304   606.149024    2748.38982213]
Reward: 9  Episode Reward:  52
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  3.96170598e+04]
------
Step:9, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  3.96170598e+04]
New Q values:  [ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  1.95367613e+04]
Reward: -1  Episode Reward:  51
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 12301.79139247  1455.33129852]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 12301.79139247  1455.33129852]
New Q values:  [ -281.736      -1150.91067548 10781.14495593  1455.33129852]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  1.95367613e+04]
------
Step:11, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  1.95367613e+04]
New Q values:  [ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  8.02771297e+03]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   385.23769261   712.02812241]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548 10781.14495593  1455.33129852]
New Q values:  [ -281.736      -1150.91067548 10781.14495593   980.46720685]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1329.78229148  496.66968841 -252.78192178]
------
Step:13, Action:South
State  107
Old Q Values:  [-252.35169558 1329.78229148  496.66968841 -252.78192178]
New Q values:  [-252.35169558  940.74307677  496.66968841 -252.78192178]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 189.19059642    0.         1364.76720061 -178.98      ]
------
Step:14, Action:North
State  185
Old Q Values:  [ 189.19059642    0.         1364.76720061 -178.98      ]
New Q values:  [ 357.2991616     0.         1364.76720061 -178.98      ]
Reward: -1  Episode Reward:  46
xxxxx
xa  x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  940.74307677  496.66968841 -252.78192178]
------
Step:15, Action:South
State  105
Old Q Values:  [-180.6         433.68060256 -764.93196255    0.        ]
New Q values:  [-180.6         582.30240121 -764.93196255    0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 357.2991616     0.         1364.76720061 -178.98      ]
------
Step:16, Action:East
State  185
Old Q Values:  [ 357.2991616     0.         1364.76720061 -178.98      ]
New Q values:  [ 357.2991616     0.         1725.71196911 -178.98      ]
Reward: -1  Episode Reward:  44
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 2260.14924627  610.93635926]
------
Step:17, Action:South
State  201
Old Q Values:  [ 613.33320563 1598.25945099 -789.02220255 1209.42904959]
New Q values:  [ 613.33320563 4492.66838039 -789.02220255 1209.42904959]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244 12846.54866665  8532.6584013 ]
------
Step:18, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 12846.54866665  8532.6584013 ]
New Q values:  [1637.72437281 1974.75214244 7132.61864457 8532.6584013 ]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3508.57911265  1802.67044829 -5588.09647059  6648.66392636]
------
Step:19, Action:West
State  288
Old Q Values:  [ 3508.57911265  1802.67044829 -5588.09647059  6648.66392636]
New Q values:  [ 3508.57911265  1802.67044829 -5588.09647059  5218.66309093]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 7132.61864457 8532.6584013 ]
------
Step:20, Action:West
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 64809.47396237  5323.37666935]
New Q values:  [ 6518.97208123 -8521.23367799 64809.47396237 73451.69607072]
Reward: 100009  Episode Reward:  100050
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  7200.72263252   660.86649319   318.71965056]
------
Step:1, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  9.76669517e+03 -3.22965309e-01  8.02771297e+03]
New Q values:  [ 2.12301837e+02  7.68555463e+03 -3.22965309e-01  8.02771297e+03]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[12578.25521027  4878.29373304   606.149024    2748.38982213]
------
Step:2, Action:North
State  210
Old Q Values:  [28364.51999671 13820.5163021    790.72804752 17462.48602225]
New Q values:  [13753.52188928 13820.5163021    790.72804752 17462.48602225]
Reward: -1  Episode Reward:  8
xxxxx
x .ax
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  7.68555463e+03 -3.22965309e-01  8.02771297e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  7.68555463e+03 -3.22965309e-01  8.02771297e+03]
New Q values:  [ 2.12301837e+02  7.68555463e+03 -3.22965309e-01  6.45082867e+03]
Reward: 9  Episode Reward:  17
xxxxx
x a x
x.. x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548 10781.14495593   980.46720685]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548 10781.14495593   980.46720685]
New Q values:  [ -281.736      -1150.91067548  6617.52437216   980.46720685]
Reward: -1  Episode Reward:  16
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  7.68555463e+03 -3.22965309e-01  6.45082867e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  7.68555463e+03 -3.22965309e-01  6.45082867e+03]
New Q values:  [ 2.12301837e+02  6.84709842e+03 -3.22965309e-01  6.45082867e+03]
Reward: -1  Episode Reward:  15
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[12578.25521027  4878.29373304   606.149024    2748.38982213]
------
Step:6, Action:North
State  216
Old Q Values:  [12578.25521027  4878.29373304   606.149024    2748.38982213]
New Q values:  [7190.91887387 4878.29373304  606.149024   2748.38982213]
Reward: -1  Episode Reward:  14
xxxxx
x gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  7200.72263252   660.86649319   318.71965056]
------
Step:7, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  6.84709842e+03 -3.22965309e-01  6.45082867e+03]
New Q values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  6.45082867e+03]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[7190.91887387 4878.29373304  606.149024   2748.38982213]
------
Step:8, Action:North
State  216
Old Q Values:  [7190.91887387 4878.29373304  606.149024   2748.38982213]
New Q values:  [4811.01615182 4878.29373304  606.149024   2748.38982213]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  6.45082867e+03]
------
Step:9, Action:West
State  136
Old Q Values:  [-2129.37064562  7200.72263252   660.86649319   318.71965056]
New Q values:  [-2129.37064562  7200.72263252   660.86649319   208.98169646]
Reward: -1  Episode Reward:  11
xxxxx
x agx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   273.64612079]
------
Step:10, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3998.52582569   719.59279005]
New Q values:  [-9594.56523706 -8069.05606225  3998.52582569   920.50799788]
Reward: -1  Episode Reward:  10
xxxxx
xag x
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2110.90293953 -2165.66138672  -180.6       ]
------
Step:11, Action:South
State  109
Old Q Values:  [ -241.10880094  2110.90293953 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1309.02570645 -2165.66138672  -180.6       ]
Reward: 9  Episode Reward:  19
xxxxx
x  gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1211.69616896 1530.88176879  154.04646645]
------
Step:12, Action:East
State  189
Old Q Values:  [ 337.36081627 1211.69616896 1530.88176879  154.04646645]
New Q values:  [ 337.36081627 1211.69616896 1798.15779639  154.04646645]
Reward: 9  Episode Reward:  28
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3934.68362957 2260.14924627  610.93635926]
------
Step:13, Action:South
State  201
Old Q Values:  [ 613.33320563 4492.66838039 -789.02220255 1209.42904959]
New Q values:  [ 613.33320563 4362.26487255 -789.02220255 1209.42904959]
Reward: 9  Episode Reward:  37
xxxxx
x  gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 7132.61864457 8532.6584013 ]
------
Step:14, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.58272554e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.49320335e+03]
Reward: 9  Episode Reward:  46
xxxxx
x  gx
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1917.83293758 1352.37702619 2849.04376394  -12.17474163]
------
Step:15, Action:East
State  261
Old Q Values:  [1917.83293758 1352.37702619 2849.04376394  -12.17474163]
New Q values:  [1917.83293758 1352.37702619 3274.28376368  -12.17474163]
Reward: -1  Episode Reward:  45
xxxxx
x g x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 7117.55419369 -5807.06396197  5223.19087579  1928.32894707]
------
Step:16, Action:North
State  276
Old Q Values:  [ 7117.55419369 -5807.06396197  5223.19087579  1928.32894707]
New Q values:  [ 4561.50951103 -5807.06396197  5223.19087579  1928.32894707]
Reward: -1  Episode Reward:  44
xxxxx
xg  x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         3600.83401675 5716.95944519  441.58769553]
------
Step:17, Action:East
State  204
Old Q Values:  [   0.         3600.83401675 5716.95944519  441.58769553]
New Q values:  [   0.         3600.83401675 3749.67189799  441.58769553]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[4811.01615182 4878.29373304  606.149024   2748.38982213]
------
Step:18, Action:South
State  208
Old Q Values:  [24504.43688214  4342.26423107 -4584.50430574 27565.39100775]
New Q values:  [24504.43688214 63307.90461971 -4584.50430574 27565.39100775]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
x g x
x  ax
xxxxx
xxxxx
x...x
x...x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3508.57911265  1802.67044829 -5588.09647059  5218.66309093]
------
Step:1, Action:North
State  288
Old Q Values:  [ 3508.57911265  1802.67044829 -5588.09647059  5218.66309093]
New Q values:  [20401.20303097  1802.67044829 -5588.09647059  5218.66309093]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24504.43688214 63307.90461971 -4584.50430574 27565.39100775]
------
Step:2, Action:South
State  208
Old Q Values:  [24504.43688214 63307.90461971 -4584.50430574 27565.39100775]
New Q values:  [24504.43688214 31442.92275718 -4584.50430574 27565.39100775]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[20401.20303097  1802.67044829 -5588.09647059  5218.66309093]
------
Step:3, Action:North
State  288
Old Q Values:  [20401.20303097  1802.67044829 -5588.09647059  5218.66309093]
New Q values:  [17592.75803954  1802.67044829 -5588.09647059  5218.66309093]
Reward: -1  Episode Reward:  7
xxxxx
xg..x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24504.43688214 31442.92275718 -4584.50430574 27565.39100775]
------
Step:4, Action:South
State  208
Old Q Values:  [24504.43688214 31442.92275718 -4584.50430574 27565.39100775]
New Q values:  [24504.43688214 17854.39651473 -4584.50430574 27565.39100775]
Reward: -1  Episode Reward:  6
xxxxx
x.g.x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17592.75803954  1802.67044829 -5588.09647059  5218.66309093]
------
Step:5, Action:North
State  288
Old Q Values:  [17592.75803954  1802.67044829 -5588.09647059  5218.66309093]
New Q values:  [15306.12051814  1802.67044829 -5588.09647059  5218.66309093]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24504.43688214 17854.39651473 -4584.50430574 27565.39100775]
------
Step:6, Action:North
State  210
Old Q Values:  [13753.52188928 13820.5163021    790.72804752 17462.48602225]
New Q values:  [ 7442.05735798 13820.5163021    790.72804752 17462.48602225]
Reward: 9  Episode Reward:  14
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  6.45082867e+03]
------
Step:7, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  6.45082867e+03]
New Q values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  2.79933991e+03]
Reward: 9  Episode Reward:  23
xxxxx
x.a x
x.. x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   385.23769261   712.02812241]
------
Step:8, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261   712.02812241]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261   572.434172  ]
Reward: 9  Episode Reward:  32
xxxxx
xa  x
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  940.74307677  496.66968841 -252.78192178]
------
Step:9, Action:South
State  105
Old Q Values:  [-180.6         582.30240121 -764.93196255    0.        ]
New Q values:  [-180.6        4168.45360921 -764.93196255    0.        ]
Reward: 9  Episode Reward:  41
xxxxx
x  gx
xa. x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.22394919e+03 1.31004422e+04 3.33862213e+00]
------
Step:10, Action:East
State  185
Old Q Values:  [ 357.2991616     0.         1725.71196911 -178.98      ]
New Q values:  [ 357.2991616     0.         2004.36424941 -178.98      ]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4362.26487255 -789.02220255 1209.42904959]
------
Step:11, Action:South
State  200
Old Q Values:  [ 169.9257398  3934.68362957 2260.14924627  610.93635926]
New Q values:  [  169.9257398  23608.78227304  2260.14924627   610.93635926]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 64809.47396237 73451.69607072]
------
Step:12, Action:West
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 64809.47396237 73451.69607072]
New Q values:  [  6518.97208123  -8521.23367799  64809.47396237 120899.01877486]
Reward: 100009  Episode Reward:  100058
xxxxx
x   x
xg  x
xa  x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 7442.05735798 13820.5163021    790.72804752 17462.48602225]
------
Step:1, Action:West
State  210
Old Q Values:  [ 7442.05735798 13820.5163021    790.72804752 17462.48602225]
New Q values:  [ 7442.05735798 13820.5163021    790.72804752 12336.62324589]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  1.78207628e+04  2.17939995e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.24533942e+03  1.78207628e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  9.24533942e+03  1.12738600e+04  2.17939995e+03]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x. ax
x.g.x
xxxxx
Step:3, Action:South
State  210
Old Q Values:  [ 7442.05735798 13820.5163021    790.72804752 12336.62324589]
New Q values:  [ 7442.05735798  4125.44267628   790.72804752 12336.62324589]
Reward: -9991  Episode Reward:  -9983
xxxxx
x...x
x.  x
x. gx
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281  4149.93964872 -5679.36893145]
------
Step:1, Action:East
State  261
Old Q Values:  [1917.83293758 1352.37702619 3274.28376368  -12.17474163]
New Q values:  [1917.83293758 1352.37702619 2882.07076821  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4561.50951103 -5807.06396197  5223.19087579  1928.32894707]
------
Step:2, Action:East
State  272
Old Q Values:  [  6518.97208123  -8521.23367799  64809.47396237 120899.01877486]
New Q values:  [  6518.97208123  -8521.23367799  30521.02574039 120899.01877486]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[15306.12051814  1802.67044829 -5588.09647059  5218.66309093]
------
Step:3, Action:North
State  288
Old Q Values:  [15306.12051814  1802.67044829 -5588.09647059  5218.66309093]
New Q values:  [14397.46550958  1802.67044829 -5588.09647059  5218.66309093]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24504.43688214 17854.39651473 -4584.50430574 27565.39100775]
------
Step:4, Action:West
State  210
Old Q Values:  [ 7442.05735798  4125.44267628   790.72804752 12336.62324589]
New Q values:  [7442.05735798 4125.44267628  790.72804752 8322.20730034]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x.a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  1.12738600e+04  2.17939995e+03]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.67650178e+04 1.93575548e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.67650178e+04 1.60120392e+04 2.45392999e+03]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24504.43688214 17854.39651473 -4584.50430574 27565.39100775]
------
Step:6, Action:West
State  208
Old Q Values:  [24504.43688214 17854.39651473 -4584.50430574 27565.39100775]
New Q values:  [24504.43688214 17854.39651473 -4584.50430574 16055.06173413]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x.a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.67650178e+04 1.60120392e+04 2.45392999e+03]
------
Step:7, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.67650178e+04 1.60120392e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 4.29751127e+04 1.60120392e+04 2.45392999e+03]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  6518.97208123  -8521.23367799  30521.02574039 120899.01877486]
------
Step:8, Action:West
State  272
Old Q Values:  [  6518.97208123  -8521.23367799  30521.02574039 120899.01877486]
New Q values:  [ 6518.97208123 -8521.23367799 30521.02574039 49223.62874041]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1917.83293758 1352.37702619 2882.07076821  -12.17474163]
------
Step:9, Action:East
State  257
Old Q Values:  [37723.15134326 12764.58618105  6318.60286989  1875.31501677]
New Q values:  [37723.15134326 12764.58618105 17293.92977008  1875.31501677]
Reward: -1  Episode Reward:  31
xxxxx
x g.x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 30521.02574039 49223.62874041]
------
Step:10, Action:West
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 30521.02574039 49223.62874041]
New Q values:  [ 6518.97208123 -8521.23367799 30521.02574039 20933.83339078]
Reward: -1  Episode Reward:  30
xxxxx
xg..x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281  4149.93964872 -5679.36893145]
------
Step:11, Action:East
State  260
Old Q Values:  [ -393.58373963 -5704.51612281  4149.93964872 -5679.36893145]
New Q values:  [ -393.58373963 -5704.51612281 10815.68358161 -5679.36893145]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 30521.02574039 20933.83339078]
------
Step:12, Action:East
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 30521.02574039 20933.83339078]
New Q values:  [ 6518.97208123 -8521.23367799 16527.04994903 20933.83339078]
Reward: -1  Episode Reward:  28
xxxxx
x ..x
x.  x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14397.46550958  1802.67044829 -5588.09647059  5218.66309093]
------
Step:13, Action:North
State  288
Old Q Values:  [14397.46550958  1802.67044829 -5588.09647059  5218.66309093]
New Q values:  [13109.71726848  1802.67044829 -5588.09647059  5218.66309093]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[24504.43688214 17854.39651473 -4584.50430574 16055.06173413]
------
Step:14, Action:North
State  208
Old Q Values:  [24504.43688214 17854.39651473 -4584.50430574 16055.06173413]
New Q values:  [27915.14245471 17854.39651473 -4584.50430574 16055.06173413]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14806.28086412  -180.00807518 60359.89233951]
------
Step:15, Action:West
State  136
Old Q Values:  [-2129.37064562  7200.72263252   660.86649319   208.98169646]
New Q values:  [-2129.37064562  7200.72263252   660.86649319 -4711.44957371]
Reward: -9991  Episode Reward:  -9955
xxxxx
x g x
x.  x
x   x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7442.05735798 4125.44267628  790.72804752 8322.20730034]
------
Step:1, Action:West
State  210
Old Q Values:  [7442.05735798 4125.44267628  790.72804752 8322.20730034]
New Q values:  [7442.05735798 4125.44267628  790.72804752 5825.93296778]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[  38.85388605 8305.50015881 7063.52223325 1169.39963074]
------
Step:2, Action:South
State  193
Old Q Values:  [-5922.26708831 22940.35316187 19846.21538174   767.35890262]
New Q values:  [-5922.26708831 11741.33878514 19846.21538174   767.35890262]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x. gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 7132.61864457 8532.6584013 ]
------
Step:3, Action:West
State  273
Old Q Values:  [1637.72437281 1974.75214244 7132.61864457 8532.6584013 ]
New Q values:  [ 1637.72437281  1974.75214244  7132.61864457 14735.4087635 ]
Reward: 9  Episode Reward:  27
xxxxx
x..gx
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37723.15134326 12764.58618105 17293.92977008  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [1917.83293758 1352.37702619 2882.07076821  -12.17474163]
New Q values:  [4702.66582376 1352.37702619 2882.07076821  -12.17474163]
Reward: 9  Episode Reward:  36
xxxxx
x...x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[4.15652936e+02 2.22394919e+03 1.31004422e+04 3.33862213e+00]
------
Step:5, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039  10236.65747199     0.        ]
New Q values:  [60476.05138135 21430.9929039  10047.92760332     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x..gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11741.33878514 19846.21538174   767.35890262]
------
Step:6, Action:East
State  193
Old Q Values:  [-5922.26708831 11741.33878514 19846.21538174   767.35890262]
New Q values:  [-5922.26708831 11741.33878514 10312.42888911   767.35890262]
Reward: -10001  Episode Reward:  -9966
xxxxx
x...x
x  gx
x   x
xxxxx
xxxxx
x...x
xa..x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1861.84676547 7084.18853733 1554.80203889]
------
Step:1, Action:East
State  183
Old Q Values:  [2343.43228624 1861.84676547 7084.18853733 1554.80203889]
New Q values:  [2343.43228624 1861.84676547 6221.23341692 1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  1.12738600e+04  2.17939995e+03]
------
Step:2, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.29751127e+04 1.60120392e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 4.29751127e+04 1.47847584e+04 2.45392999e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27915.14245471 17854.39651473 -4584.50430574 16055.06173413]
------
Step:3, Action:North
State  216
Old Q Values:  [4811.01615182 4878.29373304  606.149024   2748.38982213]
New Q values:  [4090.02325048 4878.29373304  606.149024   2748.38982213]
Reward: 9  Episode Reward:  27
xxxxx
x.gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  7200.72263252   660.86649319 -4711.44957371]
------
Step:4, Action:South
State  130
Old Q Values:  [41234.48978377 14806.28086412  -180.00807518 60359.89233951]
New Q values:  [41234.48978377 14296.45508206  -180.00807518 60359.89233951]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27915.14245471 17854.39651473 -4584.50430574 16055.06173413]
------
Step:5, Action:North
State  216
Old Q Values:  [4090.02325048 4878.29373304  606.149024   2748.38982213]
New Q values:  [3795.62608995 4878.29373304  606.149024   2748.38982213]
Reward: -1  Episode Reward:  25
xxxxx
x.gax
x   x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  7200.72263252   660.86649319 -4711.44957371]
------
Step:6, Action:South
State  128
Old Q Values:  [ 47581.15146009  15794.84178348  -8652.84       141952.53665887]
New Q values:  [ 47581.15146009  14691.8794498   -8652.84       141952.53665887]
Reward: -1  Episode Reward:  24
xxxxx
x..gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27915.14245471 17854.39651473 -4584.50430574 16055.06173413]
------
Step:7, Action:South
State  216
Old Q Values:  [3795.62608995 4878.29373304  606.149024   2748.38982213]
New Q values:  [3795.62608995 5883.63267376  606.149024   2748.38982213]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13109.71726848  1802.67044829 -5588.09647059  5218.66309093]
------
Step:8, Action:North
State  288
Old Q Values:  [13109.71726848  1802.67044829 -5588.09647059  5218.66309093]
New Q values:  [13617.8296438   1802.67044829 -5588.09647059  5218.66309093]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27915.14245471 17854.39651473 -4584.50430574 16055.06173413]
------
Step:9, Action:South
State  208
Old Q Values:  [27915.14245471 17854.39651473 -4584.50430574 16055.06173413]
New Q values:  [27915.14245471 11226.50749903 -4584.50430574 16055.06173413]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13617.8296438   1802.67044829 -5588.09647059  5218.66309093]
------
Step:10, Action:West
State  288
Old Q Values:  [13617.8296438   1802.67044829 -5588.09647059  5218.66309093]
New Q values:  [13617.8296438   1802.67044829 -5588.09647059  6513.48786542]
Reward: 9  Episode Reward:  30
xxxxx
x..gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  7132.61864457 14735.4087635 ]
------
Step:11, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  7132.61864457 14735.4087635 ]
New Q values:  [ 1637.72437281  1974.75214244  7132.61864457 17216.50890838]
Reward: 9  Episode Reward:  39
xxxxx
x.. x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[37723.15134326 12764.58618105 17293.92977008  1875.31501677]
------
Step:12, Action:North
State  257
Old Q Values:  [37723.15134326 12764.58618105 17293.92977008  1875.31501677]
New Q values:  [40850.40298485 12764.58618105 17293.92977008  1875.31501677]
Reward: -1  Episode Reward:  38
xxxxx
x.. x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[82228.67666629 16101.90751562 85872.47482516     0.        ]
------
Step:13, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039  10047.92760332     0.        ]
New Q values:  [60476.05138135 21430.9929039   7540.97267687     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x.. x
x agx
x   x
xxxxx
Step:14, Action:North
State  195
Old Q Values:  [  38.85388605 8305.50015881 7063.52223325 1169.39963074]
New Q values:  [38955.61549043  8305.50015881  7063.52223325  1169.39963074]
Reward: 9  Episode Reward:  46
xxxxx
x.a x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 129782.24645338]
------
Step:15, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   32608.63878402 119836.27385356]
New Q values:  [  -180.6          3557.6642036   32608.63878402 125623.46100278]
Reward: 100009  Episode Reward:  100055
xxxxx
xa  x
x   x
x g x
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 16527.04994903 20933.83339078]
------
Step:1, Action:West
State  276
Old Q Values:  [ 4561.50951103 -5807.06396197  5223.19087579  1928.32894707]
New Q values:  [ 4561.50951103 -5807.06396197  5223.19087579  4021.43665331]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x ..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281 10815.68358161 -5679.36893145]
------
Step:2, Action:East
State  261
Old Q Values:  [4702.66582376 1352.37702619 2882.07076821  -12.17474163]
New Q values:  [4702.66582376 1352.37702619 2719.18557002  -12.17474163]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x ..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4561.50951103 -5807.06396197  5223.19087579  4021.43665331]
------
Step:3, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  7132.61864457 17216.50890838]
New Q values:  [ 1637.72437281  1974.75214244  6943.79635097 17216.50890838]
Reward: 9  Episode Reward:  17
xxxxx
x..gx
x ..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13617.8296438   1802.67044829 -5588.09647059  6513.48786542]
------
Step:4, Action:North
State  288
Old Q Values:  [13617.8296438   1802.67044829 -5588.09647059  6513.48786542]
New Q values:  [13827.07459393  1802.67044829 -5588.09647059  6513.48786542]
Reward: 9  Episode Reward:  26
xxxxx
x.g.x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27915.14245471 11226.50749903 -4584.50430574 16055.06173413]
------
Step:5, Action:North
State  208
Old Q Values:  [27915.14245471 11226.50749903 -4584.50430574 16055.06173413]
New Q values:  [53757.21797955 11226.50749903 -4584.50430574 16055.06173413]
Reward: 9  Episode Reward:  35
xxxxx
xg.ax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[ 47581.15146009  14691.8794498   -8652.84       141952.53665887]
------
Step:6, Action:West
State  128
Old Q Values:  [ 47581.15146009  14691.8794498   -8652.84       141952.53665887]
New Q values:  [47581.15146009 14691.8794498  -8652.84       84071.34358242]
Reward: -9991  Episode Reward:  -9956
xxxxx
x.g x
x . x
x   x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2343.43228624 1861.84676547 6221.23341692 1554.80203889]
------
Step:1, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  8309.7986232      0.        ]
New Q values:  [    0.         -5536.05678243  6711.47745126     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  1.12738600e+04  2.17939995e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.24533942e+03  1.12738600e+04  2.17939995e+03]
New Q values:  [-6.00000000e-01  9.24533942e+03  6.74756121e+03  2.17939995e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7442.05735798 4125.44267628  790.72804752 5825.93296778]
------
Step:3, Action:North
State  210
Old Q Values:  [7442.05735798 4125.44267628  790.72804752 5825.93296778]
New Q values:  [21090.19064505  4125.44267628   790.72804752  5825.93296778]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14296.45508206  -180.00807518 60359.89233951]
------
Step:4, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  2.79933991e+03]
New Q values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  3.88121743e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   32608.63878402 125623.46100278]
------
Step:5, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261   572.434172  ]
New Q values:  [ -253.44886264 -1902.20915811   385.23769261   516.59659183]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  940.74307677  496.66968841 -252.78192178]
------
Step:6, Action:South
State  107
Old Q Values:  [-252.35169558  940.74307677  496.66968841 -252.78192178]
New Q values:  [-252.35169558  760.71844312  496.66968841 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa  x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 836.45017667    0.         1283.40404137    0.        ]
------
Step:7, Action:East
State  187
Old Q Values:  [ 836.45017667    0.         1283.40404137    0.        ]
New Q values:  [ 836.45017667    0.         1890.23066022    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x a x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 3.76178822e+03 1.42261918e+03 4.59156348e+03]
------
Step:8, Action:West
State  201
Old Q Values:  [ 613.33320563 4362.26487255 -789.02220255 1209.42904959]
New Q values:  [ 613.33320563 4362.26487255 -789.02220255 1084.48089466]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xa gx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 357.2991616     0.         2004.36424941 -178.98      ]
------
Step:9, Action:East
State  185
Old Q Values:  [ 357.2991616     0.         2004.36424941 -178.98      ]
New Q values:  [ 357.2991616     0.         1883.78038168 -178.98      ]
Reward: -10001  Episode Reward:  -9959
xxxxx
x   x
x g x
x. .x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 16527.04994903 20933.83339078]
------
Step:1, Action:West
State  276
Old Q Values:  [ 4561.50951103 -5807.06396197  5223.19087579  4021.43665331]
New Q values:  [ 4561.50951103 -5807.06396197  5223.19087579  4858.67973581]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281 10815.68358161 -5679.36893145]
------
Step:2, Action:East
State  261
Old Q Values:  [4702.66582376 1352.37702619 2719.18557002  -12.17474163]
New Q values:  [4702.66582376 1352.37702619 7367.22424524  -12.17474163]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 16527.04994903 20933.83339078]
------
Step:3, Action:West
State  276
Old Q Values:  [ 4561.50951103 -5807.06396197  5223.19087579  4858.67973581]
New Q values:  [ 4561.50951103 -5807.06396197  5223.19087579  5187.5769688 ]
Reward: -1  Episode Reward:  7
xxxxx
xg .x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281 10815.68358161 -5679.36893145]
------
Step:4, Action:East
State  260
Old Q Values:  [ -393.58373963 -5704.51612281 10815.68358161 -5679.36893145]
New Q values:  [ -393.58373963 -5704.51612281  5892.63069538 -5679.36893145]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
xg..x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4561.50951103 -5807.06396197  5223.19087579  5187.5769688 ]
------
Step:5, Action:East
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 16527.04994903 20933.83339078]
New Q values:  [ 6518.97208123 -8521.23367799 10764.34235779 20933.83339078]
Reward: 9  Episode Reward:  15
xxxxx
xg .x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[13827.07459393  1802.67044829 -5588.09647059  6513.48786542]
------
Step:6, Action:North
State  288
Old Q Values:  [13827.07459393  1802.67044829 -5588.09647059  6513.48786542]
New Q values:  [21663.39523144  1802.67044829 -5588.09647059  6513.48786542]
Reward: 9  Episode Reward:  24
xxxxx
x. .x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[53757.21797955 11226.50749903 -4584.50430574 16055.06173413]
------
Step:7, Action:North
State  208
Old Q Values:  [53757.21797955 11226.50749903 -4584.50430574 16055.06173413]
New Q values:  [39616.25489367 11226.50749903 -4584.50430574 16055.06173413]
Reward: 9  Episode Reward:  33
xxxxx
x. ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14296.45508206  -180.00807518 60359.89233951]
------
Step:8, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  3.88121743e+04]
New Q values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  1.75095270e+04]
Reward: -1  Episode Reward:  32
xxxxx
x.a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6617.52437216   980.46720685]
------
Step:9, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   32608.63878402 125623.46100278]
New Q values:  [  -180.6          3557.6642036   31150.82321546 125623.46100278]
Reward: -1  Episode Reward:  31
xxxxx
x. ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14296.45508206  -180.00807518 60359.89233951]
------
Step:10, Action:West
State  130
Old Q Values:  [41234.48978377 14296.45508206  -180.00807518 60359.89233951]
New Q values:  [41234.48978377 14296.45508206  -180.00807518 61830.39523664]
Reward: -1  Episode Reward:  30
xxxxx
x.a x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   31150.82321546 125623.46100278]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  6617.52437216   980.46720685]
New Q values:  [ -281.736      -1150.91067548  6617.52437216   680.501585  ]
Reward: 9  Episode Reward:  39
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   943.04900753  -180.6       ]
------
Step:12, Action:East
State  107
Old Q Values:  [-252.35169558  760.71844312  496.66968841 -252.78192178]
New Q values:  [-252.35169558  760.71844312 2183.32518701 -252.78192178]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6617.52437216   680.501585  ]
------
Step:13, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   385.23769261   516.59659183]
New Q values:  [ -253.44886264 -1902.20915811  5406.35318215   516.59659183]
Reward: -1  Episode Reward:  37
xxxxx
x  ax
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  1.75095270e+04]
------
Step:14, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  1.75095270e+04]
New Q values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  8.62511676e+03]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  5406.35318215   516.59659183]
------
Step:15, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  6617.52437216   680.501585  ]
New Q values:  [ -281.736      -1150.91067548  5233.9447773    680.501585  ]
Reward: -1  Episode Reward:  35
xxxxx
x  ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  8.62511676e+03]
------
Step:16, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  8.62511676e+03]
New Q values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  5.01963014e+03]
Reward: -1  Episode Reward:  34
xxxxx
x a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  5233.9447773    680.501585  ]
------
Step:17, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  5406.35318215   516.59659183]
New Q values:  [ -253.44886264 -1902.20915811  3667.83031419   516.59659183]
Reward: -1  Episode Reward:  33
xxxxx
x  ax
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  5.01963014e+03]
------
Step:18, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  5.01963014e+03]
New Q values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  3.10760115e+03]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3667.83031419   516.59659183]
------
Step:19, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3667.83031419   516.59659183]
New Q values:  [ -253.44886264 -1902.20915811  2935.18663426   516.59659183]
Reward: -1  Episode Reward:  31
xxxxx
x  ax
x..gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  3.10760115e+03]
------
Step:20, Action:West
State  138
Old Q Values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  3.10760115e+03]
New Q values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  2.12299645e+03]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x.. x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2935.18663426   516.59659183]
------
Step:21, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  5233.9447773    680.501585  ]
New Q values:  [ -281.736      -1150.91067548  3561.6324195    680.501585  ]
Reward: -1  Episode Reward:  29
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  2.12299645e+03]
------
Step:22, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  4.89551503e+03 -3.22965309e-01  2.12299645e+03]
New Q values:  [ 2.12301837e+02  8.28466320e+03 -3.22965309e-01  2.12299645e+03]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x..ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21090.19064505  4125.44267628   790.72804752  5825.93296778]
------
Step:23, Action:North
State  210
Old Q Values:  [21090.19064505  4125.44267628   790.72804752  5825.93296778]
New Q values:  [10920.87521951  4125.44267628   790.72804752  5825.93296778]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.12301837e+02  8.28466320e+03 -3.22965309e-01  2.12299645e+03]
------
Step:24, Action:South
State  138
Old Q Values:  [ 2.12301837e+02  8.28466320e+03 -3.22965309e-01  2.12299645e+03]
New Q values:  [ 2.12301837e+02  1.51981418e+04 -3.22965309e-01  2.12299645e+03]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[39616.25489367 11226.50749903 -4584.50430574 16055.06173413]
------
Step:25, Action:North
State  210
Old Q Values:  [10920.87521951  4125.44267628   790.72804752  5825.93296778]
New Q values:  [8927.19261283 4125.44267628  790.72804752 5825.93296778]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x.. x
x g x
xxxxx
Step:26, Action:North
State  138
Old Q Values:  [ 2.12301837e+02  1.51981418e+04 -3.22965309e-01  2.12299645e+03]
New Q values:  [ 4.46376326e+03  1.51981418e+04 -3.22965309e-01  2.12299645e+03]
Reward: -301  Episode Reward:  -276
xxxxx
x  ax
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  1.51981418e+04 -3.22965309e-01  2.12299645e+03]
------
Step:27, Action:South
State  138
Old Q Values:  [ 4.46376326e+03  1.51981418e+04 -3.22965309e-01  2.12299645e+03]
New Q values:  [ 4.46376326e+03  1.79635332e+04 -3.22965309e-01  2.12299645e+03]
Reward: -1  Episode Reward:  -277
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[39616.25489367 11226.50749903 -4584.50430574 16055.06173413]
------
Step:28, Action:North
State  208
Old Q Values:  [39616.25489367 11226.50749903 -4584.50430574 16055.06173413]
New Q values:  [18006.11874722 11226.50749903 -4584.50430574 16055.06173413]
Reward: -1  Episode Reward:  -278
xxxxx
xg ax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[-2129.37064562  7200.72263252   660.86649319 -4711.44957371]
------
Step:29, Action:South
State  136
Old Q Values:  [-2129.37064562  7200.72263252   660.86649319 -4711.44957371]
New Q values:  [-2129.37064562  8281.52467718   660.86649319 -4711.44957371]
Reward: -1  Episode Reward:  -279
xxxxx
x g x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18006.11874722 11226.50749903 -4584.50430574 16055.06173413]
------
Step:30, Action:North
State  208
Old Q Values:  [18006.11874722 11226.50749903 -4584.50430574 16055.06173413]
New Q values:  [12590.90744933 11226.50749903 -4584.50430574 16055.06173413]
Reward: -1  Episode Reward:  -280
xxxxx
x  ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  1.79635332e+04 -3.22965309e-01  2.12299645e+03]
------
Step:31, Action:South
State  136
Old Q Values:  [-2129.37064562  8281.52467718   660.86649319 -4711.44957371]
New Q values:  [-2129.37064562  8128.52839111   660.86649319 -4711.44957371]
Reward: -1  Episode Reward:  -281
xxxxx
x g x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12590.90744933 11226.50749903 -4584.50430574 16055.06173413]
------
Step:32, Action:West
State  208
Old Q Values:  [12590.90744933 11226.50749903 -4584.50430574 16055.06173413]
New Q values:  [12590.90744933 11226.50749903 -4584.50430574 19319.9585158 ]
Reward: 9  Episode Reward:  -272
xxxxx
xg  x
x.a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.29751127e+04 1.47847584e+04 2.45392999e+03]
------
Step:33, Action:South
State  192
Old Q Values:  [3.89777037e-01 4.29751127e+04 1.47847584e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 2.34695951e+04 1.47847584e+04 2.45392999e+03]
Reward: -1  Episode Reward:  -273
xxxxx
x g x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 10764.34235779 20933.83339078]
------
Step:34, Action:West
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 10764.34235779 20933.83339078]
New Q values:  [ 6518.97208123 -8521.23367799 10764.34235779 20628.05425177]
Reward: -1  Episode Reward:  -274
xxxxx
x   x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[40850.40298485 12764.58618105 17293.92977008  1875.31501677]
------
Step:35, Action:North
State  257
Old Q Values:  [40850.40298485 12764.58618105 17293.92977008  1875.31501677]
New Q values:  [94488.37660835 12764.58618105 17293.92977008  1875.31501677]
Reward: 100009  Episode Reward:  99735
xxxxx
x   x
xa gx
x   x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12590.90744933 11226.50749903 -4584.50430574 19319.9585158 ]
------
Step:1, Action:West
State  216
Old Q Values:  [3795.62608995 5883.63267376  606.149024   2748.38982213]
New Q values:  [3795.62608995 5883.63267376  606.149024   8145.63446288]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 2.34695951e+04 1.47847584e+04 2.45392999e+03]
------
Step:2, Action:South
State  192
Old Q Values:  [3.89777037e-01 2.34695951e+04 1.47847584e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.55816543e+04 1.47847584e+04 2.45392999e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 10764.34235779 20628.05425177]
------
Step:3, Action:West
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 10764.34235779 20628.05425177]
New Q values:  [ 6518.97208123 -8521.23367799 10764.34235779 36603.13468321]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.  x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[94488.37660835 12764.58618105 17293.92977008  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [4702.66582376 1352.37702619 7367.22424524  -12.17474163]
New Q values:  [ 2.00292817e+04  1.35237703e+03  7.36722425e+03 -1.21747416e+01]
Reward: 9  Episode Reward:  36
xxxxx
x.. x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   7540.97267687     0.        ]
------
Step:5, Action:North
State  181
Old Q Values:  [4.15652936e+02 2.22394919e+03 1.31004422e+04 3.33862213e+00]
New Q values:  [1.30461225e+03 2.22394919e+03 1.31004422e+04 3.33862213e+00]
Reward: 9  Episode Reward:  45
xxxxx
xa. x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 3776.50358003 1796.41351142    0.        ]
------
Step:6, Action:South
State  101
Old Q Values:  [   0.       2113.988708    0.          0.      ]
New Q values:  [   0.         4775.12813192    0.            0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x .gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.30461225e+03 2.22394919e+03 1.31004422e+04 3.33862213e+00]
------
Step:7, Action:East
State  189
Old Q Values:  [ 337.36081627 1211.69616896 1798.15779639  154.04646645]
New Q values:  [ 337.36081627 1211.69616896 1843.56468795  154.04646645]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         3600.83401675 3749.67189799  441.58769553]
------
Step:8, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.55816543e+04 1.47847584e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.55816543e+04 1.17092909e+04 2.45392999e+03]
Reward: -1  Episode Reward:  42
xxxxx
x . x
x gax
x  .x
xxxxx
Step:9, Action:North
State  208
Old Q Values:  [12590.90744933 11226.50749903 -4584.50430574 19319.9585158 ]
New Q values:  [23584.88155072 11226.50749903 -4584.50430574 19319.9585158 ]
Reward: -1  Episode Reward:  41
xxxxx
x .ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 14296.45508206  -180.00807518 61830.39523664]
------
Step:10, Action:West
State  138
Old Q Values:  [ 4.46376326e+03  1.79635332e+04 -3.22965309e-01  2.12299645e+03]
New Q values:  [ 4.46376326e+03  1.79635332e+04 -3.22965309e-01  1.17807147e+03]
Reward: 9  Episode Reward:  50
xxxxx
x a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 1078.2429548   350.00244198]
------
Step:11, Action:East
State  126
Old Q Values:  [   0.          331.64678262 1078.2429548   350.00244198]
New Q values:  [   0.          331.64678262 5819.75713236  350.00244198]
Reward: -1  Episode Reward:  49
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  1.79635332e+04 -3.22965309e-01  1.17807147e+03]
------
Step:12, Action:South
State  138
Old Q Values:  [ 4.46376326e+03  1.79635332e+04 -3.22965309e-01  1.17807147e+03]
New Q values:  [ 4.46376326e+03  8.01140350e+03 -3.22965309e-01  1.17807147e+03]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2440.34612496 2755.30078316    0.          930.00701399]
------
Step:13, Action:South
State  208
Old Q Values:  [23584.88155072 11226.50749903 -4584.50430574 19319.9585158 ]
New Q values:  [23584.88155072 70995.02156904 -4584.50430574 19319.9585158 ]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  8.01140350e+03 -3.22965309e-01  1.17807147e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 4.46376326e+03  8.01140350e+03 -3.22965309e-01  1.17807147e+03]
New Q values:  [ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  1.17807147e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23584.88155072 70995.02156904 -4584.50430574 19319.9585158 ]
------
Step:2, Action:South
State  208
Old Q Values:  [23584.88155072 70995.02156904 -4584.50430574 19319.9585158 ]
New Q values:  [23584.88155072 34902.42719705 -4584.50430574 19319.9585158 ]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21663.39523144  1802.67044829 -5588.09647059  6513.48786542]
------
Step:3, Action:North
State  288
Old Q Values:  [21663.39523144  1802.67044829 -5588.09647059  6513.48786542]
New Q values:  [11342.91587642  1802.67044829 -5588.09647059  6513.48786542]
Reward: -1  Episode Reward:  17
xxxxx
x.. x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8927.19261283 4125.44267628  790.72804752 5825.93296778]
------
Step:4, Action:North
State  210
Old Q Values:  [8927.19261283 4125.44267628  790.72804752 5825.93296778]
New Q values:  [10922.81740661  4125.44267628   790.72804752  5825.93296778]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  1.17807147e+03]
------
Step:5, Action:South
State  130
Old Q Values:  [41234.48978377 14296.45508206  -180.00807518 61830.39523664]
New Q values:  [41234.48978377 16188.71019194  -180.00807518 61830.39523664]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23584.88155072 34902.42719705 -4584.50430574 19319.9585158 ]
------
Step:6, Action:South
State  210
Old Q Values:  [10922.81740661  4125.44267628   790.72804752  5825.93296778]
New Q values:  [10922.81740661  5052.45183344   790.72804752  5825.93296778]
Reward: -1  Episode Reward:  14
xxxxx
x.. x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11342.91587642  1802.67044829 -5588.09647059  6513.48786542]
------
Step:7, Action:North
State  288
Old Q Values:  [11342.91587642  1802.67044829 -5588.09647059  6513.48786542]
New Q values:  [ 7813.41157255  1802.67044829 -5588.09647059  6513.48786542]
Reward: -1  Episode Reward:  13
xxxxx
x.. x
x..ax
x. gx
xxxxx
Step:8, Action:South
State  208
Old Q Values:  [23584.88155072 34902.42719705 -4584.50430574 19319.9585158 ]
New Q values:  [23584.88155072 18431.09903793 -4584.50430574 19319.9585158 ]
Reward: -10001  Episode Reward:  -9988
xxxxx
x.. x
x..gx
x.  x
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  6711.47745126     0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [2343.43228624 1861.84676547 6221.23341692 1554.80203889]
New Q values:  [2343.43228624 1861.84676547 5267.4951939  1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  6.74756121e+03  2.17939995e+03]
------
Step:2, Action:East
State  194
Old Q Values:  [-6.00000000e-01  9.24533942e+03  6.74756121e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  9.24533942e+03  5.98126971e+03  2.17939995e+03]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10922.81740661  5052.45183344   790.72804752  5825.93296778]
------
Step:3, Action:North
State  208
Old Q Values:  [23584.88155072 18431.09903793 -4584.50430574 19319.9585158 ]
New Q values:  [27988.47119128 18431.09903793 -4584.50430574 19319.9585158 ]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 16188.71019194  -180.00807518 61830.39523664]
------
Step:4, Action:West
State  138
Old Q Values:  [ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  1.17807147e+03]
New Q values:  [ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  3.81636669e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   31150.82321546 125623.46100278]
------
Step:5, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3998.52582569   920.50799788]
New Q values:  [-9594.56523706 -8069.05606225  3998.52582569   766.31091109]
Reward: 9  Episode Reward:  45
xxxxx
xag x
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1309.02570645 -2165.66138672  -180.6       ]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 1651.94526936  431.25952337 -120.29354603]
New Q values:  [-177.44732869 1213.24751413  431.25952337 -120.29354603]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 1211.69616896 1843.56468795  154.04646645]
------
Step:7, Action:South
State  189
Old Q Values:  [ 337.36081627 1211.69616896 1843.56468795  154.04646645]
New Q values:  [ 337.36081627 6492.86299076 1843.56468795  154.04646645]
Reward: -1  Episode Reward:  43
xxxxx
x g x
x   x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2.00292817e+04  1.35237703e+03  7.36722425e+03 -1.21747416e+01]
------
Step:8, Action:North
State  261
Old Q Values:  [ 2.00292817e+04  1.35237703e+03  7.36722425e+03 -1.21747416e+01]
New Q values:  [9958.97159479 1352.37702619 7367.22424524  -12.17474163]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xag x
x ..x
xxxxx
Step:9, Action:South
State  188
Old Q Values:  [-6523.78898263  3816.21026422  1963.43704178     0.        ]
New Q values:  [-6523.78898263  3293.6733143   1963.43704178     0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281  5892.63069538 -5679.36893145]
------
Step:10, Action:East
State  261
Old Q Values:  [9958.97159479 1352.37702619 7367.22424524  -12.17474163]
New Q values:  [9958.97159479 1352.37702619 4519.24696083  -12.17474163]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4561.50951103 -5807.06396197  5223.19087579  5187.5769688 ]
------
Step:11, Action:East
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 10764.34235779 36603.13468321]
New Q values:  [ 6518.97208123 -8521.23367799 66655.16041488 36603.13468321]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
x   x
x gax
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.30461225e+03 2.22394919e+03 1.31004422e+04 3.33862213e+00]
------
Step:1, Action:East
State  181
Old Q Values:  [1.30461225e+03 2.22394919e+03 1.31004422e+04 3.33862213e+00]
New Q values:  [1.30461225e+03 2.22394919e+03 3.46403294e+03 3.33862213e+00]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. .x
x g.x
x...x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7813.41157255  1802.67044829 -5588.09647059  6513.48786542]
------
Step:1, Action:North
State  288
Old Q Values:  [ 7813.41157255  1802.67044829 -5588.09647059  6513.48786542]
New Q values:  [11527.3059864   1802.67044829 -5588.09647059  6513.48786542]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[27988.47119128 18431.09903793 -4584.50430574 19319.9585158 ]
------
Step:2, Action:North
State  208
Old Q Values:  [27988.47119128 18431.09903793 -4584.50430574 19319.9585158 ]
New Q values:  [22649.88854274 18431.09903793 -4584.50430574 19319.9585158 ]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  3.81636669e+04]
------
Step:3, Action:West
State  138
Old Q Values:  [ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  3.81636669e+04]
New Q values:  [ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  1.63393565e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3561.6324195    680.501585  ]
------
Step:4, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2935.18663426   516.59659183]
New Q values:  [ -253.44886264 -1902.20915811  8526.01501518   516.59659183]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  1.63393565e+04]
------
Step:5, Action:South
State  138
Old Q Values:  [ 4.46376326e+03  2.45084679e+04 -3.22965309e-01  1.63393565e+04]
New Q values:  [ 4.46376326e+03  1.30796324e+04 -3.22965309e-01  1.63393565e+04]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10922.81740661  5052.45183344   790.72804752  5825.93296778]
------
Step:6, Action:North
State  210
Old Q Values:  [10922.81740661  5052.45183344   790.72804752  5825.93296778]
New Q values:  [9270.33390689 5052.45183344  790.72804752 5825.93296778]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  1.30796324e+04 -3.22965309e-01  1.63393565e+04]
------
Step:7, Action:West
State  138
Old Q Values:  [ 4.46376326e+03  1.30796324e+04 -3.22965309e-01  1.63393565e+04]
New Q values:  [ 4.46376326e+03  1.30796324e+04 -3.22965309e-01  9.09294710e+03]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  8526.01501518   516.59659183]
------
Step:8, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  8526.01501518   516.59659183]
New Q values:  [ -253.44886264 -1902.20915811  7333.69571726   516.59659183]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  1.30796324e+04 -3.22965309e-01  9.09294710e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 4.46376326e+03  1.30796324e+04 -3.22965309e-01  9.09294710e+03]
New Q values:  [ 4.46376326e+03  8.01235312e+03 -3.22965309e-01  9.09294710e+03]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[9270.33390689 5052.45183344  790.72804752 5825.93296778]
------
Step:10, Action:North
State  210
Old Q Values:  [9270.33390689 5052.45183344  790.72804752 5825.93296778]
New Q values:  [6435.41769182 5052.45183344  790.72804752 5825.93296778]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  8.01235312e+03 -3.22965309e-01  9.09294710e+03]
------
Step:11, Action:West
State  138
Old Q Values:  [ 4.46376326e+03  8.01235312e+03 -3.22965309e-01  9.09294710e+03]
New Q values:  [ 4.46376326e+03  8.01235312e+03 -3.22965309e-01  4.70506856e+03]
Reward: -1  Episode Reward:  19
xxxxx
x.a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3561.6324195    680.501585  ]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3561.6324195    680.501585  ]
New Q values:  [ -281.736      -1150.91067548  3827.7589039    680.501585  ]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  8.01235312e+03 -3.22965309e-01  4.70506856e+03]
------
Step:13, Action:South
State  136
Old Q Values:  [-2129.37064562  8128.52839111   660.86649319 -4711.44957371]
New Q values:  [-2129.37064562 10045.77791926   660.86649319 -4711.44957371]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22649.88854274 18431.09903793 -4584.50430574 19319.9585158 ]
------
Step:14, Action:North
State  208
Old Q Values:  [22649.88854274 18431.09903793 -4584.50430574 19319.9585158 ]
New Q values:  [11463.06135319 18431.09903793 -4584.50430574 19319.9585158 ]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  8.01235312e+03 -3.22965309e-01  4.70506856e+03]
------
Step:15, Action:South
State  138
Old Q Values:  [ 4.46376326e+03  8.01235312e+03 -3.22965309e-01  4.70506856e+03]
New Q values:  [ 4.46376326e+03  5.13496656e+03 -3.22965309e-01  4.70506856e+03]
Reward: -1  Episode Reward:  15
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6435.41769182 5052.45183344  790.72804752 5825.93296778]
------
Step:16, Action:North
State  208
Old Q Values:  [11463.06135319 18431.09903793 -4584.50430574 19319.9585158 ]
New Q values:  [ 6125.11450798 18431.09903793 -4584.50430574 19319.9585158 ]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  5.13496656e+03 -3.22965309e-01  4.70506856e+03]
------
Step:17, Action:South
State  136
Old Q Values:  [-2129.37064562 10045.77791926   660.86649319 -4711.44957371]
New Q values:  [-2129.37064562  9813.69872245   660.86649319 -4711.44957371]
Reward: -1  Episode Reward:  13
xxxxx
x.g x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6125.11450798 18431.09903793 -4584.50430574 19319.9585158 ]
------
Step:18, Action:West
State  208
Old Q Values:  [ 6125.11450798 18431.09903793 -4584.50430574 19319.9585158 ]
New Q values:  [ 6125.11450798 18431.09903793 -4584.50430574  6401.87970259]
Reward: -10001  Episode Reward:  -9988
xxxxx
x.  x
x.g x
x.. x
xxxxx
xxxxx
x...x
x.g.x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11527.3059864   1802.67044829 -5588.09647059  6513.48786542]
------
Step:1, Action:North
State  288
Old Q Values:  [11527.3059864   1802.67044829 -5588.09647059  6513.48786542]
New Q values:  [ 4145.65210594  1802.67044829 -5588.09647059  6513.48786542]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x. gx
x.. x
xxxxx
xxxxx
x.g.x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6125.11450798 18431.09903793 -4584.50430574  6401.87970259]
------
Step:1, Action:South
State  208
Old Q Values:  [ 6125.11450798 18431.09903793 -4584.50430574  6401.87970259]
New Q values:  [ 6125.11450798  9331.8859748  -4584.50430574  6401.87970259]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4145.65210594  1802.67044829 -5588.09647059  6513.48786542]
------
Step:2, Action:West
State  288
Old Q Values:  [ 4145.65210594  1802.67044829 -5588.09647059  6513.48786542]
New Q values:  [ 4145.65210594  1802.67044829 -5588.09647059 22607.34327063]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xg. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 66655.16041488 36603.13468321]
------
Step:3, Action:East
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 66655.16041488 36603.13468321]
New Q values:  [ 6518.97208123 -8521.23367799 33443.66714714 36603.13468321]
Reward: -1  Episode Reward:  17
xxxxx
x. .x
x.. x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4145.65210594  1802.67044829 -5588.09647059 22607.34327063]
------
Step:4, Action:West
State  288
Old Q Values:  [ 4145.65210594  1802.67044829 -5588.09647059 22607.34327063]
New Q values:  [ 4145.65210594  1802.67044829 -5588.09647059 14023.27771322]
Reward: -10001  Episode Reward:  -9984
xxxxx
x. .x
x.. x
x.g x
xxxxx
xxxxx
xag.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1309.02570645 -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  108
Old Q Values:  [-8463.16477134  3102.60619698  1637.39424494     0.        ]
New Q values:  [-8463.16477134  3395.06906969  1637.39424494     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  3232.64475742  7162.08863634 -4966.32149798]
------
Step:2, Action:East
State  181
Old Q Values:  [1.30461225e+03 2.22394919e+03 3.46403294e+03 3.33862213e+00]
New Q values:  [1.30461225e+03 2.22394919e+03 5.60946926e+03 3.33862213e+00]
Reward: 9  Episode Reward:  18
xxxxx
x g.x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  1465.42682022 14061.52026008   231.67262594]
------
Step:3, Action:East
State  200
Old Q Values:  [  169.9257398  23608.78227304  2260.14924627   610.93635926]
New Q values:  [  169.9257398  23608.78227304  3353.15003737   610.93635926]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3795.62608995 5883.63267376  606.149024   8145.63446288]
------
Step:4, Action:South
State  210
Old Q Values:  [6435.41769182 5052.45183344  790.72804752 5825.93296778]
New Q values:  [6435.41769182 6233.36404734  790.72804752 5825.93296778]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4145.65210594  1802.67044829 -5588.09647059 14023.27771322]
------
Step:5, Action:North
State  288
Old Q Values:  [ 4145.65210594  1802.67044829 -5588.09647059 14023.27771322]
New Q values:  [ 2484.25107732  1802.67044829 -5588.09647059 14023.27771322]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2440.34612496 2755.30078316    0.          930.00701399]
------
Step:6, Action:North
State  218
Old Q Values:  [2440.34612496 2755.30078316    0.          930.00701399]
New Q values:  [2522.02841669 2755.30078316    0.          930.00701399]
Reward: 9  Episode Reward:  44
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  5.13496656e+03 -3.22965309e-01  4.70506856e+03]
------
Step:7, Action:South
State  138
Old Q Values:  [ 4.46376326e+03  5.13496656e+03 -3.22965309e-01  4.70506856e+03]
New Q values:  [ 4.46376326e+03  4.49707696e+03 -3.22965309e-01  4.70506856e+03]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3795.62608995 5883.63267376  606.149024   8145.63446288]
------
Step:8, Action:South
State  218
Old Q Values:  [2522.02841669 2755.30078316    0.          930.00701399]
New Q values:  [2522.02841669 5308.50362723    0.          930.00701399]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2484.25107732  1802.67044829 -5588.09647059 14023.27771322]
------
Step:9, Action:North
State  288
Old Q Values:  [ 2484.25107732  1802.67044829 -5588.09647059 14023.27771322]
New Q values:  [ 3436.79076979  1802.67044829 -5588.09647059 14023.27771322]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3795.62608995 5883.63267376  606.149024   8145.63446288]
------
Step:10, Action:South
State  218
Old Q Values:  [2522.02841669 5308.50362723    0.          930.00701399]
New Q values:  [2522.02841669 6329.78476486    0.          930.00701399]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3436.79076979  1802.67044829 -5588.09647059 14023.27771322]
------
Step:11, Action:North
State  288
Old Q Values:  [ 3436.79076979  1802.67044829 -5588.09647059 14023.27771322]
New Q values:  [ 3273.05173737  1802.67044829 -5588.09647059 14023.27771322]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2522.02841669 6329.78476486    0.          930.00701399]
------
Step:12, Action:North
State  216
Old Q Values:  [3795.62608995 5883.63267376  606.149024   8145.63446288]
New Q values:  [2929.17100536 5883.63267376  606.149024   8145.63446288]
Reward: -1  Episode Reward:  38
xxxxx
x  ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  4.49707696e+03 -3.22965309e-01  4.70506856e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 4.46376326e+03  4.49707696e+03 -3.22965309e-01  4.70506856e+03]
New Q values:  [ 4.46376326e+03  4.49707696e+03 -3.22965309e-01  4.08153614e+03]
Reward: -1  Episode Reward:  37
xxxxx
x a x
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  7333.69571726   516.59659183]
------
Step:14, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  7333.69571726   516.59659183]
New Q values:  [ -253.44886264 -1902.20915811  4282.00137524   516.59659183]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x  gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.46376326e+03  4.49707696e+03 -3.22965309e-01  4.08153614e+03]
------
Step:15, Action:North
State  138
Old Q Values:  [ 4.46376326e+03  4.49707696e+03 -3.22965309e-01  4.08153614e+03]
New Q values:  [ 2.95402839e+03  4.49707696e+03 -3.22965309e-01  4.08153614e+03]
Reward: -301  Episode Reward:  -265
xxxxx
x  ax
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.95402839e+03  4.49707696e+03 -3.22965309e-01  4.08153614e+03]
------
Step:16, Action:South
State  136
Old Q Values:  [-2129.37064562  9813.69872245   660.86649319 -4711.44957371]
New Q values:  [-2129.37064562  6368.56982784   660.86649319 -4711.44957371]
Reward: -1  Episode Reward:  -266
xxxxx
x g x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2929.17100536 5883.63267376  606.149024   8145.63446288]
------
Step:17, Action:West
State  216
Old Q Values:  [2929.17100536 5883.63267376  606.149024   8145.63446288]
New Q values:  [2929.17100536 5883.63267376  606.149024   4566.33324692]
Reward: -1  Episode Reward:  -267
xxxxx
x  gx
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 4362.26487255 -789.02220255 1084.48089466]
------
Step:18, Action:South
State  201
Old Q Values:  [ 613.33320563 4362.26487255 -789.02220255 1084.48089466]
New Q values:  [ 613.33320563 6915.25862153 -789.02220255 1084.48089466]
Reward: 9  Episode Reward:  -258
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 17216.50890838]
------
Step:19, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6943.79635097 17216.50890838]
New Q values:  [ 1637.72437281  1974.75214244  6943.79635097 95238.51654585]
Reward: 100009  Episode Reward:  99751
xxxxx
x   x
x   x
xa gx
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3827.7589039    680.501585  ]
------
Step:1, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  4282.00137524   516.59659183]
New Q values:  [ -253.44886264 -1902.20915811  3067.32363844   516.59659183]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.95402839e+03  4.49707696e+03 -3.22965309e-01  4.08153614e+03]
------
Step:2, Action:West
State  136
Old Q Values:  [-2129.37064562  6368.56982784   660.86649319 -4711.44957371]
New Q values:  [-2129.37064562  6368.56982784   660.86649319 -1803.08599325]
Reward: -1  Episode Reward:  8
xxxxx
x.agx
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   273.64612079]
------
Step:3, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3067.32363844   516.59659183]
New Q values:  [ -253.44886264 -1902.20915811  3067.32363844   867.03619284]
Reward: 9  Episode Reward:  17
xxxxx
xa  x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  760.71844312 2183.32518701 -252.78192178]
------
Step:4, Action:East
State  107
Old Q Values:  [-252.35169558  760.71844312 2183.32518701 -252.78192178]
New Q values:  [-252.35169558  760.71844312 1792.92716634 -252.78192178]
Reward: -1  Episode Reward:  16
xxxxx
x a x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  3067.32363844   867.03619284]
------
Step:5, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  3067.32363844   867.03619284]
New Q values:  [ -253.44886264 -1902.20915811  2575.45254372   867.03619284]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.95402839e+03  4.49707696e+03 -3.22965309e-01  4.08153614e+03]
------
Step:6, Action:West
State  138
Old Q Values:  [ 2.95402839e+03  4.49707696e+03 -3.22965309e-01  4.08153614e+03]
New Q values:  [ 2.95402839e+03  4.49707696e+03 -3.22965309e-01  2.40465022e+03]
Reward: -1  Episode Reward:  14
xxxxx
x a x
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2575.45254372   867.03619284]
------
Step:7, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2575.45254372   867.03619284]
New Q values:  [ -253.44886264 -1902.20915811  2378.70410583   867.03619284]
Reward: -1  Episode Reward:  13
xxxxx
x  ax
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.95402839e+03  4.49707696e+03 -3.22965309e-01  2.40465022e+03]
------
Step:8, Action:North
State  138
Old Q Values:  [ 2.95402839e+03  4.49707696e+03 -3.22965309e-01  2.40465022e+03]
New Q values:  [ 2.35013445e+03  4.49707696e+03 -3.22965309e-01  2.40465022e+03]
Reward: -301  Episode Reward:  -288
xxxxx
x  ax
x. .x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.35013445e+03  4.49707696e+03 -3.22965309e-01  2.40465022e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 2.35013445e+03  4.49707696e+03 -3.22965309e-01  2.40465022e+03]
New Q values:  [ 2.35013445e+03 -2.43067941e+03 -3.22965309e-01  2.40465022e+03]
Reward: -9991  Episode Reward:  -10279
xxxxx
x   x
x. gx
x...x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3827.7589039    680.501585  ]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3827.7589039    680.501585  ]
New Q values:  [ -281.736      -1150.91067548  2257.89862742   680.501585  ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.35013445e+03 -2.43067941e+03 -3.22965309e-01  2.40465022e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 2.35013445e+03 -2.43067941e+03 -3.22965309e-01  2.40465022e+03]
New Q values:  [ 2.35013445e+03 -2.43067941e+03 -3.22965309e-01  1.63862968e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.a x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2257.89862742   680.501585  ]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2257.89862742   680.501585  ]
New Q values:  [ -281.736      -1150.91067548  1607.59978455   680.501585  ]
Reward: -1  Episode Reward:  7
xxxxx
x. ax
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.35013445e+03 -2.43067941e+03 -3.22965309e-01  1.63862968e+03]
------
Step:4, Action:North
State  136
Old Q Values:  [-2129.37064562  6368.56982784   660.86649319 -1803.08599325]
New Q values:  [  878.22269011  6368.56982784   660.86649319 -1803.08599325]
Reward: -301  Episode Reward:  -294
xxxxx
x.gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  6368.56982784   660.86649319 -1803.08599325]
------
Step:5, Action:South
State  136
Old Q Values:  [  878.22269011  6368.56982784   660.86649319 -1803.08599325]
New Q values:  [  878.22269011  4317.91773326   660.86649319 -1803.08599325]
Reward: 9  Episode Reward:  -285
xxxxx
xg  x
x .ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2929.17100536 5883.63267376  606.149024   4566.33324692]
------
Step:6, Action:South
State  208
Old Q Values:  [ 6125.11450798  9331.8859748  -4584.50430574  6401.87970259]
New Q values:  [ 6125.11450798  7945.13770389 -4584.50430574  6401.87970259]
Reward: 9  Episode Reward:  -276
xxxxx
x.  x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3273.05173737  1802.67044829 -5588.09647059 14023.27771322]
------
Step:7, Action:West
State  288
Old Q Values:  [ 3273.05173737  1802.67044829 -5588.09647059 14023.27771322]
New Q values:  [ 3273.05173737  1802.67044829 -5588.09647059 16595.65149025]
Reward: 9  Episode Reward:  -267
xxxxx
x.  x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 33443.66714714 36603.13468321]
------
Step:8, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6943.79635097 95238.51654585]
New Q values:  [ 1637.72437281  1974.75214244  6943.79635097 66447.31960085]
Reward: 9  Episode Reward:  -258
xxxxx
x.  x
x .gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[94488.37660835 12764.58618105 17293.92977008  1875.31501677]
------
Step:9, Action:North
State  257
Old Q Values:  [94488.37660835 12764.58618105 17293.92977008  1875.31501677]
New Q values:  [55937.56605774 12764.58618105 17293.92977008  1875.31501677]
Reward: -1  Episode Reward:  -259
xxxxx
x. gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   7540.97267687     0.        ]
------
Step:10, Action:North
State  181
Old Q Values:  [1.30461225e+03 2.22394919e+03 5.60946926e+03 3.33862213e+00]
New Q values:  [9.19952611e+02 2.22394919e+03 5.60946926e+03 3.33862213e+00]
Reward: 9  Episode Reward:  -250
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1309.02570645 -2165.66138672  -180.6       ]
------
Step:11, Action:South
State  99
Old Q Values:  [    0.         43570.05483897 58945.17153785     0.        ]
New Q values:  [    0.         35570.23734999 58945.17153785     0.        ]
Reward: -1  Episode Reward:  -251
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   7540.97267687     0.        ]
------
Step:12, Action:North
State  183
Old Q Values:  [2343.43228624 1861.84676547 5267.4951939  1554.80203889]
New Q values:  [1300.74716873 1861.84676547 5267.4951939  1554.80203889]
Reward: -1  Episode Reward:  -252
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1213.24751413  431.25952337 -120.29354603]
------
Step:13, Action:South
State  99
Old Q Values:  [    0.         35570.23734999 58945.17153785     0.        ]
New Q values:  [    0.         32370.3103544  58945.17153785     0.        ]
Reward: -1  Episode Reward:  -253
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   7540.97267687     0.        ]
------
Step:14, Action:North
State  183
Old Q Values:  [1300.74716873 1861.84676547 5267.4951939  1554.80203889]
New Q values:  [ 883.67312173 1861.84676547 5267.4951939  1554.80203889]
Reward: -1  Episode Reward:  -254
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1213.24751413  431.25952337 -120.29354603]
------
Step:15, Action:South
State  110
Old Q Values:  [ -239.29051573 -2331.08565392  1540.97816408  -180.6       ]
New Q values:  [-239.29051573 1080.40897381 1540.97816408 -180.6       ]
Reward: -1  Episode Reward:  -255
xxxxx
x   x
xa. x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  6711.47745126     0.        ]
------
Step:16, Action:East
State  179
Old Q Values:  [82228.67666629 16101.90751562 85872.47482516     0.        ]
New Q values:  [82228.67666629 16101.90751562 97127.9917572      0.        ]
Reward: 100009  Episode Reward:  99754
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[9958.97159479 1352.37702619 4519.24696083  -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [9958.97159479 1352.37702619 4519.24696083  -12.17474163]
New Q values:  [5671.82941448 1352.37702619 4519.24696083  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[9.19952611e+02 2.22394919e+03 5.60946926e+03 3.33862213e+00]
------
Step:2, Action:South
State  183
Old Q Values:  [ 883.67312173 1861.84676547 5267.4951939  1554.80203889]
New Q values:  [ 883.67312173 2445.68753053 5267.4951939  1554.80203889]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5671.82941448 1352.37702619 4519.24696083  -12.17474163]
------
Step:3, Action:North
State  261
Old Q Values:  [5671.82941448 1352.37702619 4519.24696083  -12.17474163]
New Q values:  [3848.38032396 1352.37702619 4519.24696083  -12.17474163]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 883.67312173 2445.68753053 5267.4951939  1554.80203889]
------
Step:4, Action:East
State  181
Old Q Values:  [9.19952611e+02 2.22394919e+03 5.60946926e+03 3.33862213e+00]
New Q values:  [9.19952611e+02 2.22394919e+03 5.77158934e+03 3.33862213e+00]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11741.33878514 10312.42888911   767.35890262]
------
Step:5, Action:South
State  195
Old Q Values:  [38955.61549043  8305.50015881  7063.52223325  1169.39963074]
New Q values:  [38955.61549043 23261.79594378  7063.52223325  1169.39963074]
Reward: 9  Episode Reward:  25
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 66447.31960085]
------
Step:6, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6943.79635097 66447.31960085]
New Q values:  [ 1637.72437281  1974.75214244  6943.79635097 27934.10192859]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3848.38032396 1352.37702619 4519.24696083  -12.17474163]
------
Step:7, Action:East
State  261
Old Q Values:  [3848.38032396 1352.37702619 4519.24696083  -12.17474163]
New Q values:  [ 3848.38032396  1352.37702619 10187.32936291   -12.17474163]
Reward: -1  Episode Reward:  23
xxxxx
x..gx
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 27934.10192859]
------
Step:8, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6943.79635097 27934.10192859]
New Q values:  [ 1637.72437281  1974.75214244  6943.79635097 14229.23958031]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3848.38032396  1352.37702619 10187.32936291   -12.17474163]
------
Step:9, Action:East
State  261
Old Q Values:  [ 3848.38032396  1352.37702619 10187.32936291   -12.17474163]
New Q values:  [3848.38032396 1352.37702619 8343.10361926  -12.17474163]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 14229.23958031]
------
Step:10, Action:West
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 33443.66714714 36603.13468321]
New Q values:  [ 6518.97208123 -8521.23367799 33443.66714714 17143.58495906]
Reward: -1  Episode Reward:  20
xxxxx
x...x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3848.38032396 1352.37702619 8343.10361926  -12.17474163]
------
Step:11, Action:North
State  261
Old Q Values:  [3848.38032396 1352.37702619 8343.10361926  -12.17474163]
New Q values:  [3119.00068776 1352.37702619 8343.10361926  -12.17474163]
Reward: -1  Episode Reward:  19
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 883.67312173 2445.68753053 5267.4951939  1554.80203889]
------
Step:12, Action:East
State  183
Old Q Values:  [ 883.67312173 2445.68753053 5267.4951939  1554.80203889]
New Q values:  [ 883.67312173 2445.68753053 4879.9999047  1554.80203889]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  5.98126971e+03  2.17939995e+03]
------
Step:13, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.55816543e+04 1.17092909e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 1.55816543e+04 7.06665768e+03 2.45392999e+03]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6125.11450798  7945.13770389 -4584.50430574  6401.87970259]
------
Step:14, Action:South
State  208
Old Q Values:  [ 6125.11450798  7945.13770389 -4584.50430574  6401.87970259]
New Q values:  [ 6125.11450798  8162.15052863 -4584.50430574  6401.87970259]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3273.05173737  1802.67044829 -5588.09647059 16595.65149025]
------
Step:15, Action:West
State  288
Old Q Values:  [ 3273.05173737  1802.67044829 -5588.09647059 16595.65149025]
New Q values:  [ 3273.05173737  1802.67044829 -5588.09647059 10906.43247019]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 14229.23958031]
------
Step:16, Action:West
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 33443.66714714 17143.58495906]
New Q values:  [ 6518.97208123 -8521.23367799 33443.66714714  9359.7650694 ]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3119.00068776 1352.37702619 8343.10361926  -12.17474163]
------
Step:17, Action:North
State  261
Old Q Values:  [3119.00068776 1352.37702619 8343.10361926  -12.17474163]
New Q values:  [2711.00024651 1352.37702619 8343.10361926  -12.17474163]
Reward: -1  Episode Reward:  23
xxxxx
x...x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 883.67312173 2445.68753053 4879.9999047  1554.80203889]
------
Step:18, Action:East
State  181
Old Q Values:  [9.19952611e+02 2.22394919e+03 5.77158934e+03 3.33862213e+00]
New Q values:  [9.19952611e+02 2.22394919e+03 5.83043737e+03 3.33862213e+00]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11741.33878514 10312.42888911   767.35890262]
------
Step:19, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.55816543e+04 7.06665768e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.62651619e+04 7.06665768e+03 2.45392999e+03]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 33443.66714714  9359.7650694 ]
------
Step:20, Action:East
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 33443.66714714  9359.7650694 ]
New Q values:  [ 6518.97208123 -8521.23367799 16648.79659991  9359.7650694 ]
Reward: -1  Episode Reward:  20
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3273.05173737  1802.67044829 -5588.09647059 10906.43247019]
------
Step:21, Action:West
State  288
Old Q Values:  [ 3273.05173737  1802.67044829 -5588.09647059 10906.43247019]
New Q values:  [ 3273.05173737  1802.67044829 -5588.09647059  9356.61196805]
Reward: -1  Episode Reward:  19
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799 16648.79659991  9359.7650694 ]
------
Step:22, Action:East
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799 16648.79659991  9359.7650694 ]
New Q values:  [ 6518.97208123 -8521.23367799  9465.90223038  9359.7650694 ]
Reward: -1  Episode Reward:  18
xxxxx
x...x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3273.05173737  1802.67044829 -5588.09647059  9356.61196805]
------
Step:23, Action:West
State  288
Old Q Values:  [ 3273.05173737  1802.67044829 -5588.09647059  9356.61196805]
New Q values:  [ 3273.05173737  1802.67044829 -5588.09647059  6581.81545634]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799  9465.90223038  9359.7650694 ]
------
Step:24, Action:East
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799  9465.90223038  9359.7650694 ]
New Q values:  [ 6518.97208123 -8521.23367799  5760.30552905  9359.7650694 ]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3273.05173737  1802.67044829 -5588.09647059  6581.81545634]
------
Step:25, Action:North
State  288
Old Q Values:  [ 3273.05173737  1802.67044829 -5588.09647059  6581.81545634]
New Q values:  [ 3239.2460025   1802.67044829 -5588.09647059  6581.81545634]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  ax
xg  x
xxxxx
Step:26, Action:West
State  210
Old Q Values:  [6435.41769182 6233.36404734  790.72804752 5825.93296778]
New Q values:  [6435.41769182 6233.36404734  790.72804752 5103.37501425]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  5.98126971e+03  2.17939995e+03]
------
Step:27, Action:East
State  195
Old Q Values:  [38955.61549043 23261.79594378  7063.52223325  1169.39963074]
New Q values:  [38955.61549043 23261.79594378  4755.43420085  1169.39963074]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[6435.41769182 6233.36404734  790.72804752 5103.37501425]
------
Step:28, Action:North
State  210
Old Q Values:  [6435.41769182 6233.36404734  790.72804752 5103.37501425]
New Q values:  [21128.68564772  6233.36404734   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  22
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 16188.71019194  -180.00807518 61830.39523664]
------
Step:29, Action:West
State  130
Old Q Values:  [41234.48978377 16188.71019194  -180.00807518 61830.39523664]
New Q values:  [41234.48978377 16188.71019194  -180.00807518 62424.59639549]
Reward: 9  Episode Reward:  31
xxxxx
x.a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   31150.82321546 125623.46100278]
------
Step:30, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   31150.82321546 125623.46100278]
New Q values:  [  -180.6          3557.6642036   31150.82321546 127662.85544679]
Reward: 100009  Episode Reward:  100040
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6125.11450798  8162.15052863 -4584.50430574  6401.87970259]
------
Step:1, Action:South
State  208
Old Q Values:  [ 6125.11450798  8162.15052863 -4584.50430574  6401.87970259]
New Q values:  [ 6125.11450798  5244.80484835 -4584.50430574  6401.87970259]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3239.2460025   1802.67044829 -5588.09647059  6581.81545634]
------
Step:2, Action:West
State  288
Old Q Values:  [ 3239.2460025   1802.67044829 -5588.09647059  6581.81545634]
New Q values:  [ 3239.2460025   1802.67044829 -5588.09647059  5446.05570335]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799  5760.30552905  9359.7650694 ]
------
Step:3, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6943.79635097 14229.23958031]
New Q values:  [ 1637.72437281  1974.75214244  6943.79635097 22478.36564945]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x. gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[55937.56605774 12764.58618105 17293.92977008  1875.31501677]
------
Step:4, Action:North
State  261
Old Q Values:  [2711.00024651 1352.37702619 8343.10361926  -12.17474163]
New Q values:  [2838.93130978 1352.37702619 8343.10361926  -12.17474163]
Reward: 9  Episode Reward:  36
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[9.19952611e+02 2.22394919e+03 5.83043737e+03 3.33862213e+00]
------
Step:5, Action:South
State  180
Old Q Values:  [ 4507.91302948  3232.64475742  7162.08863634 -4966.32149798]
New Q values:  [ 4507.91302948  3060.24711158  7162.08863634 -4966.32149798]
Reward: -1  Episode Reward:  35
xxxxx
x...x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281  5892.63069538 -5679.36893145]
------
Step:6, Action:East
State  260
Old Q Values:  [ -393.58373963 -5704.51612281  5892.63069538 -5679.36893145]
New Q values:  [ -393.58373963 -5704.51612281  5164.38179897 -5679.36893145]
Reward: -1  Episode Reward:  34
xxxxx
x...x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6518.97208123 -8521.23367799  5760.30552905  9359.7650694 ]
------
Step:7, Action:North
State  272
Old Q Values:  [ 6518.97208123 -8521.23367799  5760.30552905  9359.7650694 ]
New Q values:  [ 5380.59065963 -8521.23367799  5760.30552905  9359.7650694 ]
Reward: -1  Episode Reward:  33
xxxxx
x...x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[-6.00000000e-01  9.24533942e+03  5.98126971e+03  2.17939995e+03]
------
Step:8, Action:East
State  195
Old Q Values:  [38955.61549043 23261.79594378  4755.43420085  1169.39963074]
New Q values:  [38955.61549043 23261.79594378  8240.17937465  1169.39963074]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[21128.68564772  6233.36404734   790.72804752  5103.37501425]
------
Step:9, Action:North
State  210
Old Q Values:  [21128.68564772  6233.36404734   790.72804752  5103.37501425]
New Q values:  [27184.25317773  6233.36404734   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  41
xxxxx
x..ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 16188.71019194  -180.00807518 62424.59639549]
------
Step:10, Action:West
State  130
Old Q Values:  [41234.48978377 16188.71019194  -180.00807518 62424.59639549]
New Q values:  [41234.48978377 16188.71019194  -180.00807518 63274.09519223]
Reward: 9  Episode Reward:  50
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   31150.82321546 127662.85544679]
------
Step:11, Action:West
State  112
Old Q Values:  [     0.           3629.92591876  27196.16909557 110949.76306292]
New Q values:  [     0.           3629.92591876  27196.16909557 129302.41900456]
Reward: 100009  Episode Reward:  100059
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
xg..x
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6125.11450798  5244.80484835 -4584.50430574  6401.87970259]
------
Step:1, Action:West
State  216
Old Q Values:  [2929.17100536 5883.63267376  606.149024   4566.33324692]
New Q values:  [2929.17100536 5883.63267376  606.149024   8914.56798068]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xga x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  23608.78227304  3353.15003737   610.93635926]
------
Step:2, Action:South
State  194
Old Q Values:  [-6.00000000e-01  9.24533942e+03  5.98126971e+03  2.17939995e+03]
New Q values:  [-6.00000000e-01  6.51146529e+03  5.98126971e+03  2.17939995e+03]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.  x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5380.59065963 -8521.23367799  5760.30552905  9359.7650694 ]
------
Step:3, Action:East
State  272
Old Q Values:  [ 5380.59065963 -8521.23367799  5760.30552905  9359.7650694 ]
New Q values:  [ 5380.59065963 -8521.23367799  3943.33892263  9359.7650694 ]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3239.2460025   1802.67044829 -5588.09647059  5446.05570335]
------
Step:4, Action:West
State  288
Old Q Values:  [ 3239.2460025   1802.67044829 -5588.09647059  5446.05570335]
New Q values:  [ 3239.2460025   1802.67044829 -5588.09647059  4985.75180216]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5380.59065963 -8521.23367799  3943.33892263  9359.7650694 ]
------
Step:5, Action:West
State  272
Old Q Values:  [ 5380.59065963 -8521.23367799  3943.33892263  9359.7650694 ]
New Q values:  [ 5380.59065963 -8521.23367799  3943.33892263  5298.62056745]
Reward: 9  Episode Reward:  35
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281  5164.38179897 -5679.36893145]
------
Step:6, Action:East
State  260
Old Q Values:  [ -393.58373963 -5704.51612281  5164.38179897 -5679.36893145]
New Q values:  [ -393.58373963 -5704.51612281  3679.32991748 -5679.36893145]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5380.59065963 -8521.23367799  3943.33892263  5298.62056745]
------
Step:7, Action:North
State  272
Old Q Values:  [ 5380.59065963 -8521.23367799  3943.33892263  5298.62056745]
New Q values:  [ 7031.1848256  -8521.23367799  3943.33892263  5298.62056745]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x.a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.62651619e+04 7.06665768e+03 2.45392999e+03]
------
Step:8, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.62651619e+04 7.06665768e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 8.61482020e+03 7.06665768e+03 2.45392999e+03]
Reward: -1  Episode Reward:  32
xxxxx
xg..x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7031.1848256  -8521.23367799  3943.33892263  5298.62056745]
------
Step:9, Action:North
State  272
Old Q Values:  [ 7031.1848256  -8521.23367799  3943.33892263  5298.62056745]
New Q values:  [ 5396.31998925 -8521.23367799  3943.33892263  5298.62056745]
Reward: -1  Episode Reward:  31
xxxxx
x g.x
x.a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 8.61482020e+03 7.06665768e+03 2.45392999e+03]
------
Step:10, Action:South
State  192
Old Q Values:  [3.89777037e-01 8.61482020e+03 7.06665768e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 5.06422408e+03 7.06665768e+03 2.45392999e+03]
Reward: -1  Episode Reward:  30
xxxxx
xg..x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5396.31998925 -8521.23367799  3943.33892263  5298.62056745]
------
Step:11, Action:North
State  272
Old Q Values:  [ 5396.31998925 -8521.23367799  3943.33892263  5298.62056745]
New Q values:  [ 4277.92529994 -8521.23367799  3943.33892263  5298.62056745]
Reward: -1  Episode Reward:  29
xxxxx
x g.x
x.a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.06422408e+03 7.06665768e+03 2.45392999e+03]
------
Step:12, Action:East
State  192
Old Q Values:  [3.89777037e-01 5.06422408e+03 7.06665768e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 5.06422408e+03 4.74662698e+03 2.45392999e+03]
Reward: -1  Episode Reward:  28
xxxxx
xg..x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6125.11450798  5244.80484835 -4584.50430574  6401.87970259]
------
Step:13, Action:West
State  208
Old Q Values:  [ 6125.11450798  5244.80484835 -4584.50430574  6401.87970259]
New Q values:  [ 6125.11450798  5244.80484835 -4584.50430574  4079.41910367]
Reward: -1  Episode Reward:  27
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.06422408e+03 4.74662698e+03 2.45392999e+03]
------
Step:14, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.06422408e+03 4.74662698e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 3.61467580e+03 4.74662698e+03 2.45392999e+03]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4277.92529994 -8521.23367799  3943.33892263  5298.62056745]
------
Step:15, Action:West
State  272
Old Q Values:  [ 4277.92529994 -8521.23367799  3943.33892263  5298.62056745]
New Q values:  [ 4277.92529994 -8521.23367799  3943.33892263  3222.64720222]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ -393.58373963 -5704.51612281  3679.32991748 -5679.36893145]
------
Step:16, Action:East
State  260
Old Q Values:  [ -393.58373963 -5704.51612281  3679.32991748 -5679.36893145]
New Q values:  [ -393.58373963 -5704.51612281  2754.50955697 -5679.36893145]
Reward: -1  Episode Reward:  24
xxxxx
xg..x
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4277.92529994 -8521.23367799  3943.33892263  3222.64720222]
------
Step:17, Action:North
State  272
Old Q Values:  [ 4277.92529994 -8521.23367799  3943.33892263  3222.64720222]
New Q values:  [ 3134.5582149  -8521.23367799  3943.33892263  3222.64720222]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.61467580e+03 4.74662698e+03 2.45392999e+03]
------
Step:18, Action:East
State  192
Old Q Values:  [3.89777037e-01 3.61467580e+03 4.74662698e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 3.61467580e+03 3.73558515e+03 2.45392999e+03]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6125.11450798  5244.80484835 -4584.50430574  4079.41910367]
------
Step:19, Action:North
State  208
Old Q Values:  [ 6125.11450798  5244.80484835 -4584.50430574  4079.41910367]
New Q values:  [ 3750.82112317  5244.80484835 -4584.50430574  4079.41910367]
Reward: 9  Episode Reward:  31
xxxxx
x gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  4317.91773326   660.86649319 -1803.08599325]
------
Step:20, Action:South
State  128
Old Q Values:  [47581.15146009 14691.8794498  -8652.84       84071.34358242]
New Q values:  [47581.15146009  7449.59323443 -8652.84       84071.34358242]
Reward: -1  Episode Reward:  30
xxxxx
x .gx
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3750.82112317  5244.80484835 -4584.50430574  4079.41910367]
------
Step:21, Action:South
State  208
Old Q Values:  [ 3750.82112317  5244.80484835 -4584.50430574  4079.41910367]
New Q values:  [ 3750.82112317  3593.04747999 -4584.50430574  4079.41910367]
Reward: -1  Episode Reward:  29
xxxxx
x . x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3239.2460025   1802.67044829 -5588.09647059  4985.75180216]
------
Step:22, Action:West
State  288
Old Q Values:  [ 3239.2460025   1802.67044829 -5588.09647059  4985.75180216]
New Q values:  [ 3239.2460025   1802.67044829 -5588.09647059  8737.2104157 ]
Reward: -1  Episode Reward:  28
xxxxx
x .gx
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 22478.36564945]
------
Step:23, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3943.33892263  3222.64720222]
New Q values:  [ 3134.5582149  -8521.23367799  3943.33892263 18069.72869821]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[55937.56605774 12764.58618105 17293.92977008  1875.31501677]
------
Step:24, Action:North
State  261
Old Q Values:  [2838.93130978 1352.37702619 8343.10361926  -12.17474163]
New Q values:  [2890.10373509 1352.37702619 8343.10361926  -12.17474163]
Reward: 9  Episode Reward:  36
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[9.19952611e+02 2.22394919e+03 5.83043737e+03 3.33862213e+00]
------
Step:25, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039   7540.97267687     0.        ]
New Q values:  [60476.05138135 21430.9929039   4136.46461444     0.        ]
Reward: -1  Episode Reward:  35
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 3.61467580e+03 3.73558515e+03 2.45392999e+03]
------
Step:26, Action:East
State  193
Old Q Values:  [-5922.26708831 11741.33878514 10312.42888911   767.35890262]
New Q values:  [-5922.26708831 11741.33878514  5348.19728675   767.35890262]
Reward: -1  Episode Reward:  34
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3750.82112317  3593.04747999 -4584.50430574  4079.41910367]
------
Step:27, Action:West
State  208
Old Q Values:  [ 3750.82112317  3593.04747999 -4584.50430574  4079.41910367]
New Q values:  [ 3750.82112317  3593.04747999 -4584.50430574  5153.56927701]
Reward: -1  Episode Reward:  33
xxxxx
x . x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-5922.26708831 11741.33878514  5348.19728675   767.35890262]
------
Step:28, Action:South
State  192
Old Q Values:  [3.89777037e-01 3.61467580e+03 3.73558515e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 6.86618893e+03 3.73558515e+03 2.45392999e+03]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  3943.33892263 18069.72869821]
------
Step:29, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3943.33892263 18069.72869821]
New Q values:  [ 3134.5582149  -8521.23367799  3943.33892263 24008.56129661]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[55937.56605774 12764.58618105 17293.92977008  1875.31501677]
------
Step:30, Action:North
State  261
Old Q Values:  [2890.10373509 1352.37702619 8343.10361926  -12.17474163]
New Q values:  [2904.57270521 1352.37702619 8343.10361926  -12.17474163]
Reward: -1  Episode Reward:  30
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[9.19952611e+02 2.22394919e+03 5.83043737e+03 3.33862213e+00]
------
Step:31, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039   4136.46461444     0.        ]
New Q values:  [60476.05138135 21430.9929039   3713.84252466     0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 6.86618893e+03 3.73558515e+03 2.45392999e+03]
------
Step:32, Action:South
State  192
Old Q Values:  [3.89777037e-01 6.86618893e+03 3.73558515e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 9.94844396e+03 3.73558515e+03 2.45392999e+03]
Reward: -1  Episode Reward:  28
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  3943.33892263 24008.56129661]
------
Step:33, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3943.33892263 24008.56129661]
New Q values:  [ 3134.5582149  -8521.23367799  3943.33892263 26384.09433597]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[55937.56605774 12764.58618105 17293.92977008  1875.31501677]
------
Step:34, Action:North
State  260
Old Q Values:  [ -393.58373963 -5704.51612281  2754.50955697 -5679.36893145]
New Q values:  [ 1990.59309505 -5704.51612281  2754.50955697 -5679.36893145]
Reward: -1  Episode Reward:  26
xxxxx
xg. x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  3060.24711158  7162.08863634 -4966.32149798]
------
Step:35, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039   3713.84252466     0.        ]
New Q values:  [60476.05138135 21430.9929039   4469.47019812     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x g x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 9.94844396e+03 3.73558515e+03 2.45392999e+03]
------
Step:36, Action:South
State  192
Old Q Values:  [3.89777037e-01 9.94844396e+03 3.73558515e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.18940059e+04 3.73558515e+03 2.45392999e+03]
Reward: -1  Episode Reward:  24
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  3943.33892263 26384.09433597]
------
Step:37, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3943.33892263 26384.09433597]
New Q values:  [ 3134.5582149  -8521.23367799  3943.33892263 11379.39060148]
Reward: -1  Episode Reward:  23
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1990.59309505 -5704.51612281  2754.50955697 -5679.36893145]
------
Step:38, Action:East
State  261
Old Q Values:  [2904.57270521 1352.37702619 8343.10361926  -12.17474163]
New Q values:  [2904.57270521 1352.37702619 6750.45862815  -12.17474163]
Reward: -1  Episode Reward:  22
xxxxx
x . x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  3943.33892263 11379.39060148]
------
Step:39, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3943.33892263 11379.39060148]
New Q values:  [ 3134.5582149  -8521.23367799  3943.33892263  5377.50910768]
Reward: -1  Episode Reward:  21
xxxxx
x . x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1990.59309505 -5704.51612281  2754.50955697 -5679.36893145]
------
Step:40, Action:East
State  260
Old Q Values:  [ 1990.59309505 -5704.51612281  2754.50955697 -5679.36893145]
New Q values:  [ 1990.59309505 -5704.51612281  2714.45655509 -5679.36893145]
Reward: -1  Episode Reward:  20
xxxxx
x . x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  3943.33892263  5377.50910768]
------
Step:41, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3943.33892263  5377.50910768]
New Q values:  [ 3134.5582149  -8521.23367799  4197.89869376  5377.50910768]
Reward: -1  Episode Reward:  19
xxxxx
x . x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3239.2460025   1802.67044829 -5588.09647059  8737.2104157 ]
------
Step:42, Action:North
State  288
Old Q Values:  [ 3239.2460025   1802.67044829 -5588.09647059  8737.2104157 ]
New Q values:  [ 9450.37435432  1802.67044829 -5588.09647059  8737.2104157 ]
Reward: -1  Episode Reward:  18
xxxxx
x . x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[27184.25317773  6233.36404734   790.72804752  5103.37501425]
------
Step:43, Action:North
State  210
Old Q Values:  [27184.25317773  6233.36404734   790.72804752  5103.37501425]
New Q values:  [29855.32982876  6233.36404734   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  17
xxxxx
x .ax
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377 16188.71019194  -180.00807518 63274.09519223]
------
Step:44, Action:West
State  130
Old Q Values:  [41234.48978377 16188.71019194  -180.00807518 63274.09519223]
New Q values:  [ 41234.48978377  16188.71019194   -180.00807518 123613.89471093]
Reward: 100009  Episode Reward:  100026
xxxxx
x a x
x   x
xg  x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3998.52582569   766.31091109]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3998.52582569   766.31091109]
New Q values:  [-9594.56523706 -8069.05606225  2900.18565025   766.31091109]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  4317.91773326   660.86649319 -1803.08599325]
------
Step:2, Action:South
State  136
Old Q Values:  [  878.22269011  4317.91773326   660.86649319 -1803.08599325]
New Q values:  [  878.22269011  4406.93748751   660.86649319 -1803.08599325]
Reward: 9  Episode Reward:  18
xxxxx
x  gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2929.17100536 5883.63267376  606.149024   8914.56798068]
------
Step:3, Action:West
State  216
Old Q Values:  [2929.17100536 5883.63267376  606.149024   8914.56798068]
New Q values:  [2929.17100536 5883.63267376  606.149024   5645.80477873]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 6915.25862153 -789.02220255 1084.48089466]
------
Step:4, Action:South
State  195
Old Q Values:  [38955.61549043 23261.79594378  8240.17937465  1169.39963074]
New Q values:  [38955.61549043 16053.62807234  8240.17937465  1169.39963074]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 22478.36564945]
------
Step:5, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6943.79635097 22478.36564945]
New Q values:  [ 1637.72437281  1974.75214244  6943.79635097 25778.0160771 ]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x. gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[55937.56605774 12764.58618105 17293.92977008  1875.31501677]
------
Step:6, Action:North
State  261
Old Q Values:  [2904.57270521 1352.37702619 6750.45862815  -12.17474163]
New Q values:  [3115.08797931 1352.37702619 6750.45862815  -12.17474163]
Reward: 9  Episode Reward:  54
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 6492.86299076 1843.56468795  154.04646645]
------
Step:7, Action:South
State  191
Old Q Values:  [   3.06655861 2109.78545018 2029.22435297    0.        ]
New Q values:  [   3.06655861 2868.45176852 2029.22435297    0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3115.08797931 1352.37702619 6750.45862815  -12.17474163]
------
Step:8, Action:North
State  261
Old Q Values:  [3115.08797931 1352.37702619 6750.45862815  -12.17474163]
New Q values:  [3193.29408895 1352.37702619 6750.45862815  -12.17474163]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 337.36081627 6492.86299076 1843.56468795  154.04646645]
------
Step:9, Action:South
State  189
Old Q Values:  [ 337.36081627 6492.86299076 1843.56468795  154.04646645]
New Q values:  [ 337.36081627 4621.68278475 1843.56468795  154.04646645]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3193.29408895 1352.37702619 6750.45862815  -12.17474163]
------
Step:10, Action:East
State  261
Old Q Values:  [3193.29408895 1352.37702619 6750.45862815  -12.17474163]
New Q values:  [3193.29408895 1352.37702619 4266.540714    -12.17474163]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4561.50951103 -5807.06396197  5223.19087579  5187.5769688 ]
------
Step:11, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  4197.89869376  5377.50910768]
New Q values:  [ 3134.5582149  -8521.23367799 64519.6717838   5377.50910768]
Reward: 100009  Episode Reward:  100059
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9450.37435432  1802.67044829 -5588.09647059  8737.2104157 ]
------
Step:1, Action:North
State  288
Old Q Values:  [ 9450.37435432  1802.67044829 -5588.09647059  8737.2104157 ]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059  8737.2104157 ]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3750.82112317  3593.04747999 -4584.50430574  5153.56927701]
------
Step:2, Action:West
State  208
Old Q Values:  [ 3750.82112317  3593.04747999 -4584.50430574  5153.56927701]
New Q values:  [ 3750.82112317  3593.04747999 -4584.50430574  5635.02947634]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 1.18940059e+04 3.73558515e+03 2.45392999e+03]
------
Step:3, Action:South
State  193
Old Q Values:  [-5922.26708831 11741.33878514  5348.19728675   767.35890262]
New Q values:  [-5922.26708831 12435.34033719  5348.19728675   767.35890262]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 25778.0160771 ]
------
Step:4, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 64519.6717838   5377.50910768]
New Q values:  [ 3134.5582149  -8521.23367799 64519.6717838  18937.6734604 ]
Reward: 9  Episode Reward:  36
xxxxx
x.g.x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[55937.56605774 12764.58618105 17293.92977008  1875.31501677]
------
Step:5, Action:North
State  257
Old Q Values:  [55937.56605774 12764.58618105 17293.92977008  1875.31501677]
New Q values:  [40523.2418375  12764.58618105 17293.92977008  1875.31501677]
Reward: 9  Episode Reward:  45
xxxxx
x. gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   4469.47019812     0.        ]
------
Step:6, Action:North
State  181
Old Q Values:  [9.19952611e+02 2.22394919e+03 5.83043737e+03 3.33862213e+00]
New Q values:  [1.50633212e+03 2.22394919e+03 5.83043737e+03 3.33862213e+00]
Reward: 9  Episode Reward:  54
xxxxx
xa .x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 3776.50358003 1796.41351142    0.        ]
------
Step:7, Action:South
State  97
Old Q Values:  [    0.         83057.04593133     0.             0.        ]
New Q values:  [    0.         51365.03378694     0.             0.        ]
Reward: -1  Episode Reward:  53
xxxxx
x  gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   4469.47019812     0.        ]
------
Step:8, Action:North
State  181
Old Q Values:  [1.50633212e+03 2.22394919e+03 5.83043737e+03 3.33862213e+00]
New Q values:  [1.73488392e+03 2.22394919e+03 5.83043737e+03 3.33862213e+00]
Reward: -1  Episode Reward:  52
xxxxx
xa .x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 3776.50358003 1796.41351142    0.        ]
------
Step:9, Action:South
State  103
Old Q Values:  [ 221.30610858 3776.50358003 1796.41351142    0.        ]
New Q values:  [ 221.30610858 3259.13264319 1796.41351142    0.        ]
Reward: -1  Episode Reward:  51
xxxxx
x  .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.73488392e+03 2.22394919e+03 5.83043737e+03 3.33862213e+00]
------
Step:10, Action:South
State  183
Old Q Values:  [ 883.67312173 2445.68753053 4879.9999047  1554.80203889]
New Q values:  [ 883.67312173 2257.63722641 4879.9999047  1554.80203889]
Reward: -1  Episode Reward:  50
xxxxx
x  .x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3193.29408895 1352.37702619 4266.540714    -12.17474163]
------
Step:11, Action:North
State  261
Old Q Values:  [3193.29408895 1352.37702619 4266.540714    -12.17474163]
New Q values:  [3025.84884676 1352.37702619 4266.540714    -12.17474163]
Reward: -1  Episode Reward:  49
xxxxx
x  .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.73488392e+03 2.22394919e+03 5.83043737e+03 3.33862213e+00]
------
Step:12, Action:South
State  181
Old Q Values:  [1.73488392e+03 2.22394919e+03 5.83043737e+03 3.33862213e+00]
New Q values:  [1.73488392e+03 2.16894189e+03 5.83043737e+03 3.33862213e+00]
Reward: -1  Episode Reward:  48
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3025.84884676 1352.37702619 4266.540714    -12.17474163]
------
Step:13, Action:East
State  257
Old Q Values:  [40523.2418375  12764.58618105 17293.92977008  1875.31501677]
New Q values:  [40523.2418375  12764.58618105 14650.37673116  1875.31501677]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 25778.0160771 ]
------
Step:14, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.49320335e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.87664355e+03]
Reward: -1  Episode Reward:  46
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3025.84884676 1352.37702619 4266.540714    -12.17474163]
------
Step:15, Action:East
State  257
Old Q Values:  [40523.2418375  12764.58618105 14650.37673116  1875.31501677]
New Q values:  [40523.2418375  12764.58618105 13592.9555156   1875.31501677]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6943.79635097 25778.0160771 ]
------
Step:16, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.87664355e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 2.03001964e+03]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3025.84884676 1352.37702619 4266.540714    -12.17474163]
------
Step:17, Action:East
State  257
Old Q Values:  [40523.2418375  12764.58618105 13592.9555156   1875.31501677]
New Q values:  [40523.2418375  12764.58618105 13169.98702937  1875.31501677]
Reward: -1  Episode Reward:  43
xxxxx
x  gx
x   x
x a x
xxxxx
Step:18, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 2.03001964e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 2.09137007e+03]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3025.84884676 1352.37702619 4266.540714    -12.17474163]
------
Step:19, Action:East
State  261
Old Q Values:  [3025.84884676 1352.37702619 4266.540714    -12.17474163]
New Q values:  [3025.84884676 1352.37702619 2663.26170928  -12.17474163]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.19081808e+03 2.09137007e+03]
------
Step:20, Action:West
State  276
Old Q Values:  [ 4561.50951103 -5807.06396197  5223.19087579  5187.5769688 ]
New Q values:  [ 4561.50951103 -5807.06396197  5223.19087579  2982.18544155]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3025.84884676 1352.37702619 2663.26170928  -12.17474163]
------
Step:21, Action:North
State  260
Old Q Values:  [ 1990.59309505 -5704.51612281  2714.45655509 -5679.36893145]
New Q values:  [ 2809.0804734  -5704.51612281  2714.45655509 -5679.36893145]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  6711.47745126     0.        ]
------
Step:22, Action:East
State  183
Old Q Values:  [ 883.67312173 2257.63722641 4879.9999047  1554.80203889]
New Q values:  [ 883.67312173 2257.63722641 3521.21094849 1554.80203889]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[-2.78872080e-01 -2.00610230e+02  5.23270329e+03  0.00000000e+00]
------
Step:23, Action:East
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  5.23270329e+03  0.00000000e+00]
New Q values:  [-2.78872080e-01 -2.00610230e+02  1.10490803e+04  0.00000000e+00]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[29855.32982876  6233.36404734   790.72804752  5103.37501425]
------
Step:24, Action:North
State  210
Old Q Values:  [29855.32982876  6233.36404734   790.72804752  5103.37501425]
New Q values:  [109031.70034478   6233.36404734    790.72804752   5103.37501425]
Reward: 100009  Episode Reward:  100046
xxxxx
x  ax
x   x
xg  x
xxxxx
xxxxx
x.a.x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1607.59978455   680.501585  ]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1607.59978455   680.501585  ]
New Q values:  [ -281.736      -1150.91067548  1353.4802474    680.501585  ]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 2.35013445e+03 -2.43067941e+03 -3.22965309e-01  1.63862968e+03]
------
Step:2, Action:North
State  138
Old Q Values:  [ 2.35013445e+03 -2.43067941e+03 -3.22965309e-01  1.63862968e+03]
New Q values:  [ 1.46449411e+03 -2.43067941e+03 -3.22965309e-01  1.63862968e+03]
Reward: -301  Episode Reward:  -292
xxxxx
x. ax
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.46449411e+03 -2.43067941e+03 -3.22965309e-01  1.63862968e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 1.46449411e+03 -2.43067941e+03 -3.22965309e-01  1.63862968e+03]
New Q values:  [ 1.46449411e+03 -2.43067941e+03 -3.22965309e-01  1.06089594e+03]
Reward: -1  Episode Reward:  -293
xxxxx
x.a x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1353.4802474    680.501585  ]
------
Step:4, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2900.18565025   766.31091109]
New Q values:  [-9594.56523706 -8069.05606225  2481.55550635   766.31091109]
Reward: -1  Episode Reward:  -294
xxxxx
xg ax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  4406.93748751   660.86649319 -1803.08599325]
------
Step:5, Action:South
State  138
Old Q Values:  [ 1.46449411e+03 -2.43067941e+03 -3.22965309e-01  1.06089594e+03]
New Q values:  [ 1.46449411e+03  7.98218037e+02 -3.22965309e-01  1.06089594e+03]
Reward: 9  Episode Reward:  -285
xxxxx
x.  x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2929.17100536 5883.63267376  606.149024   5645.80477873]
------
Step:6, Action:South
State  216
Old Q Values:  [2929.17100536 5883.63267376  606.149024   5645.80477873]
New Q values:  [2929.17100536 4980.01619421  606.149024   5645.80477873]
Reward: 9  Episode Reward:  -276
xxxxx
xg  x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059  8737.2104157 ]
------
Step:7, Action:West
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059  8737.2104157 ]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059 22856.18570142]
Reward: 9  Episode Reward:  -267
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 64519.6717838  18937.6734604 ]
------
Step:8, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 64519.6717838  18937.6734604 ]
New Q values:  [ 3134.5582149  -8521.23367799 32664.12442395 18937.6734604 ]
Reward: -1  Episode Reward:  -268
xxxxx
x.  x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059 22856.18570142]
------
Step:9, Action:West
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059 22856.18570142]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059 12941.11160775]
Reward: -10001  Episode Reward:  -10269
xxxxx
x.  x
x.  x
x.g x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[38955.61549043 16053.62807234  8240.17937465  1169.39963074]
------
Step:1, Action:North
State  193
Old Q Values:  [-5922.26708831 12435.34033719  5348.19728675   767.35890262]
New Q values:  [-1649.89560358 12435.34033719  5348.19728675   767.35890262]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2378.70410583   867.03619284]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1353.4802474    680.501585  ]
New Q values:  [ -281.736      -1150.91067548   986.14033247   680.501585  ]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.46449411e+03  7.98218037e+02 -3.22965309e-01  1.06089594e+03]
------
Step:3, Action:North
State  138
Old Q Values:  [ 1.46449411e+03  7.98218037e+02 -3.22965309e-01  1.06089594e+03]
New Q values:  [ 8.44545878e+02  7.98218037e+02 -3.22965309e-01  1.06089594e+03]
Reward: -301  Episode Reward:  -283
xxxxx
x. ax
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.44545878e+02  7.98218037e+02 -3.22965309e-01  1.06089594e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [ 8.44545878e+02  7.98218037e+02 -3.22965309e-01  1.06089594e+03]
New Q values:  [ 8.44545878e+02  7.98218037e+02 -3.22965309e-01  7.19600478e+02]
Reward: -1  Episode Reward:  -284
xxxxx
x.a x
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   986.14033247   680.501585  ]
------
Step:5, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   986.14033247   680.501585  ]
New Q values:  [ -281.736      -1150.91067548   647.21989644   680.501585  ]
Reward: -1  Episode Reward:  -285
xxxxx
x. ax
xg .x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 8.44545878e+02  7.98218037e+02 -3.22965309e-01  7.19600478e+02]
------
Step:6, Action:North
State  138
Old Q Values:  [ 8.44545878e+02  7.98218037e+02 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  7.98218037e+02 -3.22965309e-01  7.19600478e+02]
Reward: -301  Episode Reward:  -586
xxxxx
x. ax
x. .x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  7.98218037e+02 -3.22965309e-01  7.19600478e+02]
------
Step:7, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  7.98218037e+02 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  3.30341973e+04 -3.22965309e-01  7.19600478e+02]
Reward: 9  Episode Reward:  -577
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[109031.70034478   6233.36404734    790.72804752   5103.37501425]
------
Step:8, Action:North
State  208
Old Q Values:  [ 3750.82112317  3593.04747999 -4584.50430574  5635.02947634]
New Q values:  [11409.98764471  3593.04747999 -4584.50430574  5635.02947634]
Reward: -1  Episode Reward:  -578
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  3.30341973e+04 -3.22965309e-01  7.19600478e+02]
------
Step:9, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  3.30341973e+04 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  4.59225890e+04 -3.22965309e-01  7.19600478e+02]
Reward: -1  Episode Reward:  -579
xxxxx
x.  x
x. ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[109031.70034478   6233.36404734    790.72804752   5103.37501425]
------
Step:10, Action:North
State  210
Old Q Values:  [109031.70034478   6233.36404734    790.72804752   5103.37501425]
New Q values:  [57388.85684712  6233.36404734   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  -580
xxxxx
x. ax
x.  x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  4.59225890e+04 -3.22965309e-01  7.19600478e+02]
------
Step:11, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  4.59225890e+04 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  2.00621770e+04 -3.22965309e-01  7.19600478e+02]
Reward: -1  Episode Reward:  -581
xxxxx
x.  x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2929.17100536 4980.01619421  606.149024   5645.80477873]
------
Step:12, Action:West
State  216
Old Q Values:  [2929.17100536 4980.01619421  606.149024   5645.80477873]
New Q values:  [2929.17100536 4980.01619421  606.149024   9340.35659341]
Reward: -1  Episode Reward:  -582
xxxxx
xg  x
x.a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  23608.78227304  3353.15003737   610.93635926]
------
Step:13, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.18940059e+04 3.73558515e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.45622397e+04 3.73558515e+03 2.45392999e+03]
Reward: 9  Episode Reward:  -573
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 32664.12442395 18937.6734604 ]
------
Step:14, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6943.79635097 25778.0160771 ]
New Q values:  [ 1637.72437281  1974.75214244  6659.25202271 25778.0160771 ]
Reward: -1  Episode Reward:  -574
xxxxx
x. gx
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059 12941.11160775]
------
Step:15, Action:West
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059 12941.11160775]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059 14975.08197028]
Reward: -1  Episode Reward:  -575
xxxxx
x.g x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 32664.12442395 18937.6734604 ]
------
Step:16, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 32664.12442395 18937.6734604 ]
New Q values:  [ 3134.5582149  -8521.23367799 17557.57436066 18937.6734604 ]
Reward: -1  Episode Reward:  -576
xxxxx
xg  x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059 14975.08197028]
------
Step:17, Action:West
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059 14975.08197028]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059 11670.73482623]
Reward: -1  Episode Reward:  -577
xxxxx
x.  x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 17557.57436066 18937.6734604 ]
------
Step:18, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 17557.57436066 18937.6734604 ]
New Q values:  [ 3134.5582149  -8521.23367799 17557.57436066 19737.44193541]
Reward: 9  Episode Reward:  -568
xxxxx
x.  x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[40523.2418375  12764.58618105 13169.98702937  1875.31501677]
------
Step:19, Action:North
State  257
Old Q Values:  [40523.2418375  12764.58618105 13169.98702937  1875.31501677]
New Q values:  [45353.09426216 12764.58618105 13169.98702937  1875.31501677]
Reward: 9  Episode Reward:  -559
xxxxx
x.  x
xa  x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[82228.67666629 16101.90751562 97127.9917572      0.        ]
------
Step:20, Action:East
State  179
Old Q Values:  [82228.67666629 16101.90751562 97127.9917572      0.        ]
New Q values:  [82228.67666629 16101.90751562 50537.28135001     0.        ]
Reward: -1  Episode Reward:  -560
xxxxx
x.  x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[38955.61549043 16053.62807234  8240.17937465  1169.39963074]
------
Step:21, Action:North
State  194
Old Q Values:  [-6.00000000e-01  6.51146529e+03  5.98126971e+03  2.17939995e+03]
New Q values:  [38298.01663404  6511.46529034  5981.269706    2179.39995143]
Reward: -1  Episode Reward:  -561
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   31150.82321546 127662.85544679]
------
Step:22, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   31150.82321546 127662.85544679]
New Q values:  [  -180.6          3557.6642036   31150.82321546 128754.09364007]
Reward: 100009  Episode Reward:  99448
xxxxx
xa  x
x g x
x   x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1213.24751413  431.25952337 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3259.13264319 1796.41351142    0.        ]
New Q values:  [ 221.30610858 3058.18426845 1796.41351142    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.73488392e+03 2.16894189e+03 5.83043737e+03 3.33862213e+00]
------
Step:2, Action:East
State  181
Old Q Values:  [1.73488392e+03 2.16894189e+03 5.83043737e+03 3.33862213e+00]
New Q values:  [1734.88392142 2168.94189099  550.03102626    3.33862213]
Reward: -10001  Episode Reward:  -9992
xxxxx
x ..x
x g.x
x...x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1734.88392142 2168.94189099  550.03102626    3.33862213]
------
Step:1, Action:South
State  181
Old Q Values:  [1734.88392142 2168.94189099  550.03102626    3.33862213]
New Q values:  [1734.88392142 1780.73141042  550.03102626    3.33862213]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3025.84884676 1352.37702619 2663.26170928  -12.17474163]
------
Step:2, Action:North
State  260
Old Q Values:  [ 2809.0804734  -5704.51612281  2714.45655509 -5679.36893145]
New Q values:  [ 3271.65878026 -5704.51612281  2714.45655509 -5679.36893145]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  3060.24711158  7162.08863634 -4966.32149798]
------
Step:3, Action:East
State  180
Old Q Values:  [ 4507.91302948  3060.24711158  7162.08863634 -4966.32149798]
New Q values:  [ 4507.91302948  3060.24711158  7232.90735891 -4966.32149798]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xga.x
x ..x
xxxxx
Step:4, Action:South
State  196
Old Q Values:  [ -534.40699599  1465.42682022 14061.52026008   231.67262594]
New Q values:  [ -534.40699599  2158.52799082 14061.52026008   231.67262594]
Reward: 9  Episode Reward:  16
xxxxx
xg..x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4561.50951103 -5807.06396197  5223.19087579  2982.18544155]
------
Step:5, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 17557.57436066 19737.44193541]
New Q values:  [ 3134.5582149  -8521.23367799 10529.65019214 19737.44193541]
Reward: 9  Episode Reward:  25
xxxxx
x...x
xg .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059 11670.73482623]
------
Step:6, Action:West
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059 11670.73482623]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059 10588.92651112]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 10529.65019214 19737.44193541]
------
Step:7, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 10529.65019214 19737.44193541]
New Q values:  [ 3134.5582149  -8521.23367799 10529.65019214  8802.13142819]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x  .x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3025.84884676 1352.37702619 2663.26170928  -12.17474163]
------
Step:8, Action:North
State  261
Old Q Values:  [3025.84884676 1352.37702619 2663.26170928  -12.17474163]
New Q values:  [2266.10282325 1352.37702619 2663.26170928  -12.17474163]
Reward: -1  Episode Reward:  22
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 883.67312173 2257.63722641 3521.21094849 1554.80203889]
------
Step:9, Action:East
State  181
Old Q Values:  [1734.88392142 1780.73141042  550.03102626    3.33862213]
New Q values:  [1.73488392e+03 1.78073141e+03 3.95001451e+03 3.33862213e+00]
Reward: -1  Episode Reward:  21
xxxxx
x...x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-1649.89560358 12435.34033719  5348.19728675   767.35890262]
------
Step:10, Action:South
State  193
Old Q Values:  [-1649.89560358 12435.34033719  5348.19728675   767.35890262]
New Q values:  [-1649.89560358 12706.940958    5348.19728675   767.35890262]
Reward: -1  Episode Reward:  20
xxxxx
x..gx
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6659.25202271 25778.0160771 ]
------
Step:11, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6659.25202271 25778.0160771 ]
New Q values:  [ 1637.72437281  1974.75214244  6659.25202271 11109.58494362]
Reward: -1  Episode Reward:  19
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2266.10282325 1352.37702619 2663.26170928  -12.17474163]
------
Step:12, Action:East
State  261
Old Q Values:  [2266.10282325 1352.37702619 2663.26170928  -12.17474163]
New Q values:  [2266.10282325 1352.37702619 4397.5801668   -12.17474163]
Reward: -1  Episode Reward:  18
xxxxx
x...x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1637.72437281  1974.75214244  6659.25202271 11109.58494362]
------
Step:13, Action:West
State  273
Old Q Values:  [ 1637.72437281  1974.75214244  6659.25202271 11109.58494362]
New Q values:  [1637.72437281 1974.75214244 6659.25202271 5762.50802749]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2266.10282325 1352.37702619 4397.5801668   -12.17474163]
------
Step:14, Action:East
State  261
Old Q Values:  [2266.10282325 1352.37702619 4397.5801668   -12.17474163]
New Q values:  [2266.10282325 1352.37702619 4917.32712436  -12.17474163]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 10529.65019214  8802.13142819]
------
Step:15, Action:East
State  273
Old Q Values:  [1637.72437281 1974.75214244 6659.25202271 5762.50802749]
New Q values:  [1637.72437281 1974.75214244 5839.77876242 5762.50802749]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059 10588.92651112]
------
Step:16, Action:West
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059 10588.92651112]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059  7393.86566209]
Reward: -1  Episode Reward:  14
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 10529.65019214  8802.13142819]
------
Step:17, Action:East
State  276
Old Q Values:  [ 4561.50951103 -5807.06396197  5223.19087579  2982.18544155]
New Q values:  [ 4561.50951103 -5807.06396197  4306.83604894  2982.18544155]
Reward: -1  Episode Reward:  13
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059  7393.86566209]
------
Step:18, Action:West
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059  7393.86566209]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059  6115.84132248]
Reward: -1  Episode Reward:  12
xxxxx
x...x
x g.x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 10529.65019214  8802.13142819]
------
Step:19, Action:East
State  276
Old Q Values:  [ 4561.50951103 -5807.06396197  4306.83604894  2982.18544155]
New Q values:  [ 4561.50951103 -5807.06396197  3556.88681632  2982.18544155]
Reward: -1  Episode Reward:  11
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059  6115.84132248]
------
Step:20, Action:West
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059  6115.84132248]
New Q values:  [ 5331.62052483  1802.67044829 -5588.09647059  3814.1893823 ]
Reward: -1  Episode Reward:  10
xxxxx
xg..x
x  .x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 4561.50951103 -5807.06396197  3556.88681632  2982.18544155]
------
Step:21, Action:North
State  276
Old Q Values:  [ 4561.50951103 -5807.06396197  3556.88681632  2982.18544155]
New Q values:  [ 6042.45988244 -5807.06396197  3556.88681632  2982.18544155]
Reward: -1  Episode Reward:  9
xxxxx
x.g.x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  2158.52799082 14061.52026008   231.67262594]
------
Step:22, Action:East
State  192
Old Q Values:  [3.89777037e-01 1.45622397e+04 3.73558515e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 1.45622397e+04 4.92263035e+03 2.45392999e+03]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11409.98764471  3593.04747999 -4584.50430574  5635.02947634]
------
Step:23, Action:North
State  208
Old Q Values:  [11409.98764471  3593.04747999 -4584.50430574  5635.02947634]
New Q values:  [29790.79813261  3593.04747999 -4584.50430574  5635.02947634]
Reward: 9  Episode Reward:  27
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[47581.15146009  7449.59323443 -8652.84       84071.34358242]
------
Step:24, Action:North
State  128
Old Q Values:  [47581.15146009  7449.59323443 -8652.84       84071.34358242]
New Q values:  [44073.26365876  7449.59323443 -8652.84       84071.34358242]
Reward: -301  Episode Reward:  -274
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[44073.26365876  7449.59323443 -8652.84       84071.34358242]
------
Step:25, Action:West
State  128
Old Q Values:  [44073.26365876  7449.59323443 -8652.84       84071.34358242]
New Q values:  [44073.26365876  7449.59323443 -8652.84       72424.66313434]
Reward: 9  Episode Reward:  -265
xxxxx
xga x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:NW
[     0.           3629.92591876  27196.16909557 129302.41900456]
------
Step:26, Action:East
State  114
Old Q Values:  [  -180.6          3557.6642036   31150.82321546 128754.09364007]
New Q values:  [  -180.6          3557.6642036   49543.89769946 128754.09364007]
Reward: -1  Episode Reward:  -266
xxxxx
x. ax
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[ 41234.48978377  16188.71019194   -180.00807518 123613.89471093]
------
Step:27, Action:West
State  130
Old Q Values:  [ 41234.48978377  16188.71019194   -180.00807518 123613.89471093]
New Q values:  [41234.48978377 16188.71019194  -180.00807518 88071.18597639]
Reward: -1  Episode Reward:  -267
xxxxx
x.a x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 128754.09364007]
------
Step:28, Action:West
State  112
Old Q Values:  [     0.           3629.92591876  27196.16909557 129302.41900456]
New Q values:  [     0.           3629.92591876  27196.16909557 127135.87773791]
Reward: 100009  Episode Reward:  99742
xxxxx
xag x
x   x
x   x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-1649.89560358 12706.940958    5348.19728675   767.35890262]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 1.45622397e+04 4.92263035e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 8.98919093e+03 4.92263035e+03 2.45392999e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 10529.65019214  8802.13142819]
------
Step:2, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 10529.65019214  8802.13142819]
New Q values:  [ 3134.5582149  -8521.23367799  5816.7462343   8802.13142819]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x. .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5331.62052483  1802.67044829 -5588.09647059  3814.1893823 ]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5331.62052483  1802.67044829 -5588.09647059  3814.1893823 ]
New Q values:  [11075.28764972  1802.67044829 -5588.09647059  3814.1893823 ]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[29790.79813261  3593.04747999 -4584.50430574  5635.02947634]
------
Step:4, Action:North
State  208
Old Q Values:  [29790.79813261  3593.04747999 -4584.50430574  5635.02947634]
New Q values:  [13237.8004993   3593.04747999 -4584.50430574  5635.02947634]
Reward: -1  Episode Reward:  26
xxxxx
xg.ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  4406.93748751   660.86649319 -1803.08599325]
------
Step:5, Action:South
State  136
Old Q Values:  [  878.22269011  4406.93748751   660.86649319 -1803.08599325]
New Q values:  [  878.22269011  5733.51514479   660.86649319 -1803.08599325]
Reward: -1  Episode Reward:  25
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13237.8004993   3593.04747999 -4584.50430574  5635.02947634]
------
Step:6, Action:North
State  208
Old Q Values:  [13237.8004993   3593.04747999 -4584.50430574  5635.02947634]
New Q values:  [ 7014.57474316  3593.04747999 -4584.50430574  5635.02947634]
Reward: -1  Episode Reward:  24
xxxxx
xg.ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  5733.51514479   660.86649319 -1803.08599325]
------
Step:7, Action:South
State  136
Old Q Values:  [  878.22269011  5733.51514479   660.86649319 -1803.08599325]
New Q values:  [  878.22269011  4397.17848086   660.86649319 -1803.08599325]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7014.57474316  3593.04747999 -4584.50430574  5635.02947634]
------
Step:8, Action:North
State  208
Old Q Values:  [ 7014.57474316  3593.04747999 -4584.50430574  5635.02947634]
New Q values:  [ 4124.38344152  3593.04747999 -4584.50430574  5635.02947634]
Reward: -1  Episode Reward:  22
xxxxx
xg.ax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  4397.17848086   660.86649319 -1803.08599325]
------
Step:9, Action:South
State  130
Old Q Values:  [41234.48978377 16188.71019194  -180.00807518 88071.18597639]
New Q values:  [41234.48978377  8165.39291968  -180.00807518 88071.18597639]
Reward: -1  Episode Reward:  21
xxxxx
x.. x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4124.38344152  3593.04747999 -4584.50430574  5635.02947634]
------
Step:10, Action:West
State  208
Old Q Values:  [ 4124.38344152  3593.04747999 -4584.50430574  5635.02947634]
New Q values:  [ 4124.38344152  3593.04747999 -4584.50430574 -1049.83093042]
Reward: -10001  Episode Reward:  -9980
xxxxx
x.. x
x.g x
x.  x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  5816.7462343   8802.13142819]
------
Step:1, Action:West
State  276
Old Q Values:  [ 6042.45988244 -5807.06396197  3556.88681632  2982.18544155]
New Q values:  [ 6042.45988244 -5807.06396197  3556.88681632  2673.47231393]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x. .x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2266.10282325 1352.37702619 4917.32712436  -12.17474163]
------
Step:2, Action:East
State  260
Old Q Values:  [ 3271.65878026 -5704.51612281  2714.45655509 -5679.36893145]
New Q values:  [ 3271.65878026 -5704.51612281  2897.92058677 -5679.36893145]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 6042.45988244 -5807.06396197  3556.88681632  2673.47231393]
------
Step:3, Action:North
State  276
Old Q Values:  [ 6042.45988244 -5807.06396197  3556.88681632  2673.47231393]
New Q values:  [ 5113.14123202 -5807.06396197  3556.88681632  2673.47231393]
Reward: -1  Episode Reward:  7
xxxxx
x.g.x
x.a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 8.98919093e+03 4.92263035e+03 2.45392999e+03]
------
Step:4, Action:South
State  193
Old Q Values:  [-1649.89560358 12706.940958    5348.19728675   767.35890262]
New Q values:  [-1649.89560358  6834.11001193  5348.19728675   767.35890262]
Reward: -1  Episode Reward:  6
xxxxx
x..gx
x. .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 5839.77876242 5762.50802749]
------
Step:5, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  5816.7462343   8802.13142819]
New Q values:  [ 3134.5582149  -8521.23367799  5654.68478864  8802.13142819]
Reward: 9  Episode Reward:  15
xxxxx
x.g.x
x. .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[11075.28764972  1802.67044829 -5588.09647059  3814.1893823 ]
------
Step:6, Action:North
State  288
Old Q Values:  [11075.28764972  1802.67044829 -5588.09647059  3814.1893823 ]
New Q values:  [ 5672.83009234  1802.67044829 -5588.09647059  3814.1893823 ]
Reward: 9  Episode Reward:  24
xxxxx
x..gx
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4124.38344152  3593.04747999 -4584.50430574 -1049.83093042]
------
Step:7, Action:South
State  208
Old Q Values:  [ 4124.38344152  3593.04747999 -4584.50430574 -1049.83093042]
New Q values:  [ 4124.38344152  3138.4680197  -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5672.83009234  1802.67044829 -5588.09647059  3814.1893823 ]
------
Step:8, Action:West
State  288
Old Q Values:  [ 5672.83009234  1802.67044829 -5588.09647059  3814.1893823 ]
New Q values:  [ 5672.83009234  1802.67044829 -5588.09647059  3277.00938165]
Reward: -1  Episode Reward:  22
xxxxx
x..gx
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 5839.77876242 5762.50802749]
------
Step:9, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  5654.68478864  8802.13142819]
New Q values:  [ 3134.5582149  -8521.23367799  3963.12294316  8802.13142819]
Reward: -1  Episode Reward:  21
xxxxx
x.g.x
x.  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5672.83009234  1802.67044829 -5588.09647059  3277.00938165]
------
Step:10, Action:North
State  288
Old Q Values:  [ 5672.83009234  1802.67044829 -5588.09647059  3277.00938165]
New Q values:  [ 3505.84706939  1802.67044829 -5588.09647059  3277.00938165]
Reward: -1  Episode Reward:  20
xxxxx
xg..x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4124.38344152  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:11, Action:North
State  208
Old Q Values:  [ 4124.38344152  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [23382.55231691  3138.4680197  -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  29
xxxxx
x.gax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[44073.26365876  7449.59323443 -8652.84       72424.66313434]
------
Step:12, Action:North
State  128
Old Q Values:  [44073.26365876  7449.59323443 -8652.84       72424.66313434]
New Q values:  [39176.10440381  7449.59323443 -8652.84       72424.66313434]
Reward: -301  Episode Reward:  -272
xxxxx
xg.ax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[39176.10440381  7449.59323443 -8652.84       72424.66313434]
------
Step:13, Action:West
State  128
Old Q Values:  [39176.10440381  7449.59323443 -8652.84       72424.66313434]
New Q values:  [39176.10440381  7449.59323443 -8652.84       61116.02857511]
Reward: -9991  Episode Reward:  -10263
xxxxx
x.g x
x.  x
x   x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23382.55231691  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:1, Action:North
State  210
Old Q Values:  [57388.85684712  6233.36404734   790.72804752  5103.37501425]
New Q values:  [28979.59585262  6233.36404734   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  2.00621770e+04 -3.22965309e-01  7.19600478e+02]
------
Step:2, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  2.00621770e+04 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  1.67181496e+04 -3.22965309e-01  7.19600478e+02]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[28979.59585262  6233.36404734   790.72804752  5103.37501425]
------
Step:3, Action:North
State  210
Old Q Values:  [28979.59585262  6233.36404734   790.72804752  5103.37501425]
New Q values:  [16606.68321329  6233.36404734   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  1.67181496e+04 -3.22965309e-01  7.19600478e+02]
------
Step:4, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  1.67181496e+04 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  1.37014255e+04 -3.22965309e-01  7.19600478e+02]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23382.55231691  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:5, Action:North
State  210
Old Q Values:  [16606.68321329  6233.36404734   790.72804752  5103.37501425]
New Q values:  [10752.50094274  6233.36404734   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  5
xxxxx
x..ax
x . x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  1.37014255e+04 -3.22965309e-01  7.19600478e+02]
------
Step:6, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  1.37014255e+04 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  8.70572049e+03 -3.22965309e-01  7.19600478e+02]
Reward: -1  Episode Reward:  4
xxxxx
x.. x
x .ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[10752.50094274  6233.36404734   790.72804752  5103.37501425]
------
Step:7, Action:North
State  210
Old Q Values:  [10752.50094274  6233.36404734   790.72804752  5103.37501425]
New Q values:  [6912.11652491 6233.36404734  790.72804752 5103.37501425]
Reward: -1  Episode Reward:  3
xxxxx
x..ax
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  8.70572049e+03 -3.22965309e-01  7.19600478e+02]
------
Step:8, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  8.70572049e+03 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  4.49645389e+03 -3.22965309e-01  7.19600478e+02]
Reward: -10001  Episode Reward:  -9998
xxxxx
x.. x
x .gx
x...x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2266.10282325 1352.37702619 4917.32712436  -12.17474163]
------
Step:1, Action:East
State  261
Old Q Values:  [2266.10282325 1352.37702619 4917.32712436  -12.17474163]
New Q values:  [2266.10282325 1352.37702619 4612.9702782   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  3963.12294316  8802.13142819]
------
Step:2, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3963.12294316  8802.13142819]
New Q values:  [ 3134.5582149  -8521.23367799  3963.12294316  4904.14365474]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2266.10282325 1352.37702619 4612.9702782   -12.17474163]
------
Step:3, Action:East
State  261
Old Q Values:  [2266.10282325 1352.37702619 4612.9702782   -12.17474163]
New Q values:  [2266.10282325 1352.37702619 3596.52174001  -12.17474163]
Reward: -1  Episode Reward:  7
xxxxx
x..gx
x.. x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 5839.77876242 5762.50802749]
------
Step:4, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3963.12294316  4904.14365474]
New Q values:  [ 3134.5582149  -8521.23367799  2642.40329808  4904.14365474]
Reward: 9  Episode Reward:  16
xxxxx
x.g.x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3505.84706939  1802.67044829 -5588.09647059  3277.00938165]
------
Step:5, Action:North
State  288
Old Q Values:  [ 3505.84706939  1802.67044829 -5588.09647059  3277.00938165]
New Q values:  [ 8416.50452283  1802.67044829 -5588.09647059  3277.00938165]
Reward: -1  Episode Reward:  15
xxxxx
x...x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23382.55231691  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:6, Action:North
State  210
Old Q Values:  [6912.11652491 6233.36404734  790.72804752 5103.37501425]
New Q values:  [29191.60240288  6233.36404734   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  24
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  8165.39291968  -180.00807518 88071.18597639]
------
Step:7, Action:West
State  138
Old Q Values:  [ 4.10582115e+02  4.49645389e+03 -3.22965309e-01  7.19600478e+02]
New Q values:  [ 4.10582115e+02  4.49645389e+03 -3.22965309e-01  4.97390667e+02]
Reward: 9  Episode Reward:  33
xxxxx
x.a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   647.21989644   680.501585  ]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   647.21989644   680.501585  ]
New Q values:  [ -281.736      -1150.91067548   647.21989644   560.51533626]
Reward: 9  Episode Reward:  42
xxxxx
xa  x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   943.04900753  -180.6       ]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558  760.71844312 1792.92716634 -252.78192178]
New Q values:  [-252.35169558  760.71844312  910.73683547 -252.78192178]
Reward: -1  Episode Reward:  41
xxxxx
x a x
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   647.21989644   560.51533626]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   647.21989644   560.51533626]
New Q values:  [ -281.736      -1150.91067548  1607.22412622   560.51533626]
Reward: -1  Episode Reward:  40
xxxxx
x  ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  4.49645389e+03 -3.22965309e-01  4.97390667e+02]
------
Step:11, Action:South
State  136
Old Q Values:  [  878.22269011  4397.17848086   660.86649319 -1803.08599325]
New Q values:  [  878.22269011  8773.03708742   660.86649319 -1803.08599325]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x..ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23382.55231691  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:12, Action:North
State  208
Old Q Values:  [23382.55231691  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [11984.33205299  3138.4680197  -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  38
xxxxx
xg ax
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  8773.03708742   660.86649319 -1803.08599325]
------
Step:13, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  4.49645389e+03 -3.22965309e-01  4.97390667e+02]
New Q values:  [ 4.10582115e+02  5.39328117e+03 -3.22965309e-01  4.97390667e+02]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xg.ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[11984.33205299  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:14, Action:North
State  208
Old Q Values:  [11984.33205299  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [ 6411.11717302  3138.4680197  -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  5.39328117e+03 -3.22965309e-01  4.97390667e+02]
------
Step:15, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  5.39328117e+03 -3.22965309e-01  4.97390667e+02]
New Q values:  [ 4.10582115e+02 -1.91995238e+03 -3.22965309e-01  4.97390667e+02]
Reward: -10001  Episode Reward:  -9965
xxxxx
x   x
x..gx
x   x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -1.91995238e+03 -3.22965309e-01  4.97390667e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -1.91995238e+03 -3.22965309e-01  4.97390667e+02]
New Q values:  [ 4.10582115e+02 -1.91995238e+03 -3.22965309e-01  6.86523504e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1607.22412622   560.51533626]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2481.55550635   766.31091109]
New Q values:  [-9594.56523706 -8069.05606225  3623.93332877   766.31091109]
Reward: -1  Episode Reward:  8
xxxxx
xg ax
x. .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  8773.03708742   660.86649319 -1803.08599325]
------
Step:3, Action:South
State  138
Old Q Values:  [ 4.10582115e+02 -1.91995238e+03 -3.22965309e-01  6.86523504e+02]
New Q values:  [ 4.10582115e+02  2.03952603e+03 -3.22965309e-01  6.86523504e+02]
Reward: 9  Episode Reward:  17
xxxxx
x.  x
xg ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2929.17100536 4980.01619421  606.149024   9340.35659341]
------
Step:4, Action:West
State  216
Old Q Values:  [2929.17100536 4980.01619421  606.149024   9340.35659341]
New Q values:  [ 2929.17100536  4980.01619421   606.149024   10818.17731927]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x.a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  23608.78227304  3353.15003737   610.93635926]
------
Step:5, Action:South
State  192
Old Q Values:  [3.89777037e-01 8.98919093e+03 4.92263035e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 5.07231947e+03 4.92263035e+03 2.45392999e+03]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  2642.40329808  4904.14365474]
------
Step:6, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  2642.40329808  4904.14365474]
New Q values:  [ 3134.5582149  -8521.23367799  2642.40329808 15572.98574054]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
x.g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[45353.09426216 12764.58618105 13169.98702937  1875.31501677]
------
Step:7, Action:North
State  257
Old Q Values:  [45353.09426216 12764.58618105 13169.98702937  1875.31501677]
New Q values:  [36289.45311927 12764.58618105 13169.98702937  1875.31501677]
Reward: 9  Episode Reward:  43
xxxxx
x.g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   4469.47019812     0.        ]
------
Step:8, Action:North
State  189
Old Q Values:  [ 337.36081627 4621.68278475 1843.56468795  154.04646645]
New Q values:  [ 533.05203844 4621.68278475 1843.56468795  154.04646645]
Reward: 9  Episode Reward:  52
xxxxx
xa gx
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1309.02570645 -2165.66138672  -180.6       ]
------
Step:9, Action:South
State  109
Old Q Values:  [ -241.10880094  1309.02570645 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1909.515118   -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  51
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 4621.68278475 1843.56468795  154.04646645]
------
Step:10, Action:South
State  188
Old Q Values:  [-6523.78898263  3293.6733143   1963.43704178     0.        ]
New Q values:  [-6523.78898263  2298.3669598   1963.43704178     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
xg  x
x   x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3271.65878026 -5704.51612281  2897.92058677 -5679.36893145]
------
Step:11, Action:North
State  261
Old Q Values:  [2266.10282325 1352.37702619 3596.52174001  -12.17474163]
New Q values:  [2292.34596472 1352.37702619 3596.52174001  -12.17474163]
Reward: -1  Episode Reward:  49
xxxxx
x g x
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 4621.68278475 1843.56468795  154.04646645]
------
Step:12, Action:South
State  189
Old Q Values:  [ 533.05203844 4621.68278475 1843.56468795  154.04646645]
New Q values:  [ 533.05203844 2927.0296359  1843.56468795  154.04646645]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2292.34596472 1352.37702619 3596.52174001  -12.17474163]
------
Step:13, Action:East
State  260
Old Q Values:  [ 3271.65878026 -5704.51612281  2897.92058677 -5679.36893145]
New Q values:  [ 3271.65878026 -5704.51612281  2692.51060431 -5679.36893145]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 5113.14123202 -5807.06396197  3556.88681632  2673.47231393]
------
Step:14, Action:North
State  276
Old Q Values:  [ 5113.14123202 -5807.06396197  3556.88681632  2673.47231393]
New Q values:  [ 3169.5580622  -5807.06396197  3556.88681632  2673.47231393]
Reward: -1  Episode Reward:  46
xxxxx
xg  x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         3600.83401675 3749.67189799  441.58769553]
------
Step:15, Action:East
State  204
Old Q Values:  [   0.         3600.83401675 3749.67189799  441.58769553]
New Q values:  [   0.         3600.83401675 4744.72195498  441.58769553]
Reward: -1  Episode Reward:  45
xxxxx
x g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2929.17100536  4980.01619421   606.149024   10818.17731927]
------
Step:16, Action:West
State  216
Old Q Values:  [ 2929.17100536  4980.01619421   606.149024   10818.17731927]
New Q values:  [2929.17100536 4980.01619421  606.149024   4725.53725377]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1329.5544202     0.          198.38683706]
------
Step:17, Action:South
State  205
Old Q Values:  [   0.         1329.5544202     0.          198.38683706]
New Q values:  [   0.         1488.46719176    0.          198.38683706]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 3.19081808e+03 2.09137007e+03]
------
Step:18, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  2642.40329808 15572.98574054]
New Q values:  [ 3134.5582149  -8521.23367799 63587.31267608 15572.98574054]
Reward: 100009  Episode Reward:  100052
xxxxx
x   x
x g x
x  ax
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2292.34596472 1352.37702619 3596.52174001  -12.17474163]
------
Step:1, Action:East
State  260
Old Q Values:  [ 3271.65878026 -5704.51612281  2692.51060431 -5679.36893145]
New Q values:  [ 3271.65878026 -5704.51612281  2149.47028662 -5679.36893145]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3169.5580622  -5807.06396197  3556.88681632  2673.47231393]
------
Step:2, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 63587.31267608 15572.98574054]
New Q values:  [ 3134.5582149  -8521.23367799 27965.27642728 15572.98574054]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8416.50452283  1802.67044829 -5588.09647059  3277.00938165]
------
Step:3, Action:North
State  288
Old Q Values:  [ 8416.50452283  1802.67044829 -5588.09647059  3277.00938165]
New Q values:  [12129.48253     1802.67044829 -5588.09647059  3277.00938165]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x. ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[29191.60240288  6233.36404734   790.72804752  5103.37501425]
------
Step:4, Action:North
State  208
Old Q Values:  [ 6411.11717302  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [28991.20266213  3138.4680197  -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  8165.39291968  -180.00807518 88071.18597639]
------
Step:5, Action:West
State  130
Old Q Values:  [41234.48978377  8165.39291968  -180.00807518 88071.18597639]
New Q values:  [41234.48978377  8165.39291968  -180.00807518 73860.10248258]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 128754.09364007]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1607.22412622   560.51533626]
New Q values:  [ -281.736      -1150.91067548  1607.22412622   512.52083676]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   943.04900753  -180.6       ]
------
Step:7, Action:East
State  107
Old Q Values:  [-252.35169558  760.71844312  910.73683547 -252.78192178]
New Q values:  [-252.35169558  760.71844312  845.86197205 -252.78192178]
Reward: -1  Episode Reward:  53
xxxxx
x a x
x.  x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1607.22412622   512.52083676]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1607.22412622   512.52083676]
New Q values:  [ -281.736      -1150.91067548  1254.14745842   512.52083676]
Reward: -1  Episode Reward:  52
xxxxx
x  ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  2.03952603e+03 -3.22965309e-01  6.86523504e+02]
------
Step:9, Action:South
State  136
Old Q Values:  [  878.22269011  8773.03708742   660.86649319 -1803.08599325]
New Q values:  [  878.22269011 12205.97563361   660.86649319 -1803.08599325]
Reward: -1  Episode Reward:  51
xxxxx
x g x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28991.20266213  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:10, Action:North
State  208
Old Q Values:  [28991.20266213  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [12207.73887278  3138.4680197  -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  2.03952603e+03 -3.22965309e-01  6.86523504e+02]
------
Step:11, Action:South
State  130
Old Q Values:  [41234.48978377  8165.39291968  -180.00807518 73860.10248258]
New Q values:  [41234.48978377  6927.8788297   -180.00807518 73860.10248258]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12207.73887278  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:12, Action:North
State  208
Old Q Values:  [12207.73887278  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [ 8544.28823919  3138.4680197  -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  48
xxxxx
xg ax
x.  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011 12205.97563361   660.86649319 -1803.08599325]
------
Step:13, Action:South
State  136
Old Q Values:  [  878.22269011 12205.97563361   660.86649319 -1803.08599325]
New Q values:  [  878.22269011  7445.0767252    660.86649319 -1803.08599325]
Reward: -1  Episode Reward:  47
xxxxx
x g x
x. ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8544.28823919  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:14, Action:North
State  208
Old Q Values:  [ 8544.28823919  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [ 4028.97310361  3138.4680197  -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  46
xxxxx
x  ax
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02  2.03952603e+03 -3.22965309e-01  6.86523504e+02]
------
Step:15, Action:South
State  138
Old Q Values:  [ 4.10582115e+02  2.03952603e+03 -3.22965309e-01  6.86523504e+02]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  6.86523504e+02]
Reward: -10001  Episode Reward:  -9955
xxxxx
x   x
x. gx
x   x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 3271.65878026 -5704.51612281  2149.47028662 -5679.36893145]
------
Step:1, Action:North
State  260
Old Q Values:  [ 3271.65878026 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [-2516.06428022 -5704.51612281  2149.47028662 -5679.36893145]
Reward: -9991  Episode Reward:  -9991
xxxxx
x ..x
xg..x
x ..x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 27965.27642728 15572.98574054]
------
Step:1, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 27965.27642728 15572.98574054]
New Q values:  [ 3134.5582149  -8521.23367799 14830.35532991 15572.98574054]
Reward: 9  Episode Reward:  9
xxxxx
x. .x
x.g.x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12129.48253     1802.67044829 -5588.09647059  3277.00938165]
------
Step:2, Action:North
State  288
Old Q Values:  [12129.48253     1802.67044829 -5588.09647059  3277.00938165]
New Q values:  [   65.88494308  1802.67044829 -5588.09647059  3277.00938165]
Reward: -9991  Episode Reward:  -9982
xxxxx
x. .x
x..gx
x.  x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1213.24751413  431.25952337 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 3058.18426845 1796.41351142    0.        ]
New Q values:  [ 221.30610858 2285.03699193 1796.41351142    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 883.67312173 2257.63722641 3521.21094849 1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [ 883.67312173 2257.63722641 3521.21094849 1554.80203889]
New Q values:  [  883.67312173  2257.63722641 12903.28936961  1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[38298.01663404  6511.46529034  5981.269706    2179.39995143]
------
Step:3, Action:North
State  198
Old Q Values:  [-2.78872080e-01 -2.00610230e+02  1.10490803e+04  0.00000000e+00]
New Q values:  [ 1156.11987161  -200.61022961 11049.08026411     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x a.x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 3836.1047348     0.          503.49427758]
------
Step:4, Action:South
State  118
Old Q Values:  [1761.89752936 3836.1047348     0.          503.49427758]
New Q values:  [1761.89752936 4848.56597315    0.          503.49427758]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[ 1156.11987161  -200.61022961 11049.08026411     0.        ]
------
Step:5, Action:East
State  198
Old Q Values:  [ 1156.11987161  -200.61022961 11049.08026411     0.        ]
New Q values:  [1156.11987161 -200.61022961 6317.9675351     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x  .x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2522.02841669 6329.78476486    0.          930.00701399]
------
Step:6, Action:South
State  216
Old Q Values:  [2929.17100536 4980.01619421  606.149024   4725.53725377]
New Q values:  [2929.17100536 2980.50929218  606.149024   4725.53725377]
Reward: 9  Episode Reward:  34
xxxxx
x  .x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308  1802.67044829 -5588.09647059  3277.00938165]
------
Step:7, Action:West
State  288
Old Q Values:  [   65.88494308  1802.67044829 -5588.09647059  3277.00938165]
New Q values:  [   65.88494308  1802.67044829 -5588.09647059  5988.09947482]
Reward: 9  Episode Reward:  43
xxxxx
x  .x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 14830.35532991 15572.98574054]
------
Step:8, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 2.09137007e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.92090455e+03]
Reward: 9  Episode Reward:  52
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2292.34596472 1352.37702619 3596.52174001  -12.17474163]
------
Step:9, Action:East
State  261
Old Q Values:  [2292.34596472 1352.37702619 3596.52174001  -12.17474163]
New Q values:  [2292.34596472 1352.37702619 2505.0747409   -12.17474163]
Reward: -1  Episode Reward:  51
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3169.5580622  -5807.06396197  3556.88681632  2673.47231393]
------
Step:10, Action:East
State  276
Old Q Values:  [ 3169.5580622  -5807.06396197  3556.88681632  2673.47231393]
New Q values:  [ 3169.5580622  -5807.06396197  3218.58456897  2673.47231393]
Reward: -1  Episode Reward:  50
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308  1802.67044829 -5588.09647059  5988.09947482]
------
Step:11, Action:West
State  288
Old Q Values:  [   65.88494308  1802.67044829 -5588.09647059  5988.09947482]
New Q values:  [   65.88494308  1802.67044829 -5588.09647059  3360.21516062]
Reward: -1  Episode Reward:  49
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3169.5580622  -5807.06396197  3218.58456897  2673.47231393]
------
Step:12, Action:East
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 3.19081808e+03 1.92090455e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 2.28379178e+03 1.92090455e+03]
Reward: -1  Episode Reward:  48
xxxxx
x  .x
x  gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308  1802.67044829 -5588.09647059  3360.21516062]
------
Step:13, Action:West
State  288
Old Q Values:  [   65.88494308  1802.67044829 -5588.09647059  3360.21516062]
New Q values:  [   65.88494308  1802.67044829 -5588.09647059  2309.06143494]
Reward: -1  Episode Reward:  47
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3169.5580622  -5807.06396197  3218.58456897  2673.47231393]
------
Step:14, Action:East
State  276
Old Q Values:  [ 3169.5580622  -5807.06396197  3218.58456897  2673.47231393]
New Q values:  [ 3169.5580622  -5807.06396197  1979.55225807  2673.47231393]
Reward: -1  Episode Reward:  46
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308  1802.67044829 -5588.09647059  2309.06143494]
------
Step:15, Action:South
State  288
Old Q Values:  [   65.88494308  1802.67044829 -5588.09647059  2309.06143494]
New Q values:  [   65.88494308  1233.1866098  -5588.09647059  2309.06143494]
Reward: -301  Episode Reward:  -255
xxxxx
x  .x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308  1233.1866098  -5588.09647059  2309.06143494]
------
Step:16, Action:West
State  288
Old Q Values:  [   65.88494308  1233.1866098  -5588.09647059  2309.06143494]
New Q values:  [   65.88494308  1233.1866098  -5588.09647059  1873.89199264]
Reward: -1  Episode Reward:  -256
xxxxx
x  .x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 3169.5580622  -5807.06396197  1979.55225807  2673.47231393]
------
Step:17, Action:North
State  276
Old Q Values:  [ 3169.5580622  -5807.06396197  1979.55225807  2673.47231393]
New Q values:  [ 5485.67930291 -5807.06396197  1979.55225807  2673.47231393]
Reward: -1  Episode Reward:  -257
xxxxx
xg .x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ -534.40699599  2158.52799082 14061.52026008   231.67262594]
------
Step:18, Action:East
State  196
Old Q Values:  [ -534.40699599  2158.52799082 14061.52026008   231.67262594]
New Q values:  [-534.40699599 2158.52799082 6832.70003511  231.67262594]
Reward: -1  Episode Reward:  -258
xxxxx
x  .x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4028.97310361  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:19, Action:North
State  208
Old Q Values:  [ 4028.97310361  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [79951.79781397  3138.4680197  -4584.50430574 -1049.83093042]
Reward: 100009  Episode Reward:  99751
xxxxx
xg ax
x   x
x   x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1909.515118   -2165.66138672  -180.6       ]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 1213.24751413  431.25952337 -120.29354603]
New Q values:  [-177.44732869 1675.70335915  431.25952337 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.73488392e+03 1.78073141e+03 3.95001451e+03 3.33862213e+00]
------
Step:2, Action:East
State  183
Old Q Values:  [  883.67312173  2257.63722641 12903.28936961  1554.80203889]
New Q values:  [ 883.67312173 2257.63722641 6544.18479152 1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x . x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 3.76178822e+03 1.42261918e+03 4.59156348e+03]
------
Step:3, Action:West
State  201
Old Q Values:  [ 613.33320563 6915.25862153 -789.02220255 1084.48089466]
New Q values:  [ 613.33320563 6915.25862153 -789.02220255 1311.30124863]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 2927.0296359  1843.56468795  154.04646645]
------
Step:4, Action:South
State  183
Old Q Values:  [ 883.67312173 2257.63722641 6544.18479152 1554.80203889]
New Q values:  [ 883.67312173 1659.97731283 6544.18479152 1554.80203889]
Reward: 9  Episode Reward:  26
xxxxx
x . x
x  .x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2292.34596472 1352.37702619 2505.0747409   -12.17474163]
------
Step:5, Action:East
State  261
Old Q Values:  [2292.34596472 1352.37702619 2505.0747409   -12.17474163]
New Q values:  [ 2292.34596472  1352.37702619 -3346.86631277   -12.17474163]
Reward: -9991  Episode Reward:  -9965
xxxxx
x . x
x  .x
x g.x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1675.70335915  431.25952337 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 2285.03699193 1796.41351142    0.        ]
New Q values:  [ 221.30610858 2104.41915027 1796.41351142    0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.73488392e+03 1.78073141e+03 3.95001451e+03 3.33862213e+00]
------
Step:2, Action:South
State  183
Old Q Values:  [ 883.67312173 1659.97731283 6544.18479152 1554.80203889]
New Q values:  [ 883.67312173 1357.09471455 6544.18479152 1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x . x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2292.34596472  1352.37702619 -3346.86631277   -12.17474163]
------
Step:3, Action:North
State  261
Old Q Values:  [ 2292.34596472  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 2879.59382335  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 883.67312173 1357.09471455 6544.18479152 1554.80203889]
------
Step:4, Action:East
State  183
Old Q Values:  [ 883.67312173 1357.09471455 6544.18479152 1554.80203889]
New Q values:  [  883.67312173  1357.09471455 14112.47890682  1554.80203889]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[38298.01663404  6511.46529034  5981.269706    2179.39995143]
------
Step:5, Action:North
State  198
Old Q Values:  [1156.11987161 -200.61022961 6317.9675351     0.        ]
New Q values:  [1922.41774059 -200.61022961 6317.9675351     0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x a.x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 4848.56597315    0.          503.49427758]
------
Step:6, Action:South
State  118
Old Q Values:  [1761.89752936 4848.56597315    0.          503.49427758]
New Q values:  [1761.89752936 3834.21664979    0.          503.49427758]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[1922.41774059 -200.61022961 6317.9675351     0.        ]
------
Step:7, Action:East
State  196
Old Q Values:  [-534.40699599 2158.52799082 6832.70003511  231.67262594]
New Q values:  [-534.40699599 2158.52799082 4150.14119018  231.67262594]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2929.17100536 2980.50929218  606.149024   4725.53725377]
------
Step:8, Action:West
State  216
Old Q Values:  [2929.17100536 2980.50929218  606.149024   4725.53725377]
New Q values:  [ 2929.17100536  2980.50929218   606.149024   -2865.34274144]
Reward: -10001  Episode Reward:  -9968
xxxxx
x  .x
x g x
x ..x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.73488392e+03 1.78073141e+03 3.95001451e+03 3.33862213e+00]
------
Step:1, Action:East
State  189
Old Q Values:  [ 533.05203844 2927.0296359  1843.56468795  154.04646645]
New Q values:  [ 533.05203844 2927.0296359  2817.40346164  154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 6915.25862153 -789.02220255 1311.30124863]
------
Step:2, Action:South
State  196
Old Q Values:  [-534.40699599 2158.52799082 4150.14119018  231.67262594]
New Q values:  [-534.40699599 2514.5149872  4150.14119018  231.67262594]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[ 5485.67930291 -5807.06396197  1979.55225807  2673.47231393]
------
Step:3, Action:North
State  276
Old Q Values:  [ 5485.67930291 -5807.06396197  1979.55225807  2673.47231393]
New Q values:  [-2561.28592178 -5807.06396197  1979.55225807  2673.47231393]
Reward: -10001  Episode Reward:  -9983
xxxxx
x. .x
x g.x
x. .x
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308  1233.1866098  -5588.09647059  1873.89199264]
------
Step:1, Action:West
State  288
Old Q Values:  [   65.88494308  1233.1866098  -5588.09647059  1873.89199264]
New Q values:  [   65.88494308  1233.1866098  -5588.09647059  5426.85251922]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799 14830.35532991 15572.98574054]
------
Step:2, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799 14830.35532991 15572.98574054]
New Q values:  [ 3134.5582149  -8521.23367799  7559.59788773 15572.98574054]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x ..x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308  1233.1866098  -5588.09647059  5426.85251922]
------
Step:3, Action:South
State  288
Old Q Values:  [   65.88494308  1233.1866098  -5588.09647059  5426.85251922]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  5426.85251922]
Reward: -10301  Episode Reward:  -10293
xxxxx
x...x
x ..x
x. gx
xxxxx
xxxxx
x...x
x...x
x.agx
xxxxx
Step:1, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  7559.59788773 15572.98574054]
New Q values:  [ 3134.5582149  -8521.23367799  7559.59788773  7098.47244322]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x...x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2879.59382335  1352.37702619 -3346.86631277   -12.17474163]
------
Step:2, Action:North
State  260
Old Q Values:  [-2516.06428022 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [ 1012.41752329 -5704.51612281  2149.47028662 -5679.36893145]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xa..x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  6711.47745126     0.        ]
------
Step:3, Action:East
State  183
Old Q Values:  [  883.67312173  1357.09471455 14112.47890682  1554.80203889]
New Q values:  [  883.67312173  1357.09471455 17139.79655294  1554.80203889]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x a.x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[38298.01663404  6511.46529034  5981.269706    2179.39995143]
------
Step:4, Action:North
State  196
Old Q Values:  [-534.40699599 2514.5149872  4150.14119018  231.67262594]
New Q values:  [38417.86529363  2514.5149872   4150.14119018   231.67262594]
Reward: 9  Episode Reward:  36
xxxxx
x.a.x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 128754.09364007]
------
Step:5, Action:West
State  118
Old Q Values:  [1761.89752936 3834.21664979    0.          503.49427758]
New Q values:  [1761.89752936 3834.21664979    0.         3200.20664299]
Reward: 9  Episode Reward:  45
xxxxx
xa .x
xg .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        9978.02977319 2846.56389321 -180.6       ]
------
Step:6, Action:East
State  103
Old Q Values:  [ 221.30610858 2104.41915027 1796.41351142    0.        ]
New Q values:  [ 221.30610858 2104.41915027 1868.2303995     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x a.x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 3834.21664979    0.         3200.20664299]
------
Step:7, Action:West
State  119
Old Q Values:  [  0.        465.4216645   0.          0.       ]
New Q values:  [  0.         465.4216645    0.         630.72574508]
Reward: -1  Episode Reward:  43
xxxxx
xa .x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2104.41915027 1868.2303995     0.        ]
------
Step:8, Action:South
State  103
Old Q Values:  [ 221.30610858 2104.41915027 1868.2303995     0.        ]
New Q values:  [ 221.30610858 5983.10662599 1868.2303995     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  883.67312173  1357.09471455 17139.79655294  1554.80203889]
------
Step:9, Action:East
State  181
Old Q Values:  [1.73488392e+03 1.78073141e+03 3.95001451e+03 3.33862213e+00]
New Q values:  [1734.88392142 1780.73141042 1997.75624973    3.33862213]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  1394.50148356 -5538.30598082   403.06255908]
------
Step:10, Action:South
State  199
Old Q Values:  [   14.86214194  1549.78616729 11260.80108661  1915.70494401]
New Q values:  [   14.86214194  1304.45200084 11260.80108661  1915.70494401]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 0.00000000e+00 2.28379178e+03 1.92090455e+03]
------
Step:11, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 2.28379178e+03 1.92090455e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 2.28379178e+03 1.63163997e+03]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2879.59382335  1352.37702619 -3346.86631277   -12.17474163]
------
Step:12, Action:North
State  261
Old Q Values:  [ 2879.59382335  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 1750.56440426  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1734.88392142 1780.73141042 1997.75624973    3.33862213]
------
Step:13, Action:South
State  181
Old Q Values:  [1734.88392142 1780.73141042 1997.75624973    3.33862213]
New Q values:  [1734.88392142 1236.86188545 1997.75624973    3.33862213]
Reward: -1  Episode Reward:  37
xxxxx
x g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1750.56440426  1352.37702619 -3346.86631277   -12.17474163]
------
Step:14, Action:North
State  260
Old Q Values:  [ 1012.41752329 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [ 2574.23921699 -5704.51612281  2149.47028662 -5679.36893145]
Reward: -1  Episode Reward:  36
xxxxx
xg .x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  3060.24711158  7232.90735891 -4966.32149798]
------
Step:15, Action:East
State  180
Old Q Values:  [ 4507.91302948  3060.24711158  7232.90735891 -4966.32149798]
New Q values:  [ 4507.91302948  3060.24711158 14417.92253165 -4966.32149798]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[38417.86529363  2514.5149872   4150.14119018   231.67262594]
------
Step:16, Action:North
State  196
Old Q Values:  [38417.86529363  2514.5149872   4150.14119018   231.67262594]
New Q values:  [16516.81111239  2514.5149872   4150.14119018   231.67262594]
Reward: -1  Episode Reward:  34
xxxxx
x a.x
x g.x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 3834.21664979    0.         3200.20664299]
------
Step:17, Action:West
State  118
Old Q Values:  [1761.89752936 3834.21664979    0.         3200.20664299]
New Q values:  [1761.89752936 3834.21664979    0.         4272.89158915]
Reward: -1  Episode Reward:  33
xxxxx
xa .x
xg .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        9978.02977319 2846.56389321 -180.6       ]
------
Step:18, Action:East
State  102
Old Q Values:  [-180.6        9978.02977319 2846.56389321 -180.6       ]
New Q values:  [-180.6        9978.02977319 2419.89303403 -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x a.x
x  .x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 3834.21664979    0.         4272.89158915]
------
Step:19, Action:West
State  118
Old Q Values:  [1761.89752936 3834.21664979    0.         4272.89158915]
New Q values:  [1761.89752936 3834.21664979    0.         4701.96556762]
Reward: -1  Episode Reward:  31
xxxxx
xa .x
xg .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        9978.02977319 2419.89303403 -180.6       ]
------
Step:20, Action:East
State  100
Old Q Values:  [   0.         4228.78310806  493.77732793    0.        ]
New Q values:  [   0.         4228.78310806  507.82710548    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
xga.x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[   0.         1036.38724771    0.            0.        ]
------
Step:21, Action:South
State  116
Old Q Values:  [   0.         1036.38724771    0.            0.        ]
New Q values:  [   0.        5368.9982328    0.           0.       ]
Reward: -1  Episode Reward:  29
xxxxx
x g.x
x a.x
x   x
xxxxx
Step:22, Action:East
State  196
Old Q Values:  [16516.81111239  2514.5149872   4150.14119018   231.67262594]
New Q values:  [16516.81111239  2514.5149872  25650.99582026   231.67262594]
Reward: 9  Episode Reward:  38
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[79951.79781397  3138.4680197  -4584.50430574 -1049.83093042]
------
Step:23, Action:North
State  208
Old Q Values:  [79951.79781397  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [114144.14987036   3138.4680197   -4584.50430574  -1049.83093042]
Reward: 100009  Episode Reward:  100047
xxxxx
x  ax
xg  x
x   x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 5.07231947e+03 4.92263035e+03 2.45392999e+03]
------
Step:1, Action:South
State  192
Old Q Values:  [3.89777037e-01 5.07231947e+03 4.92263035e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 4.30220715e+03 4.92263035e+03 2.45392999e+03]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  7559.59788773  7098.47244322]
------
Step:2, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  7559.59788773  7098.47244322]
New Q values:  [ 3134.5582149  -8521.23367799  4657.29491086  7098.47244322]
Reward: 9  Episode Reward:  18
xxxxx
x...x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  5426.85251922]
------
Step:3, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  5426.85251922]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  4299.68274065]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x  .x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  4657.29491086  7098.47244322]
------
Step:4, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  1979.55225807  2673.47231393]
New Q values:  [-2561.28592178 -5807.06396197  1979.55225807  1599.95824685]
Reward: 9  Episode Reward:  26
xxxxx
x.g.x
x  .x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1750.56440426  1352.37702619 -3346.86631277   -12.17474163]
------
Step:5, Action:North
State  260
Old Q Values:  [ 2574.23921699 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [ 5354.47244629 -5704.51612281  2149.47028662 -5679.36893145]
Reward: -1  Episode Reward:  25
xxxxx
xg..x
xa .x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4507.91302948  3060.24711158 14417.92253165 -4966.32149798]
------
Step:6, Action:East
State  180
Old Q Values:  [ 4507.91302948  3060.24711158 14417.92253165 -4966.32149798]
New Q values:  [ 4507.91302948  3060.24711158  7243.35811816 -4966.32149798]
Reward: -1  Episode Reward:  24
xxxxx
x...x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.30220715e+03 4.92263035e+03 2.45392999e+03]
------
Step:7, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.30220715e+03 4.92263035e+03 2.45392999e+03]
New Q values:  [3.89777037e-01 4.30220715e+03 3.62176971e+04 2.45392999e+03]
Reward: 9  Episode Reward:  33
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[114144.14987036   3138.4680197   -4584.50430574  -1049.83093042]
------
Step:8, Action:North
State  208
Old Q Values:  [114144.14987036   3138.4680197   -4584.50430574  -1049.83093042]
New Q values:  [63997.86852068  3138.4680197  -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  42
xxxxx
x.gax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[39176.10440381  7449.59323443 -8652.84       61116.02857511]
------
Step:9, Action:North
State  128
Old Q Values:  [39176.10440381  7449.59323443 -8652.84       61116.02857511]
New Q values:  [33824.65033405  7449.59323443 -8652.84       61116.02857511]
Reward: -301  Episode Reward:  -259
xxxxx
xg.ax
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:NW
[33824.65033405  7449.59323443 -8652.84       61116.02857511]
------
Step:10, Action:West
State  128
Old Q Values:  [33824.65033405  7449.59323443 -8652.84       61116.02857511]
New Q values:  [33824.65033405  7449.59323443 -8652.84       56592.57475141]
Reward: -9991  Episode Reward:  -10250
xxxxx
x.g x
x   x
x   x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.30220715e+03 3.62176971e+04 2.45392999e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [  169.9257398  23608.78227304  3353.15003737   610.93635926]
New Q values:  [  169.9257398  23608.78227304  2240.8128026    610.93635926]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2929.17100536  2980.50929218   606.149024   -2865.34274144]
------
Step:2, Action:South
State  208
Old Q Values:  [63997.86852068  3138.4680197  -4584.50430574 -1049.83093042]
New Q values:  [63997.86852068  2550.69203008 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  4299.68274065]
------
Step:3, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  4299.68274065]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  3854.81482923]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  4657.29491086  7098.47244322]
------
Step:4, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  4657.29491086  7098.47244322]
New Q values:  [ 3134.5582149  -8521.23367799  4657.29491086  4451.13071118]
Reward: 9  Episode Reward:  36
xxxxx
xg..x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 5354.47244629 -5704.51612281  2149.47028662 -5679.36893145]
------
Step:5, Action:North
State  261
Old Q Values:  [ 1750.56440426  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 1304.95263662  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  45
xxxxx
x g.x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1734.88392142 1236.86188545 1997.75624973    3.33862213]
------
Step:6, Action:East
State  181
Old Q Values:  [1734.88392142 1236.86188545 1997.75624973    3.33862213]
New Q values:  [1734.88392142 1236.86188545 2848.73550347    3.33862213]
Reward: -1  Episode Reward:  44
xxxxx
x .gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-1649.89560358  6834.11001193  5348.19728675   767.35890262]
------
Step:7, Action:South
State  196
Old Q Values:  [16516.81111239  2514.5149872  25650.99582026   231.67262594]
New Q values:  [16516.81111239  1599.0716723  25650.99582026   231.67262594]
Reward: -1  Episode Reward:  43
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  1979.55225807  1599.95824685]
------
Step:8, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  4657.29491086  4451.13071118]
New Q values:  [ 3134.5582149  -8521.23367799  3018.76241311  4451.13071118]
Reward: -1  Episode Reward:  42
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  3854.81482923]
------
Step:9, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  3854.81482923]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  2135.19160911]
Reward: -1  Episode Reward:  41
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  1979.55225807  1599.95824685]
------
Step:10, Action:East
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  3018.76241311  4451.13071118]
New Q values:  [ 3134.5582149  -8521.23367799  1847.46244798  4451.13071118]
Reward: -1  Episode Reward:  40
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  2135.19160911]
------
Step:11, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  2135.19160911]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  2188.815857  ]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3134.5582149  -8521.23367799  1847.46244798  4451.13071118]
------
Step:12, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  1847.46244798  4451.13071118]
New Q values:  [ 3134.5582149  -8521.23367799  1847.46244798  3386.19401836]
Reward: -1  Episode Reward:  38
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 5354.47244629 -5704.51612281  2149.47028662 -5679.36893145]
------
Step:13, Action:North
State  260
Old Q Values:  [ 5354.47244629 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [-1685.80358604 -5704.51612281  2149.47028662 -5679.36893145]
Reward: -10001  Episode Reward:  -9963
xxxxx
x ..x
xg  x
x   x
xxxxx
xxxxx
x..gx
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  2188.815857  ]
------
Step:1, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  2188.815857  ]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  2632.85997153]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 5839.77876242 5762.50802749]
------
Step:2, Action:East
State  273
Old Q Values:  [1637.72437281 1974.75214244 5839.77876242 5762.50802749]
New Q values:  [ 1637.72437281  1974.75214244 -2874.83050357  5762.50802749]
Reward: -10001  Episode Reward:  -9992
xxxxx
x.. x
x...x
x. gx
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1734.88392142 1236.86188545 2848.73550347    3.33862213]
------
Step:1, Action:East
State  189
Old Q Values:  [ 533.05203844 2927.0296359  2817.40346164  154.04646645]
New Q values:  [ 533.05203844 2927.0296359  3206.93897112  154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[ 613.33320563 6915.25862153 -789.02220255 1311.30124863]
------
Step:2, Action:South
State  196
Old Q Values:  [16516.81111239  1599.0716723  25650.99582026   231.67262594]
New Q values:  [16516.81111239  1238.89434634 25650.99582026   231.67262594]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  1979.55225807  1599.95824685]
------
Step:3, Action:East
State  273
Old Q Values:  [ 1637.72437281  1974.75214244 -2874.83050357  5762.50802749]
New Q values:  [1637.72437281 1974.75214244 -354.67420997 5762.50802749]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  2632.85997153]
------
Step:4, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  2632.85997153]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  2781.29639686]
Reward: -1  Episode Reward:  26
xxxxx
x. .x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 -354.67420997 5762.50802749]
------
Step:5, Action:West
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 2.28379178e+03 1.63163997e+03]
New Q values:  [1.64433000e+00 0.00000000e+00 2.28379178e+03 1.06376909e+03]
Reward: 9  Episode Reward:  35
xxxxx
x. .x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1304.95263662  1352.37702619 -3346.86631277   -12.17474163]
------
Step:6, Action:South
State  257
Old Q Values:  [36289.45311927 12764.58618105 13169.98702937  1875.31501677]
New Q values:  [36289.45311927 15812.0704082  13169.98702937  1875.31501677]
Reward: -301  Episode Reward:  -266
xxxxx
x. .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[36289.45311927 15812.0704082  13169.98702937  1875.31501677]
------
Step:7, Action:North
State  261
Old Q Values:  [ 1304.95263662  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 1376.00170569  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  -267
xxxxx
x. .x
xag.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1734.88392142 1236.86188545 2848.73550347    3.33862213]
------
Step:8, Action:North
State  180
Old Q Values:  [ 4507.91302948  3060.24711158  7243.35811816 -4966.32149798]
New Q values:  [ 4801.97414375  3060.24711158  7243.35811816 -4966.32149798]
Reward: 9  Episode Reward:  -258
xxxxx
xa .x
xg .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        9978.02977319 2419.89303403 -180.6       ]
------
Step:9, Action:East
State  100
Old Q Values:  [   0.         4228.78310806  507.82710548    0.        ]
New Q values:  [   0.         4228.78310806 1813.23031203    0.        ]
Reward: -1  Episode Reward:  -259
xxxxx
xga.x
x  .x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[   0.        5368.9982328    0.           0.       ]
------
Step:10, Action:South
State  118
Old Q Values:  [1761.89752936 3834.21664979    0.         4701.96556762]
New Q values:  [1761.89752936 9228.38540599    0.         4701.96556762]
Reward: -1  Episode Reward:  -260
xxxxx
x  .x
xga.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[16516.81111239  1238.89434634 25650.99582026   231.67262594]
------
Step:11, Action:East
State  198
Old Q Values:  [1922.41774059 -200.61022961 6317.9675351     0.        ]
New Q values:  [ 1922.41774059  -200.61022961 11290.0677349      0.        ]
Reward: 9  Episode Reward:  -251
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[29191.60240288  6233.36404734   790.72804752  5103.37501425]
------
Step:12, Action:North
State  208
Old Q Values:  [63997.86852068  2550.69203008 -4584.50430574 -1049.83093042]
New Q values:  [107762.57815304   2550.69203008  -4584.50430574  -1049.83093042]
Reward: 100009  Episode Reward:  99758
xxxxx
x  ax
xg  x
x   x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1734.88392142 1236.86188545 2848.73550347    3.33862213]
------
Step:1, Action:East
State  189
Old Q Values:  [ 533.05203844 2927.0296359  3206.93897112  154.04646645]
New Q values:  [ 533.05203844 2927.0296359  8370.81027036  154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  23608.78227304  2240.8128026    610.93635926]
------
Step:2, Action:South
State  193
Old Q Values:  [-1649.89560358  6834.11001193  5348.19728675   767.35890262]
New Q values:  [-1649.89560358  4467.79641302  5348.19728675   767.35890262]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 -354.67420997 5762.50802749]
------
Step:3, Action:West
State  273
Old Q Values:  [1637.72437281 1974.75214244 -354.67420997 5762.50802749]
New Q values:  [1637.72437281 1974.75214244 -354.67420997 2723.2037227 ]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1376.00170569  1352.37702619 -3346.86631277   -12.17474163]
------
Step:4, Action:North
State  261
Old Q Values:  [ 1376.00170569  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 1404.42133332  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1734.88392142 1236.86188545 2848.73550347    3.33862213]
------
Step:5, Action:North
State  183
Old Q Values:  [  883.67312173  1357.09471455 17139.79655294  1554.80203889]
New Q values:  [  861.58025644  1357.09471455 17139.79655294  1554.80203889]
Reward: 9  Episode Reward:  35
xxxxx
xa. x
x  .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1675.70335915  431.25952337 -120.29354603]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 1675.70335915  431.25952337 -120.29354603]
New Q values:  [-177.44732869 1524.3019947   431.25952337 -120.29354603]
Reward: -1  Episode Reward:  34
xxxxx
x . x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1734.88392142 1236.86188545 2848.73550347    3.33862213]
------
Step:7, Action:North
State  181
Old Q Values:  [1734.88392142 1236.86188545 2848.73550347    3.33862213]
New Q values:  [2488.28555637 1236.86188545 2848.73550347    3.33862213]
Reward: -1  Episode Reward:  33
xxxxx
xa. x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 5983.10662599 1868.2303995     0.        ]
------
Step:8, Action:South
State  109
Old Q Values:  [ -241.10880094  1909.515118   -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1617.82669824 -2165.66138672  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2488.28555637 1236.86188545 2848.73550347    3.33862213]
------
Step:9, Action:East
State  181
Old Q Values:  [2488.28555637 1236.86188545 2848.73550347    3.33862213]
New Q values:  [2.48828556e+03 1.23686189e+03 8.83419295e+03 3.33862213e+00]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[16516.81111239  1238.89434634 25650.99582026   231.67262594]
------
Step:10, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.30220715e+03 3.62176971e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 4.30220715e+03 4.68212523e+04 2.45392999e+03]
Reward: 9  Episode Reward:  40
xxxxx
x . x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[107762.57815304   2550.69203008  -4584.50430574  -1049.83093042]
------
Step:11, Action:North
State  208
Old Q Values:  [107762.57815304   2550.69203008  -4584.50430574  -1049.83093042]
New Q values:  [65262.46200599  2550.69203008 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  39
xxxxx
x .ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 73860.10248258]
------
Step:12, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  6.86523504e+02]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  2.02593654e+03]
Reward: 9  Episode Reward:  48
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 5819.75713236  350.00244198]
------
Step:13, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 2055.02688029 1772.94838375]
New Q values:  [   0.         1166.51141701 3054.93376968 1772.94838375]
Reward: -1  Episode Reward:  47
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  7445.0767252    660.86649319 -1803.08599325]
------
Step:14, Action:South
State  136
Old Q Values:  [  878.22269011  7445.0767252    660.86649319 -1803.08599325]
New Q values:  [  878.22269011  3871.58347773   660.86649319 -1803.08599325]
Reward: -1  Episode Reward:  46
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2929.17100536  2980.50929218   606.149024   -2865.34274144]
------
Step:15, Action:South
State  208
Old Q Values:  [65262.46200599  2550.69203008 -4584.50430574 -1049.83093042]
New Q values:  [65262.46200599 61860.06573109 -4584.50430574 -1049.83093042]
Reward: 100009  Episode Reward:  100055
xxxxx
x  gx
x   x
x  ax
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  2781.29639686]
------
Step:1, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  2781.29639686]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  1934.87967555]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 -354.67420997 2723.2037227 ]
------
Step:2, Action:West
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  1847.46244798  3386.19401836]
New Q values:  [ 3134.5582149  -8521.23367799  1847.46244798  1781.20400734]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x...x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1404.42133332  1352.37702619 -3346.86631277   -12.17474163]
------
Step:3, Action:North
State  260
Old Q Values:  [-1685.80358604 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [ 1504.08600103 -5704.51612281  2149.47028662 -5679.36893145]
Reward: 9  Episode Reward:  27
xxxxx
xg .x
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 4801.97414375  3060.24711158  7243.35811816 -4966.32149798]
------
Step:4, Action:East
State  180
Old Q Values:  [ 4801.97414375  3060.24711158  7243.35811816 -4966.32149798]
New Q values:  [ 4801.97414375  3060.24711158 10598.04199334 -4966.32149798]
Reward: 9  Episode Reward:  36
xxxxx
xg .x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[16516.81111239  1238.89434634 25650.99582026   231.67262594]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.30220715e+03 4.68212523e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 4.30220715e+03 3.83126395e+04 2.45392999e+03]
Reward: 9  Episode Reward:  45
xxxxx
x. .x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[65262.46200599 61860.06573109 -4584.50430574 -1049.83093042]
------
Step:6, Action:North
State  208
Old Q Values:  [65262.46200599 61860.06573109 -4584.50430574 -1049.83093042]
New Q values:  [48268.41554717 61860.06573109 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  54
xxxxx
x. ax
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 73860.10248258]
------
Step:7, Action:West
State  130
Old Q Values:  [41234.48978377  6927.8788297   -180.00807518 73860.10248258]
New Q values:  [41234.48978377  6927.8788297   -180.00807518 68169.66908505]
Reward: -1  Episode Reward:  53
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 128754.09364007]
------
Step:8, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   49543.89769946 128754.09364007]
New Q values:  [  -180.6          3557.6642036   49543.89769946 129190.58891738]
Reward: 100009  Episode Reward:  100062
xxxxx
xa  x
x g x
x   x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 -354.67420997 2723.2037227 ]
------
Step:1, Action:West
State  273
Old Q Values:  [1637.72437281 1974.75214244 -354.67420997 2723.2037227 ]
New Q values:  [1637.72437281 1974.75214244 -354.67420997 1516.00788908]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
x.. x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1404.42133332  1352.37702619 -3346.86631277   -12.17474163]
------
Step:2, Action:North
State  261
Old Q Values:  [ 1404.42133332  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 3217.42641757  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
xa. x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.48828556e+03 1.23686189e+03 8.83419295e+03 3.33862213e+00]
------
Step:3, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039   4469.47019812     0.        ]
New Q values:  [60476.05138135 21430.9929039   3397.64726527     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x..gx
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-1649.89560358  4467.79641302  5348.19728675   767.35890262]
------
Step:4, Action:East
State  193
Old Q Values:  [-1649.89560358  4467.79641302  5348.19728675   767.35890262]
New Q values:  [-1649.89560358  4467.79641302 14696.69863402   767.35890262]
Reward: -10001  Episode Reward:  -9974
xxxxx
x...x
x  gx
x  .x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[38298.01663404  6511.46529034  5981.269706    2179.39995143]
------
Step:1, Action:North
State  194
Old Q Values:  [38298.01663404  6511.46529034  5981.269706    2179.39995143]
New Q values:  [17070.53379332  6511.46529034  5981.269706    2179.39995143]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x. .x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 5819.75713236  350.00244198]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1254.14745842   512.52083676]
New Q values:  [ -281.736      -1150.91067548  1114.83994582   512.52083676]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  2.02593654e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  2.02593654e+03]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  1.14422660e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1114.83994582   512.52083676]
------
Step:4, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1114.83994582   512.52083676]
New Q values:  [ -281.736      -1150.91067548   788.60395843   512.52083676]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  1.14422660e+03]
------
Step:5, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  1.14422660e+03]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  6.93671828e+02]
Reward: -1  Episode Reward:  15
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   788.60395843   512.52083676]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   788.60395843   512.52083676]
New Q values:  [ -281.736      -1150.91067548   522.94313167   512.52083676]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  6.93671828e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  6.93671828e+02]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  4.33751671e+02]
Reward: -1  Episode Reward:  13
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   522.94313167   512.52083676]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   522.94313167   512.52083676]
New Q values:  [ -281.736      -1150.91067548   338.70275384   512.52083676]
Reward: -1  Episode Reward:  12
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  4.33751671e+02]
------
Step:9, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  4.33751671e+02]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  8.86511900e+02]
Reward: -1  Episode Reward:  11
xxxxx
x.a x
x. .x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2378.70410583   867.03619284]
------
Step:10, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   338.70275384   512.52083676]
New Q values:  [ -281.736      -1150.91067548   400.83467153   512.52083676]
Reward: -1  Episode Reward:  10
xxxxx
x. ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  8.86511900e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  8.86511900e+02]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  1.06761599e+03]
Reward: -1  Episode Reward:  9
xxxxx
x.a x
x. .x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  2378.70410583   867.03619284]
------
Step:12, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  2378.70410583   867.03619284]
New Q values:  [ -253.44886264 -1902.20915811  1271.16643985   867.03619284]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  1.06761599e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  1.06761599e+03]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  8.07796329e+02]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x. .x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811  1271.16643985   867.03619284]
------
Step:14, Action:East
State  123
Old Q Values:  [ -253.44886264 -1902.20915811  1271.16643985   867.03619284]
New Q values:  [ -253.44886264 -1902.20915811   750.20547454   867.03619284]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  8.07796329e+02]
------
Step:15, Action:West
State  136
Old Q Values:  [  878.22269011  3871.58347773   660.86649319 -1803.08599325]
New Q values:  [ 878.22269011 3871.58347773  660.86649319 -639.74056106]
Reward: -1  Episode Reward:  5
xxxxx
x.agx
x. .x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8209.41191864   273.64612079]
------
Step:16, Action:West
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   750.20547454   867.03619284]
New Q values:  [ -253.44886264 -1902.20915811   750.20547454   605.97306875]
Reward: 9  Episode Reward:  14
xxxxx
xa  x
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  760.71844312  845.86197205 -252.78192178]
------
Step:17, Action:East
State  107
Old Q Values:  [-252.35169558  760.71844312  845.86197205 -252.78192178]
New Q values:  [-252.35169558  760.71844312  562.80643118 -252.78192178]
Reward: -1  Episode Reward:  13
xxxxx
x a x
x. .x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[ -253.44886264 -1902.20915811   750.20547454   605.97306875]
------
Step:18, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   400.83467153   512.52083676]
New Q values:  [ -281.736      -1150.91067548   402.07276721   512.52083676]
Reward: -1  Episode Reward:  12
xxxxx
x  ax
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  8.07796329e+02]
------
Step:19, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  8.07796329e+02]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  5.47580174e+02]
Reward: -1  Episode Reward:  11
xxxxx
x a x
x. .x
x. gx
xxxxx
Step:20, Action:North
State  123
Old Q Values:  [ -253.44886264 -1902.20915811   750.20547454   605.97306875]
New Q values:  [  -56.91790269 -1902.20915811   750.20547454   605.97306875]
Reward: -301  Episode Reward:  -290
xxxxx
x a x
x. gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811   750.20547454   605.97306875]
------
Step:21, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   402.07276721   512.52083676]
New Q values:  [ -281.736      -1150.91067548   324.50315903   512.52083676]
Reward: -1  Episode Reward:  -291
xxxxx
x  ax
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  5.47580174e+02]
------
Step:22, Action:West
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  5.47580174e+02]
New Q values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  3.72188321e+02]
Reward: -1  Episode Reward:  -292
xxxxx
x a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   324.50315903   512.52083676]
------
Step:23, Action:West
State  126
Old Q Values:  [   0.          331.64678262 5819.75713236  350.00244198]
New Q values:  [   0.          331.64678262 5819.75713236  601.69442602]
Reward: -1  Episode Reward:  -293
xxxxx
xa  x
x. .x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1540.97816408 -180.6       ]
------
Step:24, Action:East
State  107
Old Q Values:  [-252.35169558  760.71844312  562.80643118 -252.78192178]
New Q values:  [-252.35169558  760.71844312  378.2788235  -252.78192178]
Reward: -1  Episode Reward:  -294
xxxxx
x a x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   324.50315903   512.52083676]
------
Step:25, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   324.50315903   512.52083676]
New Q values:  [ -281.736      -1150.91067548   324.50315903   432.62386764]
Reward: -1  Episode Reward:  -295
xxxxx
xa  x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  760.71844312  378.2788235  -252.78192178]
------
Step:26, Action:South
State  107
Old Q Values:  [-252.35169558  760.71844312  378.2788235  -252.78192178]
New Q values:  [-252.35169558  874.82149175  378.2788235  -252.78192178]
Reward: 9  Episode Reward:  -286
xxxxx
x   x
xa gx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 357.2991616     0.         1883.78038168 -178.98      ]
------
Step:27, Action:East
State  183
Old Q Values:  [  861.58025644  1357.09471455 17139.79655294  1554.80203889]
New Q values:  [  861.58025644  1357.09471455 10233.55894716  1554.80203889]
Reward: -1  Episode Reward:  -287
xxxxx
x   x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[   14.86214194  1304.45200084 11260.80108661  1915.70494401]
------
Step:28, Action:East
State  201
Old Q Values:  [ 613.33320563 6915.25862153 -789.02220255 1311.30124863]
New Q values:  [  613.33320563  6915.25862153 -5416.05609337  1311.30124863]
Reward: -9991  Episode Reward:  -10278
xxxxx
x   x
x  gx
x. .x
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3217.42641757  1352.37702619 -3346.86631277   -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [ 3217.42641757  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 3942.62845127  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.48828556e+03 1.23686189e+03 8.83419295e+03 3.33862213e+00]
------
Step:2, Action:East
State  181
Old Q Values:  [2.48828556e+03 1.23686189e+03 8.83419295e+03 3.33862213e+00]
New Q values:  [2.48828556e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
x g.x
x . x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  3.72188321e+02]
------
Step:1, Action:North
State  138
Old Q Values:  [ 4.10582115e+02 -3.97609766e+03 -3.22965309e-01  3.72188321e+02]
New Q values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  3.72188321e+02]
Reward: -301  Episode Reward:  -301
xxxxx
x..ax
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  3.72188321e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  3.72188321e+02]
New Q values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.84062489e+02]
Reward: 9  Episode Reward:  -292
xxxxx
x.a x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   324.50315903   432.62386764]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   324.50315903   432.62386764]
New Q values:  [ -281.736      -1150.91067548   324.50315903   640.74299628]
Reward: 9  Episode Reward:  -283
xxxxx
xa  x
x ..x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1540.97816408 -180.6       ]
------
Step:4, Action:East
State  111
Old Q Values:  [-177.44732869 1524.3019947   431.25952337 -120.29354603]
New Q values:  [-177.44732869 1524.3019947   364.12670823 -120.29354603]
Reward: -1  Episode Reward:  -284
xxxxx
x a x
x ..x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   324.50315903   640.74299628]
------
Step:5, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811   750.20547454   605.97306875]
New Q values:  [  -56.91790269 -1902.20915811   750.20547454   699.07982591]
Reward: -1  Episode Reward:  -285
xxxxx
xa  x
x ..x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 1524.3019947   364.12670823 -120.29354603]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 1524.3019947   364.12670823 -120.29354603]
New Q values:  [-177.44732869 3120.36387899  364.12670823 -120.29354603]
Reward: -1  Episode Reward:  -286
xxxxx
x   x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 2927.0296359  8370.81027036  154.04646645]
------
Step:7, Action:East
State  189
Old Q Values:  [ 533.05203844 2927.0296359  8370.81027036  154.04646645]
New Q values:  [ 533.05203844 2927.0296359  5428.3016946   154.04646645]
Reward: 9  Episode Reward:  -277
xxxxx
x  gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563  6915.25862153 -5416.05609337  1311.30124863]
------
Step:8, Action:South
State  201
Old Q Values:  [  613.33320563  6915.25862153 -5416.05609337  1311.30124863]
New Q values:  [  613.33320563  3363.92909135 -5416.05609337  1311.30124863]
Reward: 9  Episode Reward:  -268
xxxxx
x   x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244 -354.67420997 1516.00788908]
------
Step:9, Action:South
State  277
Old Q Values:  [1.64433000e+00 0.00000000e+00 2.28379178e+03 1.06376909e+03]
New Q values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 1.06376909e+03]
Reward: -301  Episode Reward:  -569
xxxxx
x  gx
x  .x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 5.04537534e+02 2.28379178e+03 1.06376909e+03]
------
Step:10, Action:East
State  273
Old Q Values:  [1637.72437281 1974.75214244 -354.67420997 1516.00788908]
New Q values:  [1637.72437281 1974.75214244  443.99421868 1516.00788908]
Reward: 9  Episode Reward:  -560
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  1934.87967555]
------
Step:11, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  1934.87967555]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  1365.77751295]
Reward: -1  Episode Reward:  -561
xxxxx
x   x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1974.75214244  443.99421868 1516.00788908]
------
Step:12, Action:South
State  273
Old Q Values:  [1637.72437281 1974.75214244  443.99421868 1516.00788908]
New Q values:  [1637.72437281 1201.72649971  443.99421868 1516.00788908]
Reward: -301  Episode Reward:  -862
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1637.72437281 1201.72649971  443.99421868 1516.00788908]
------
Step:13, Action:North
State  273
Old Q Values:  [1637.72437281 1201.72649971  443.99421868 1516.00788908]
New Q values:  [4032.73007511 1201.72649971  443.99421868 1516.00788908]
Reward: -1  Episode Reward:  -863
xxxxx
x   x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[   14.86214194  1304.45200084 11260.80108661  1915.70494401]
------
Step:14, Action:East
State  201
Old Q Values:  [  613.33320563  3363.92909135 -5416.05609337  1311.30124863]
New Q values:  [  613.33320563  3363.92909135 -7266.86964969  1311.30124863]
Reward: -9991  Episode Reward:  -10854
xxxxx
x   x
x  gx
x.  x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[48268.41554717 61860.06573109 -4584.50430574 -1049.83093042]
------
Step:1, Action:South
State  208
Old Q Values:  [48268.41554717 61860.06573109 -4584.50430574 -1049.83093042]
New Q values:  [48268.41554717 25159.15954632 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  1365.77751295]
------
Step:2, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  1365.77751295]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  1761.53002771]
Reward: 9  Episode Reward:  18
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[4032.73007511 1201.72649971  443.99421868 1516.00788908]
------
Step:3, Action:North
State  273
Old Q Values:  [4032.73007511 1201.72649971  443.99421868 1516.00788908]
New Q values:  [6021.50162025 1201.72649971  443.99421868 1516.00788908]
Reward: -1  Episode Reward:  17
xxxxx
x...x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-1649.89560358  4467.79641302 14696.69863402   767.35890262]
------
Step:4, Action:South
State  193
Old Q Values:  [-1649.89560358  4467.79641302 14696.69863402   767.35890262]
New Q values:  [-1649.89560358  3592.96905128 14696.69863402   767.35890262]
Reward: -1  Episode Reward:  16
xxxxx
x..gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[6021.50162025 1201.72649971  443.99421868 1516.00788908]
------
Step:5, Action:North
State  272
Old Q Values:  [ 3134.5582149  -8521.23367799  1847.46244798  1781.20400734]
New Q values:  [12747.0151409  -8521.23367799  1847.46244798  1781.20400734]
Reward: -1  Episode Reward:  15
xxxxx
x.g.x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.30220715e+03 3.83126395e+04 2.45392999e+03]
------
Step:6, Action:East
State  193
Old Q Values:  [-1649.89560358  3592.96905128 14696.69863402   767.35890262]
New Q values:  [-1649.89560358  3592.96905128 20358.60411776   767.35890262]
Reward: -1  Episode Reward:  14
xxxxx
x..gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[48268.41554717 25159.15954632 -4584.50430574 -1049.83093042]
------
Step:7, Action:South
State  208
Old Q Values:  [48268.41554717 25159.15954632 -4584.50430574 -1049.83093042]
New Q values:  [48268.41554717 10591.52282684 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  13
xxxxx
x.g.x
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  1761.53002771]
------
Step:8, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  1761.53002771]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  4528.11655335]
Reward: -1  Episode Reward:  12
xxxxx
xg..x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[12747.0151409  -8521.23367799  1847.46244798  1781.20400734]
------
Step:9, Action:North
State  272
Old Q Values:  [12747.0151409  -8521.23367799  1847.46244798  1781.20400734]
New Q values:  [16591.99791129 -8521.23367799  1847.46244798  1781.20400734]
Reward: -1  Episode Reward:  11
xxxxx
x...x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.30220715e+03 3.83126395e+04 2.45392999e+03]
------
Step:10, Action:East
State  194
Old Q Values:  [17070.53379332  6511.46529034  5981.269706    2179.39995143]
New Q values:  [17070.53379332  6511.46529034 11149.38860326  2179.39995143]
Reward: -1  Episode Reward:  10
xxxxx
x...x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[29191.60240288  6233.36404734   790.72804752  5103.37501425]
------
Step:11, Action:North
State  210
Old Q Values:  [29191.60240288  6233.36404734   790.72804752  5103.37501425]
New Q values:  [32132.94168667  6233.36404734   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  19
xxxxx
x..ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 68169.66908505]
------
Step:12, Action:West
State  130
Old Q Values:  [41234.48978377  6927.8788297   -180.00807518 68169.66908505]
New Q values:  [41234.48978377  6927.8788297   -180.00807518 66030.44430924]
Reward: 9  Episode Reward:  28
xxxxx
x.a x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 129190.58891738]
------
Step:13, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   324.50315903   640.74299628]
New Q values:  [ -281.736      -1150.91067548   324.50315903   524.14364604]
Reward: 9  Episode Reward:  37
xxxxx
xa  x
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  874.82149175  378.2788235  -252.78192178]
------
Step:14, Action:South
State  98
Old Q Values:  [    0.         43902.01212913 58026.90348558     0.        ]
New Q values:  [    0.         46610.82711026 58026.90348558     0.        ]
Reward: 9  Episode Reward:  46
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         96815.40752869     0.        ]
------
Step:15, Action:East
State  187
Old Q Values:  [ 836.45017667    0.         1890.23066022    0.        ]
New Q values:  [ 836.45017667    0.         1303.73788672    0.        ]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:16, Action:East
State  194
Old Q Values:  [17070.53379332  6511.46529034 11149.38860326  2179.39995143]
New Q values:  [17070.53379332  6511.46529034 14099.03794731  2179.39995143]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32132.94168667  6233.36404734   790.72804752  5103.37501425]
------
Step:17, Action:North
State  216
Old Q Values:  [ 2929.17100536  2980.50929218   606.149024   -2865.34274144]
New Q values:  [ 1256.2871487   2980.50929218   606.149024   -2865.34274144]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.84062489e+02]
------
Step:18, Action:West
State  136
Old Q Values:  [ 878.22269011 3871.58347773  660.86649319 -639.74056106]
New Q values:  [ 878.22269011 3871.58347773  660.86649319  830.6837742 ]
Reward: -1  Episode Reward:  42
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3623.93332877   766.31091109]
------
Step:19, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   324.50315903   524.14364604]
New Q values:  [ -281.736      -1150.91067548   214.42001017   524.14364604]
Reward: -1  Episode Reward:  41
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.84062489e+02]
------
Step:20, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.84062489e+02]
New Q values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.70268089e+02]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   214.42001017   524.14364604]
------
Step:21, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   214.42001017   524.14364604]
New Q values:  [ -281.736      -1150.91067548   214.42001017   491.97216067]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   943.04900753  -180.6       ]
------
Step:22, Action:East
State  107
Old Q Values:  [-252.35169558  874.82149175  378.2788235  -252.78192178]
New Q values:  [-252.35169558  874.82149175  298.3031776  -252.78192178]
Reward: -1  Episode Reward:  38
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   214.42001017   491.97216067]
------
Step:23, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   214.42001017   491.97216067]
New Q values:  [ -281.736      -1150.91067548   214.42001017   479.10356653]
Reward: -1  Episode Reward:  37
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   943.04900753  -180.6       ]
------
Step:24, Action:East
State  98
Old Q Values:  [    0.         46610.82711026 58026.90348558     0.        ]
New Q values:  [    0.         46610.82711026 61967.33806945     0.        ]
Reward: -1  Episode Reward:  36
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 129190.58891738]
------
Step:25, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   214.42001017   479.10356653]
New Q values:  [ -281.736      -1150.91067548   214.42001017   453.48787414]
Reward: -1  Episode Reward:  35
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  874.82149175  298.3031776  -252.78192178]
------
Step:26, Action:South
State  107
Old Q Values:  [-252.35169558  874.82149175  298.3031776  -252.78192178]
New Q values:  [-252.35169558  740.44996272  298.3031776  -252.78192178]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xa  x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 836.45017667    0.         1303.73788672    0.        ]
------
Step:27, Action:East
State  187
Old Q Values:  [ 836.45017667    0.         1303.73788672    0.        ]
New Q values:  [ 836.45017667    0.         1069.14077732    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:28, Action:East
State  200
Old Q Values:  [  169.9257398  23608.78227304  2240.8128026    610.93635926]
New Q values:  [  169.9257398  23608.78227304  1789.8779087    610.93635926]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1256.2871487   2980.50929218   606.149024   -2865.34274144]
------
Step:29, Action:South
State  216
Old Q Values:  [ 1256.2871487   2980.50929218   606.149024   -2865.34274144]
New Q values:  [ 1256.2871487   2550.03868288   606.149024   -2865.34274144]
Reward: -1  Episode Reward:  31
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  4528.11655335]
------
Step:30, Action:West
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  4528.11655335]
New Q values:  [   65.88494308 -4059.26960032 -5588.09647059  6788.24599473]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[16591.99791129 -8521.23367799  1847.46244798  1781.20400734]
------
Step:31, Action:East
State  272
Old Q Values:  [16591.99791129 -8521.23367799  1847.46244798  1781.20400734]
New Q values:  [16591.99791129 -8521.23367799  2774.85877761  1781.20400734]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[   65.88494308 -4059.26960032 -5588.09647059  6788.24599473]
------
Step:32, Action:North
State  288
Old Q Values:  [   65.88494308 -4059.26960032 -5588.09647059  6788.24599473]
New Q values:  [ 1924.68940669 -4059.26960032 -5588.09647059  6788.24599473]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2522.02841669 6329.78476486    0.          930.00701399]
------
Step:33, Action:North
State  216
Old Q Values:  [ 1256.2871487   2550.03868288   606.149024   -2865.34274144]
New Q values:  [  582.99528624  2550.03868288   606.149024   -2865.34274144]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.70268089e+02]
------
Step:34, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.70268089e+02]
New Q values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  3.32568878e+02]
Reward: -1  Episode Reward:  26
xxxxx
x a x
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811   750.20547454   699.07982591]
------
Step:35, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   214.42001017   453.48787414]
New Q values:  [ -281.736      -1150.91067548   184.93866748   453.48787414]
Reward: -1  Episode Reward:  25
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  3.32568878e+02]
------
Step:36, Action:West
State  130
Old Q Values:  [41234.48978377  6927.8788297   -180.00807518 66030.44430924]
New Q values:  [41234.48978377  6927.8788297   -180.00807518 65168.75439891]
Reward: -1  Episode Reward:  24
xxxxx
x a x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 129190.58891738]
------
Step:37, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748   453.48787414]
New Q values:  [ -281.736      -1150.91067548   184.93866748   402.93013847]
Reward: -1  Episode Reward:  23
xxxxx
xa  x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  740.44996272  298.3031776  -252.78192178]
------
Step:38, Action:South
State  98
Old Q Values:  [    0.         46610.82711026 61967.33806945     0.        ]
New Q values:  [    0.         47688.35310271 61967.33806945     0.        ]
Reward: -1  Episode Reward:  22
xxxxx
x   x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         96815.40752869     0.        ]
------
Step:39, Action:East
State  187
Old Q Values:  [ 836.45017667    0.         1069.14077732    0.        ]
New Q values:  [836.45017667   0.         975.30193356   0.        ]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:40, Action:East
State  194
Old Q Values:  [17070.53379332  6511.46529034 14099.03794731  2179.39995143]
New Q values:  [17070.53379332  6511.46529034 15278.89768492  2179.39995143]
Reward: -1  Episode Reward:  20
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32132.94168667  6233.36404734   790.72804752  5103.37501425]
------
Step:41, Action:North
State  216
Old Q Values:  [  582.99528624  2550.03868288   606.149024   -2865.34274144]
New Q values:  [  332.36877791  2550.03868288   606.149024   -2865.34274144]
Reward: -1  Episode Reward:  19
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  3.32568878e+02]
------
Step:42, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  3.32568878e+02]
New Q values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.53306593e+02]
Reward: -1  Episode Reward:  18
xxxxx
x a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748   402.93013847]
------
Step:43, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3623.93332877   766.31091109]
New Q values:  [-9594.56523706 -8069.05606225  3623.93332877  1556.4604472 ]
Reward: -1  Episode Reward:  17
xxxxx
xag x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        4168.45360921 -764.93196255    0.        ]
------
Step:44, Action:South
State  104
Old Q Values:  [-8652.84        4133.7122365   2353.85801711 -8652.84      ]
New Q values:  [-8652.84        4347.64484782  2353.85801711 -8652.84      ]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NW
[ 577.4048542    0.        8982.5331774    0.       ]
------
Step:45, Action:East
State  184
Old Q Values:  [ 577.4048542    0.        8982.5331774    0.       ]
New Q values:  [  577.4048542      0.         10675.04795287     0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  23608.78227304  1789.8779087    610.93635926]
------
Step:46, Action:South
State  194
Old Q Values:  [17070.53379332  6511.46529034 15278.89768492  2179.39995143]
New Q values:  [17070.53379332  7581.58548952 15278.89768492  2179.39995143]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[16591.99791129 -8521.23367799  2774.85877761  1781.20400734]
------
Step:47, Action:North
State  272
Old Q Values:  [16591.99791129 -8521.23367799  2774.85877761  1781.20400734]
New Q values:  [13718.83384643 -8521.23367799  2774.85877761  1781.20400734]
Reward: -1  Episode Reward:  13
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  23608.78227304  1789.8779087    610.93635926]
------
Step:48, Action:South
State  200
Old Q Values:  [  169.9257398  23608.78227304  1789.8779087    610.93635926]
New Q values:  [  169.9257398  13558.56306315  1789.8779087    610.93635926]
Reward: -1  Episode Reward:  12
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[13718.83384643 -8521.23367799  2774.85877761  1781.20400734]
------
Step:49, Action:North
State  272
Old Q Values:  [13718.83384643 -8521.23367799  2774.85877761  1781.20400734]
New Q values:  [ 9554.50245752 -8521.23367799  2774.85877761  1781.20400734]
Reward: -1  Episode Reward:  11
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  13558.56306315  1789.8779087    610.93635926]
------
Step:50, Action:South
State  200
Old Q Values:  [  169.9257398  13558.56306315  1789.8779087    610.93635926]
New Q values:  [ 169.9257398  8289.17596251 1789.8779087   610.93635926]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9554.50245752 -8521.23367799  2774.85877761  1781.20400734]
------
Step:51, Action:North
State  272
Old Q Values:  [ 9554.50245752 -8521.23367799  2774.85877761  1781.20400734]
New Q values:  [ 6307.95377176 -8521.23367799  2774.85877761  1781.20400734]
Reward: -1  Episode Reward:  9
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  8289.17596251 1789.8779087   610.93635926]
------
Step:52, Action:South
State  201
Old Q Values:  [  613.33320563  3363.92909135 -7266.86964969  1311.30124863]
New Q values:  [  613.33320563  3151.42212261 -7266.86964969  1311.30124863]
Reward: -1  Episode Reward:  8
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[6021.50162025 1201.72649971  443.99421868 1516.00788908]
------
Step:53, Action:North
State  273
Old Q Values:  [6021.50162025 1201.72649971  443.99421868 1516.00788908]
New Q values:  [3353.42728488 1201.72649971  443.99421868 1516.00788908]
Reward: -1  Episode Reward:  7
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563  3151.42212261 -7266.86964969  1311.30124863]
------
Step:54, Action:South
State  200
Old Q Values:  [ 169.9257398  8289.17596251 1789.8779087   610.93635926]
New Q values:  [ 169.9257398  5207.45651653 1789.8779087   610.93635926]
Reward: -1  Episode Reward:  6
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6307.95377176 -8521.23367799  2774.85877761  1781.20400734]
------
Step:55, Action:East
State  272
Old Q Values:  [ 6307.95377176 -8521.23367799  2774.85877761  1781.20400734]
New Q values:  [ 6307.95377176 -8521.23367799  3145.81730946  1781.20400734]
Reward: -1  Episode Reward:  5
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.68940669 -4059.26960032 -5588.09647059  6788.24599473]
------
Step:56, Action:West
State  288
Old Q Values:  [ 1924.68940669 -4059.26960032 -5588.09647059  6788.24599473]
New Q values:  [ 1924.68940669 -4059.26960032 -5588.09647059  4607.08452942]
Reward: -1  Episode Reward:  4
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6307.95377176 -8521.23367799  3145.81730946  1781.20400734]
------
Step:57, Action:North
State  272
Old Q Values:  [ 6307.95377176 -8521.23367799  3145.81730946  1781.20400734]
New Q values:  [ 4084.81846366 -8521.23367799  3145.81730946  1781.20400734]
Reward: -1  Episode Reward:  3
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  5207.45651653 1789.8779087   610.93635926]
------
Step:58, Action:South
State  201
Old Q Values:  [  613.33320563  3151.42212261 -7266.86964969  1311.30124863]
New Q values:  [  613.33320563  2265.99703451 -7266.86964969  1311.30124863]
Reward: -1  Episode Reward:  2
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[3353.42728488 1201.72649971  443.99421868 1516.00788908]
------
Step:59, Action:North
State  273
Old Q Values:  [3353.42728488 1201.72649971  443.99421868 1516.00788908]
New Q values:  [2020.57002431 1201.72649971  443.99421868 1516.00788908]
Reward: -1  Episode Reward:  1
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563  2265.99703451 -7266.86964969  1311.30124863]
------
Step:60, Action:South
State  203
Old Q Values:  [3.60604218e+00 3.76178822e+03 1.42261918e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 2.11028630e+03 1.42261918e+03 4.59156348e+03]
Reward: -1  Episode Reward:  0
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[2020.57002431 1201.72649971  443.99421868 1516.00788908]
------
Step:61, Action:North
State  273
Old Q Values:  [2020.57002431 1201.72649971  443.99421868 1516.00788908]
New Q values:  [1487.42712008 1201.72649971  443.99421868 1516.00788908]
Reward: -1  Episode Reward:  -1
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563  2265.99703451 -7266.86964969  1311.30124863]
------
Step:62, Action:South
State  203
Old Q Values:  [3.60604218e+00 2.11028630e+03 1.42261918e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 1.29831689e+03 1.42261918e+03 4.59156348e+03]
Reward: -1  Episode Reward:  -2
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[1487.42712008 1201.72649971  443.99421868 1516.00788908]
------
Step:63, Action:West
State  273
Old Q Values:  [1487.42712008 1201.72649971  443.99421868 1516.00788908]
New Q values:  [ 1487.42712008  1201.72649971   443.99421868 71498.63909141]
Reward: 100009  Episode Reward:  100007
xxxxx
x   x
x   x
xa gx
xxxxx
Episode # 1400
xxxxx
x...x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.68940669 -4059.26960032 -5588.09647059  4607.08452942]
------
Step:1, Action:West
State  288
Old Q Values:  [ 1924.68940669 -4059.26960032 -5588.09647059  4607.08452942]
New Q values:  [ 1924.68940669 -4059.26960032 -5588.09647059  3073.67935087]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4084.81846366 -8521.23367799  3145.81730946  1781.20400734]
------
Step:2, Action:East
State  272
Old Q Values:  [ 4084.81846366 -8521.23367799  3145.81730946  1781.20400734]
New Q values:  [ 4084.81846366 -8521.23367799  2179.83072905  1781.20400734]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.. x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1924.68940669 -4059.26960032 -5588.09647059  3073.67935087]
------
Step:3, Action:North
State  288
Old Q Values:  [ 1924.68940669 -4059.26960032 -5588.09647059  3073.67935087]
New Q values:  [10409.15826868 -4059.26960032 -5588.09647059  3073.67935087]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32132.94168667  6233.36404734   790.72804752  5103.37501425]
------
Step:4, Action:North
State  208
Old Q Values:  [48268.41554717 10591.52282684 -4584.50430574 -1049.83093042]
New Q values:  [38863.39253854 10591.52282684 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  16
xxxxx
x..ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 65168.75439891]
------
Step:5, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.53306593e+02]
New Q values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.27601679e+02]
Reward: 9  Episode Reward:  25
xxxxx
x.a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748   402.93013847]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748   402.93013847]
New Q values:  [ -281.736      -1150.91067548   184.93866748   449.48675765]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   943.04900753  -180.6       ]
------
Step:7, Action:East
State  104
Old Q Values:  [-8652.84        4347.64484782  2353.85801711 -8652.84      ]
New Q values:  [-8652.84        4347.64484782  2028.12320547 -8652.84      ]
Reward: -1  Episode Reward:  33
xxxxx
xga x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  3623.93332877  1556.4604472 ]
------
Step:8, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3623.93332877  1556.4604472 ]
New Q values:  [-9594.56523706 -8069.05606225  2610.44837483  1556.4604472 ]
Reward: -1  Episode Reward:  32
xxxxx
x gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 3871.58347773  660.86649319  830.6837742 ]
------
Step:9, Action:South
State  138
Old Q Values:  [ 1.06807480e+02 -3.97609766e+03 -3.22965309e-01  2.27601679e+02]
New Q values:  [ 1.06807480e+02  1.00679787e+04 -3.22965309e-01  2.27601679e+02]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[38863.39253854 10591.52282684 -4584.50430574 -1049.83093042]
------
Step:10, Action:North
State  208
Old Q Values:  [38863.39253854 10591.52282684 -4584.50430574 -1049.83093042]
New Q values:  [18565.15062488 10591.52282684 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.00679787e+04 -3.22965309e-01  2.27601679e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  1.00679787e+04 -3.22965309e-01  2.27601679e+02]
New Q values:  [ 1.06807480e+02  1.00679787e+04 -3.22965309e-01  2.25286699e+02]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748   449.48675765]
------
Step:12, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2610.44837483  1556.4604472 ]
New Q values:  [-9594.56523706 -8069.05606225  2610.44837483  1872.52026164]
Reward: -1  Episode Reward:  28
xxxxx
xag x
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        4168.45360921 -764.93196255    0.        ]
------
Step:13, Action:South
State  107
Old Q Values:  [-252.35169558  740.44996272  298.3031776  -252.78192178]
New Q values:  [-252.35169558  866.71409959  298.3031776  -252.78192178]
Reward: 9  Episode Reward:  37
xxxxx
x   x
xag x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 357.2991616     0.         1883.78038168 -178.98      ]
------
Step:14, Action:North
State  181
Old Q Values:  [2.48828556e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
New Q values:  [1.25472845e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
Reward: -1  Episode Reward:  36
xxxxx
xa  x
x .gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558  866.71409959  298.3031776  -252.78192178]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558  866.71409959  298.3031776  -252.78192178]
New Q values:  [-252.35169558 3416.15332398  298.3031776  -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa. x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 10233.55894716  1554.80203889]
------
Step:16, Action:East
State  187
Old Q Values:  [836.45017667   0.         975.30193356   0.        ]
New Q values:  [836.45017667   0.         943.76639605   0.        ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:17, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.29831689e+03 1.42261918e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 1.29831689e+03 2.46738310e+03 4.59156348e+03]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2522.02841669 6329.78476486    0.          930.00701399]
------
Step:18, Action:North
State  218
Old Q Values:  [2522.02841669 6329.78476486    0.          930.00701399]
New Q values:  [4028.60497614 6329.78476486    0.          930.00701399]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.00679787e+04 -3.22965309e-01  2.25286699e+02]
------
Step:19, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  1.00679787e+04 -3.22965309e-01  2.25286699e+02]
New Q values:  [ 1.06807480e+02  4.79160308e+03 -3.22965309e-01  2.25286699e+02]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  332.36877791  2550.03868288   606.149024   -2865.34274144]
------
Step:20, Action:South
State  216
Old Q Values:  [  332.36877791  2550.03868288   606.149024   -2865.34274144]
New Q values:  [  332.36877791  4142.16295375   606.149024   -2865.34274144]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10409.15826868 -4059.26960032 -5588.09647059  3073.67935087]
------
Step:21, Action:West
State  288
Old Q Values:  [10409.15826868 -4059.26960032 -5588.09647059  3073.67935087]
New Q values:  [10409.15826868 -4059.26960032 -5588.09647059 22678.46346777]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1487.42712008  1201.72649971   443.99421868 71498.63909141]
------
Step:22, Action:West
State  273
Old Q Values:  [ 1487.42712008  1201.72649971   443.99421868 71498.63909141]
New Q values:  [ 1487.42712008  1201.72649971   443.99421868 99491.69157235]
Reward: 100009  Episode Reward:  100048
xxxxx
x   x
x  gx
xa  x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.30220715e+03 3.83126395e+04 2.45392999e+03]
------
Step:1, Action:East
State  200
Old Q Values:  [ 169.9257398  5207.45651653 1789.8779087   610.93635926]
New Q values:  [ 169.9257398  5207.45651653 1964.0000496   610.93635926]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  332.36877791  4142.16295375   606.149024   -2865.34274144]
------
Step:2, Action:South
State  208
Old Q Values:  [18565.15062488 10591.52282684 -4584.50430574 -1049.83093042]
New Q values:  [18565.15062488 11045.54817107 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10409.15826868 -4059.26960032 -5588.09647059 22678.46346777]
------
Step:3, Action:West
State  288
Old Q Values:  [10409.15826868 -4059.26960032 -5588.09647059 22678.46346777]
New Q values:  [10409.15826868 -4059.26960032 -5588.09647059 10302.23092621]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 4084.81846366 -8521.23367799  2179.83072905  1781.20400734]
------
Step:4, Action:North
State  272
Old Q Values:  [ 4084.81846366 -8521.23367799  2179.83072905  1781.20400734]
New Q values:  [13127.1192404  -8521.23367799  2179.83072905  1781.20400734]
Reward: -1  Episode Reward:  26
xxxxx
xg..x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3.89777037e-01 4.30220715e+03 3.83126395e+04 2.45392999e+03]
------
Step:5, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.30220715e+03 3.83126395e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 4.30220715e+03 2.08940010e+04 2.45392999e+03]
Reward: -1  Episode Reward:  25
xxxxx
x g.x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18565.15062488 11045.54817107 -4584.50430574 -1049.83093042]
------
Step:6, Action:North
State  208
Old Q Values:  [18565.15062488 11045.54817107 -4584.50430574 -1049.83093042]
New Q values:  [ 8868.9411752  11045.54817107 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  34
xxxxx
x .ax
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  4.79160308e+03 -3.22965309e-01  2.25286699e+02]
------
Step:7, Action:South
State  136
Old Q Values:  [ 878.22269011 3871.58347773  660.86649319  830.6837742 ]
New Q values:  [ 878.22269011 4861.69784241  660.86649319  830.6837742 ]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8868.9411752  11045.54817107 -4584.50430574 -1049.83093042]
------
Step:8, Action:South
State  208
Old Q Values:  [ 8868.9411752  11045.54817107 -4584.50430574 -1049.83093042]
New Q values:  [ 8868.9411752   7540.36674903 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  32
xxxxx
x . x
x.g x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10409.15826868 -4059.26960032 -5588.09647059 10302.23092621]
------
Step:9, Action:North
State  288
Old Q Values:  [10409.15826868 -4059.26960032 -5588.09647059 10302.23092621]
New Q values:  [  823.74566003 -4059.26960032 -5588.09647059 10302.23092621]
Reward: -10001  Episode Reward:  -9969
xxxxx
x . x
x. gx
x.  x
xxxxx
xxxxx
x..gx
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-1649.89560358  3592.96905128 20358.60411776   767.35890262]
------
Step:1, Action:East
State  192
Old Q Values:  [3.89777037e-01 4.30220715e+03 2.08940010e+04 2.45392999e+03]
New Q values:  [3.89777037e-01 4.30220715e+03 9.60564928e+03 2.45392999e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  332.36877791  4142.16295375   606.149024   -2865.34274144]
------
Step:2, Action:South
State  208
Old Q Values:  [ 8868.9411752   7540.36674903 -4584.50430574 -1049.83093042]
New Q values:  [ 8868.9411752   6112.21597747 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  823.74566003 -4059.26960032 -5588.09647059 10302.23092621]
------
Step:3, Action:West
State  288
Old Q Values:  [  823.74566003 -4059.26960032 -5588.09647059 10302.23092621]
New Q values:  [  823.74566003 -4059.26960032 -5588.09647059  8064.4281426 ]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[13127.1192404  -8521.23367799  2179.83072905  1781.20400734]
------
Step:4, Action:North
State  272
Old Q Values:  [13127.1192404  -8521.23367799  2179.83072905  1781.20400734]
New Q values:  [10371.40783416 -8521.23367799  2179.83072905  1781.20400734]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
x.a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[17070.53379332  7581.58548952 15278.89768492  2179.39995143]
------
Step:5, Action:North
State  192
Old Q Values:  [3.89777037e-01 4.30220715e+03 9.60564928e+03 2.45392999e+03]
New Q values:  [38762.73258603  4302.20715371  9605.64928374  2453.92999194]
Reward: 9  Episode Reward:  35
xxxxx
x.a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 129190.58891738]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748   449.48675765]
New Q values:  [ -281.736      -1150.91067548   184.93866748   468.10940532]
Reward: 9  Episode Reward:  44
xxxxx
xa  x
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   943.04900753  -180.6       ]
------
Step:7, Action:East
State  106
Old Q Values:  [ -180.6        -6764.65639938   943.04900753  -180.6       ]
New Q values:  [ -180.6        -6764.65639938   517.05242461  -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748   468.10940532]
------
Step:8, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2610.44837483  1872.52026164]
New Q values:  [-9594.56523706 -8069.05606225  2610.44837483 -3947.298441  ]
Reward: -10001  Episode Reward:  -9958
xxxxx
xg  x
x.  x
x.  x
xxxxx
xxxxx
x...x
x.g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10371.40783416 -8521.23367799  2179.83072905  1781.20400734]
------
Step:1, Action:East
State  272
Old Q Values:  [10371.40783416 -8521.23367799  2179.83072905  1781.20400734]
New Q values:  [10371.40783416 -8521.23367799  3296.6607344   1781.20400734]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  823.74566003 -4059.26960032 -5588.09647059  8064.4281426 ]
------
Step:2, Action:West
State  288
Old Q Values:  [  823.74566003 -4059.26960032 -5588.09647059  8064.4281426 ]
New Q values:  [  823.74566003 -4059.26960032 -5588.09647059  6336.59360729]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.g.x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10371.40783416 -8521.23367799  3296.6607344   1781.20400734]
------
Step:3, Action:East
State  272
Old Q Values:  [10371.40783416 -8521.23367799  3296.6607344   1781.20400734]
New Q values:  [10371.40783416 -8521.23367799  3219.04237595  1781.20400734]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x. .x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  823.74566003 -4059.26960032 -5588.09647059  6336.59360729]
------
Step:4, Action:North
State  288
Old Q Values:  [  823.74566003 -4059.26960032 -5588.09647059  6336.59360729]
New Q values:  [ 9974.78077001 -4059.26960032 -5588.09647059  6336.59360729]
Reward: 9  Episode Reward:  16
xxxxx
x...x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32132.94168667  6233.36404734   790.72804752  5103.37501425]
------
Step:5, Action:North
State  208
Old Q Values:  [ 8868.9411752   6112.21597747 -4584.50430574 -1049.83093042]
New Q values:  [23103.60278975  6112.21597747 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  25
xxxxx
x..ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 65168.75439891]
------
Step:6, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  4.79160308e+03 -3.22965309e-01  2.25286699e+02]
New Q values:  [ 1.06807480e+02  4.79160308e+03 -3.22965309e-01  2.35947501e+02]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748   468.10940532]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748   468.10940532]
New Q values:  [ -281.736      -1150.91067548   184.93866748   347.75948951]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   517.05242461  -180.6       ]
------
Step:8, Action:East
State  107
Old Q Values:  [-252.35169558 3416.15332398  298.3031776  -252.78192178]
New Q values:  [-252.35169558 3416.15332398  223.04911789 -252.78192178]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748   347.75948951]
------
Step:9, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811   750.20547454   699.07982591]
New Q values:  [  -56.91790269 -1902.20915811   750.20547454  1303.87792756]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3416.15332398  223.04911789 -252.78192178]
------
Step:10, Action:South
State  105
Old Q Values:  [-180.6        4168.45360921 -764.93196255    0.        ]
New Q values:  [-180.6        2237.91555819 -764.93196255    0.        ]
Reward: 9  Episode Reward:  50
xxxxx
x  gx
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 357.2991616     0.         1883.78038168 -178.98      ]
------
Step:11, Action:East
State  185
Old Q Values:  [ 357.2991616     0.         1883.78038168 -178.98      ]
New Q values:  [ 357.2991616     0.         1432.71126302 -178.98      ]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563  2265.99703451 -7266.86964969  1311.30124863]
------
Step:12, Action:South
State  200
Old Q Values:  [ 169.9257398  5207.45651653 1964.0000496   610.93635926]
New Q values:  [ 169.9257398  5193.80495686 1964.0000496   610.93635926]
Reward: -1  Episode Reward:  48
xxxxx
x g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10371.40783416 -8521.23367799  3219.04237595  1781.20400734]
------
Step:13, Action:North
State  273
Old Q Values:  [ 1487.42712008  1201.72649971   443.99421868 99491.69157235]
New Q values:  [ 1274.16995838  1201.72649971   443.99421868 99491.69157235]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563  2265.99703451 -7266.86964969  1311.30124863]
------
Step:14, Action:South
State  200
Old Q Values:  [ 169.9257398  5193.80495686 1964.0000496   610.93635926]
New Q values:  [ 169.9257398  5188.34433299 1964.0000496   610.93635926]
Reward: -1  Episode Reward:  46
xxxxx
x g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10371.40783416 -8521.23367799  3219.04237595  1781.20400734]
------
Step:15, Action:North
State  273
Old Q Values:  [ 1274.16995838  1201.72649971   443.99421868 99491.69157235]
New Q values:  [ 1188.86709371  1201.72649971   443.99421868 99491.69157235]
Reward: -1  Episode Reward:  45
xxxxx
x  gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563  2265.99703451 -7266.86964969  1311.30124863]
------
Step:16, Action:South
State  201
Old Q Values:  [  613.33320563  2265.99703451 -7266.86964969  1311.30124863]
New Q values:  [  613.33320563 30753.30628551 -7266.86964969  1311.30124863]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971   443.99421868 99491.69157235]
------
Step:17, Action:West
State  272
Old Q Values:  [10371.40783416 -8521.23367799  3219.04237595  1781.20400734]
New Q values:  [10371.40783416 -8521.23367799  3219.04237595 71604.71753872]
Reward: 100009  Episode Reward:  100053
xxxxx
x   x
x g x
xa  x
xxxxx
xxxxx
x.g.x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10371.40783416 -8521.23367799  3219.04237595 71604.71753872]
------
Step:1, Action:West
State  273
Old Q Values:  [ 1188.86709371  1201.72649971   443.99421868 99491.69157235]
New Q values:  [ 1188.86709371  1201.72649971   443.99421868 40984.86516432]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3942.62845127  1352.37702619 -3346.86631277   -12.17474163]
------
Step:2, Action:North
State  261
Old Q Values:  [ 3942.62845127  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4292.31209068  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
xa.gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.25472845e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
------
Step:3, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039   3397.64726527     0.        ]
New Q values:  [60476.05138135 21430.9929039   7472.04014144     0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-1649.89560358  3592.96905128 20358.60411776   767.35890262]
------
Step:4, Action:East
State  192
Old Q Values:  [38762.73258603  4302.20715371  9605.64928374  2453.92999194]
New Q values:  [38762.73258603  4302.20715371 10778.74055042  2453.92999194]
Reward: 9  Episode Reward:  36
xxxxx
x.g.x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23103.60278975  6112.21597747 -4584.50430574 -1049.83093042]
------
Step:5, Action:North
State  216
Old Q Values:  [  332.36877791  4142.16295375   606.149024   -2865.34274144]
New Q values:  [ 1596.85686389  4142.16295375   606.149024   -2865.34274144]
Reward: 9  Episode Reward:  45
xxxxx
xg ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 4861.69784241  660.86649319  830.6837742 ]
------
Step:6, Action:South
State  128
Old Q Values:  [33824.65033405  7449.59323443 -8652.84       56592.57475141]
New Q values:  [33824.65033405  9910.3181307  -8652.84       56592.57475141]
Reward: -1  Episode Reward:  44
xxxxx
x.g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[23103.60278975  6112.21597747 -4584.50430574 -1049.83093042]
------
Step:7, Action:North
State  208
Old Q Values:  [23103.60278975  6112.21597747 -4584.50430574 -1049.83093042]
New Q values:  [28791.46743557  6112.21597747 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  43
xxxxx
x. ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 65168.75439891]
------
Step:8, Action:West
State  130
Old Q Values:  [41234.48978377  6927.8788297   -180.00807518 65168.75439891]
New Q values:  [41234.48978377  6927.8788297   -180.00807518 64824.07843478]
Reward: -1  Episode Reward:  42
xxxxx
x.a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 129190.58891738]
------
Step:9, Action:West
State  126
Old Q Values:  [   0.          331.64678262 5819.75713236  601.69442602]
New Q values:  [   0.          331.64678262 5819.75713236  708.37121963]
Reward: 9  Episode Reward:  51
xxxxx
xa  x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1540.97816408 -180.6       ]
------
Step:10, Action:East
State  110
Old Q Values:  [-239.29051573 1080.40897381 1540.97816408 -180.6       ]
New Q values:  [-239.29051573 1080.40897381 2361.71840534 -180.6       ]
Reward: -1  Episode Reward:  50
xxxxx
x a x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 5819.75713236  708.37121963]
------
Step:11, Action:East
State  126
Old Q Values:  [   0.          331.64678262 5819.75713236  708.37121963]
New Q values:  [   0.          331.64678262 3764.78377819  708.37121963]
Reward: -1  Episode Reward:  49
xxxxx
x  ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  4.79160308e+03 -3.22965309e-01  2.35947501e+02]
------
Step:12, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  4.79160308e+03 -3.22965309e-01  2.35947501e+02]
New Q values:  [ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  2.35947501e+02]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  ax
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4028.60497614 6329.78476486    0.          930.00701399]
------
Step:13, Action:South
State  210
Old Q Values:  [32132.94168667  6233.36404734   790.72804752  5103.37501425]
New Q values:  [32132.94168667 65491.17984994   790.72804752  5103.37501425]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x   x
xg ax
xxxxx
xxxxx
x...x
x...x
xa.gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4292.31209068  1352.37702619 -3346.86631277   -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [ 4292.31209068  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4792.39252042  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 10233.55894716  1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [  861.58025644  1357.09471455 10233.55894716  1554.80203889]
New Q values:  [  861.58025644  1357.09471455 15785.50822599  1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[38955.61549043 16053.62807234  8240.17937465  1169.39963074]
------
Step:3, Action:North
State  198
Old Q Values:  [ 1922.41774059  -200.61022961 11290.0677349      0.        ]
New Q values:  [39531.54377145  -200.61022961 11290.0677349      0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x.a.x
x  .x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 129190.58891738]
------
Step:4, Action:West
State  118
Old Q Values:  [1761.89752936 9228.38540599    0.         4701.96556762]
New Q values:  [1761.89752936 9228.38540599    0.         2594.70174865]
Reward: 9  Episode Reward:  36
xxxxx
xa .x
x  .x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 2361.71840534 -180.6       ]
------
Step:5, Action:East
State  110
Old Q Values:  [-239.29051573 1080.40897381 2361.71840534 -180.6       ]
New Q values:  [-239.29051573 1080.40897381 3712.60298393 -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x a.x
xg .x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 9228.38540599    0.         2594.70174865]
------
Step:6, Action:South
State  118
Old Q Values:  [1761.89752936 9228.38540599    0.         2594.70174865]
New Q values:  [1761.89752936 5386.05290848    0.         2594.70174865]
Reward: -10001  Episode Reward:  -9966
xxxxx
x  .x
x g.x
x . x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 3712.60298393 -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869 3120.36387899  364.12670823 -120.29354603]
New Q values:  [-177.44732869 3120.36387899  255.37853015 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748   347.75948951]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748   347.75948951]
New Q values:  [ -281.736      -1150.91067548   184.93866748  1252.28469098]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 3712.60298393 -180.6       ]
------
Step:3, Action:East
State  110
Old Q Values:  [-239.29051573 1080.40897381 3712.60298393 -180.6       ]
New Q values:  [-239.29051573 1080.40897381 1860.12660087 -180.6       ]
Reward: -1  Episode Reward:  7
xxxxx
x a.x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748  1252.28469098]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748  1252.28469098]
New Q values:  [ -281.736      -1150.91067548   184.93866748  1436.42304009]
Reward: -1  Episode Reward:  6
xxxxx
xa .x
x.g.x
x ..x
xxxxx
Step:5, Action:West
State  109
Old Q Values:  [ -241.10880094  1617.82669824 -2165.66138672  -180.6       ]
New Q values:  [ -241.10880094  1617.82669824 -2165.66138672   232.50800947]
Reward: -301  Episode Reward:  -295
xxxxx
xag.x
x...x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1617.82669824 -2165.66138672   232.50800947]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 3120.36387899  255.37853015 -120.29354603]
New Q values:  [-177.44732869 3963.40626177  255.37853015 -120.29354603]
Reward: 9  Episode Reward:  -286
xxxxx
x  .x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.25472845e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
------
Step:7, Action:North
State  180
Old Q Values:  [ 4801.97414375  3060.24711158 10598.04199334 -4966.32149798]
New Q values:  [ 2478.22763776  3060.24711158 10598.04199334 -4966.32149798]
Reward: -1  Episode Reward:  -287
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1860.12660087 -180.6       ]
------
Step:8, Action:East
State  111
Old Q Values:  [-177.44732869 3963.40626177  255.37853015 -120.29354603]
New Q values:  [-177.44732869 3963.40626177 1230.98654552 -120.29354603]
Reward: -1  Episode Reward:  -288
xxxxx
x a.x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 3764.78377819  708.37121963]
------
Step:9, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811   750.20547454  1303.87792756]
New Q values:  [  -56.91790269 -1902.20915811  1449.97518875  1303.87792756]
Reward: 9  Episode Reward:  -279
xxxxx
x  ax
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  2.35947501e+02]
------
Step:10, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  2.35947501e+02]
New Q values:  [ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  5.28771557e+02]
Reward: -1  Episode Reward:  -280
xxxxx
x a x
x ..x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  1449.97518875  1303.87792756]
------
Step:11, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  1449.97518875  1303.87792756]
New Q values:  [  -56.91790269 -1902.20915811  1723.88307444  1303.87792756]
Reward: -1  Episode Reward:  -281
xxxxx
x  ax
x .gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  5.28771557e+02]
------
Step:12, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  5.28771557e+02]
New Q values:  [ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  1.34034376e+03]
Reward: -1  Episode Reward:  -282
xxxxx
x a x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 3764.78377819  708.37121963]
------
Step:13, Action:East
State  126
Old Q Values:  [   0.          331.64678262 3764.78377819  708.37121963]
New Q values:  [   0.          331.64678262 2649.80651021  708.37121963]
Reward: -1  Episode Reward:  -283
xxxxx
x  ax
x ..x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  1.34034376e+03]
------
Step:14, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.81497666e+03 -3.22965309e-01  1.34034376e+03]
New Q values:  [ 1.06807480e+02  3.43032609e+03 -3.22965309e-01  1.34034376e+03]
Reward: 9  Episode Reward:  -274
xxxxx
x   x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4028.60497614 6329.78476486    0.          930.00701399]
------
Step:15, Action:South
State  208
Old Q Values:  [28791.46743557  6112.21597747 -4584.50430574 -1049.83093042]
New Q values:  [28791.46743557  5442.72062199 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  -265
xxxxx
x   x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9974.78077001 -4059.26960032 -5588.09647059  6336.59360729]
------
Step:16, Action:North
State  288
Old Q Values:  [ 9974.78077001 -4059.26960032 -5588.09647059  6336.59360729]
New Q values:  [23636.66626299 -4059.26960032 -5588.09647059  6336.59360729]
Reward: -1  Episode Reward:  -266
xxxxx
x   x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32132.94168667 65491.17984994   790.72804752  5103.37501425]
------
Step:17, Action:South
State  210
Old Q Values:  [32132.94168667 65491.17984994   790.72804752  5103.37501425]
New Q values:  [32132.94168667 33286.87181887   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  -267
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23636.66626299 -4059.26960032 -5588.09647059  6336.59360729]
------
Step:18, Action:North
State  288
Old Q Values:  [23636.66626299 -4059.26960032 -5588.09647059  6336.59360729]
New Q values:  [19440.12805086 -4059.26960032 -5588.09647059  6336.59360729]
Reward: -1  Episode Reward:  -268
xxxxx
x   x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32132.94168667 33286.87181887   790.72804752  5103.37501425]
------
Step:19, Action:South
State  208
Old Q Values:  [28791.46743557  5442.72062199 -4584.50430574 -1049.83093042]
New Q values:  [28791.46743557  8008.52666405 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  -269
xxxxx
x   x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19440.12805086 -4059.26960032 -5588.09647059  6336.59360729]
------
Step:20, Action:North
State  288
Old Q Values:  [19440.12805086 -4059.26960032 -5588.09647059  6336.59360729]
New Q values:  [ 9018.10010647 -4059.26960032 -5588.09647059  6336.59360729]
Reward: -1  Episode Reward:  -270
xxxxx
x   x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1596.85686389  4142.16295375   606.149024   -2865.34274144]
------
Step:21, Action:South
State  208
Old Q Values:  [28791.46743557  8008.52666405 -4584.50430574 -1049.83093042]
New Q values:  [28791.46743557  5908.24069756 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  -271
xxxxx
x   x
x .gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9018.10010647 -4059.26960032 -5588.09647059  6336.59360729]
------
Step:22, Action:West
State  288
Old Q Values:  [ 9018.10010647 -4059.26960032 -5588.09647059  6336.59360729]
New Q values:  [ 9018.10010647 -4059.26960032 -5588.09647059 14835.49699221]
Reward: 9  Episode Reward:  -262
xxxxx
x  gx
x . x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971   443.99421868 40984.86516432]
------
Step:23, Action:West
State  272
Old Q Values:  [10371.40783416 -8521.23367799  3219.04237595 71604.71753872]
New Q values:  [10371.40783416 -8521.23367799  3219.04237595 30079.00477161]
Reward: -1  Episode Reward:  -263
xxxxx
x g x
x . x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4792.39252042  1352.37702619 -3346.86631277   -12.17474163]
------
Step:24, Action:North
State  257
Old Q Values:  [36289.45311927 15812.0704082  13169.98702937  1875.31501677]
New Q values:  [32657.99666211 15812.0704082  13169.98702937  1875.31501677]
Reward: -1  Episode Reward:  -264
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039   7472.04014144     0.        ]
------
Step:25, Action:North
State  181
Old Q Values:  [1.25472845e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
New Q values:  [9.86639390e+02 1.23686189e+03 9.03286903e+03 3.33862213e+00]
Reward: -1  Episode Reward:  -265
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1617.82669824 -2165.66138672   232.50800947]
------
Step:26, Action:South
State  109
Old Q Values:  [ -241.10880094  1617.82669824 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  3356.39138947 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  -266
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[9.86639390e+02 1.23686189e+03 9.03286903e+03 3.33862213e+00]
------
Step:27, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039   7472.04014144     0.        ]
New Q values:  [60476.05138135 21430.9929039  74623.03583238     0.        ]
Reward: 100009  Episode Reward:  99743
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2610.44837483 -3947.298441  ]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2610.44837483 -3947.298441  ]
New Q values:  [-9594.56523706 -8069.05606225  2508.08870266 -3947.298441  ]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 4861.69784241  660.86649319  830.6837742 ]
------
Step:2, Action:South
State  136
Old Q Values:  [ 878.22269011 4861.69784241  660.86649319  830.6837742 ]
New Q values:  [ 878.22269011 3192.72802309  660.86649319  830.6837742 ]
Reward: 9  Episode Reward:  18
xxxxx
x  gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1596.85686389  4142.16295375   606.149024   -2865.34274144]
------
Step:3, Action:South
State  208
Old Q Values:  [28791.46743557  5908.24069756 -4584.50430574 -1049.83093042]
New Q values:  [28791.46743557  6819.34537669 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9018.10010647 -4059.26960032 -5588.09647059 14835.49699221]
------
Step:4, Action:West
State  288
Old Q Values:  [ 9018.10010647 -4059.26960032 -5588.09647059 14835.49699221]
New Q values:  [ 9018.10010647 -4059.26960032 -5588.09647059 14963.30022837]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10371.40783416 -8521.23367799  3219.04237595 30079.00477161]
------
Step:5, Action:West
State  272
Old Q Values:  [10371.40783416 -8521.23367799  3219.04237595 30079.00477161]
New Q values:  [10371.40783416 -8521.23367799  3219.04237595 21834.40090728]
Reward: 9  Episode Reward:  45
xxxxx
x g x
x.. x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[32657.99666211 15812.0704082  13169.98702937  1875.31501677]
------
Step:6, Action:North
State  261
Old Q Values:  [ 4792.39252042  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4632.21771834  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  54
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[9.86639390e+02 1.23686189e+03 9.03286903e+03 3.33862213e+00]
------
Step:7, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039  74623.03583238     0.        ]
New Q values:  [60476.05138135 21430.9929039  95962.19556828     0.        ]
Reward: 100009  Episode Reward:  100063
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x.a.x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748  1436.42304009]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748  1436.42304009]
New Q values:  [ -281.736      -1150.91067548   184.93866748  1768.99109457]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3963.40626177 1230.98654552 -120.29354603]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 3963.40626177 1230.98654552 -120.29354603]
New Q values:  [-177.44732869 6320.41497251 1230.98654552 -120.29354603]
Reward: -1  Episode Reward:  8
xxxxx
x  .x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 15785.50822599  1554.80203889]
------
Step:3, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  6711.47745126     0.        ]
New Q values:  [    0.         -5536.05678243 14549.45411194     0.        ]
Reward: 9  Episode Reward:  17
xxxxx
x  .x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[39531.54377145  -200.61022961 11290.0677349      0.        ]
------
Step:4, Action:North
State  196
Old Q Values:  [16516.81111239  1238.89434634 25650.99582026   231.67262594]
New Q values:  [ 7136.82177333  1238.89434634 25650.99582026   231.67262594]
Reward: -1  Episode Reward:  16
xxxxx
x a.x
xg .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748  1768.99109457]
------
Step:5, Action:West
State  126
Old Q Values:  [   0.          331.64678262 2649.80651021  708.37121963]
New Q values:  [   0.          331.64678262 2649.80651021  840.78646811]
Reward: -1  Episode Reward:  15
xxxxx
xa .x
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1860.12660087 -180.6       ]
------
Step:6, Action:East
State  111
Old Q Values:  [-177.44732869 6320.41497251 1230.98654552 -120.29354603]
New Q values:  [-177.44732869 6320.41497251 1286.73657127 -120.29354603]
Reward: -1  Episode Reward:  14
xxxxx
x a.x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 2649.80651021  840.78646811]
------
Step:7, Action:East
State  126
Old Q Values:  [   0.          331.64678262 2649.80651021  840.78646811]
New Q values:  [   0.          331.64678262 2094.4204325   840.78646811]
Reward: 9  Episode Reward:  23
xxxxx
x  ax
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.43032609e+03 -3.22965309e-01  1.34034376e+03]
------
Step:8, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.43032609e+03 -3.22965309e-01  1.34034376e+03]
New Q values:  [ 1.06807480e+02  3.27646587e+03 -3.22965309e-01  1.34034376e+03]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4028.60497614 6329.78476486    0.          930.00701399]
------
Step:9, Action:South
State  216
Old Q Values:  [ 1596.85686389  4142.16295375   606.149024   -2865.34274144]
New Q values:  [ 1596.85686389  6151.25525001   606.149024   -2865.34274144]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9018.10010647 -4059.26960032 -5588.09647059 14963.30022837]
------
Step:10, Action:West
State  288
Old Q Values:  [ 9018.10010647 -4059.26960032 -5588.09647059 14963.30022837]
New Q values:  [ 9018.10010647 -4059.26960032 -5588.09647059 12541.04036353]
Reward: 9  Episode Reward:  50
xxxxx
x g x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10371.40783416 -8521.23367799  3219.04237595 21834.40090728]
------
Step:11, Action:West
State  272
Old Q Values:  [10371.40783416 -8521.23367799  3219.04237595 21834.40090728]
New Q values:  [ 10371.40783416  -8521.23367799   3219.04237595 100252.10070949]
Reward: 100009  Episode Reward:  100059
xxxxx
xg  x
x   x
xa  x
xxxxx
xxxxx
xa..x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 6320.41497251 1286.73657127 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 5983.10662599 1868.2303995     0.        ]
New Q values:  [ 221.30610858 7134.29511819 1868.2303995     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 15785.50822599  1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [  861.58025644  1357.09471455 15785.50822599  1554.80203889]
New Q values:  [  861.58025644  1357.09471455 18179.06642183  1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[39531.54377145  -200.61022961 11290.0677349      0.        ]
------
Step:3, Action:North
State  198
Old Q Values:  [39531.54377145  -200.61022961 11290.0677349      0.        ]
New Q values:  [16446.34363833  -200.61022961 11290.0677349      0.        ]
Reward: 9  Episode Reward:  27
xxxxx
x a.x
x  .x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 2094.4204325   840.78646811]
------
Step:4, Action:East
State  126
Old Q Values:  [   0.          331.64678262 2094.4204325   840.78646811]
New Q values:  [   0.          331.64678262 1826.1079332   840.78646811]
Reward: 9  Episode Reward:  36
xxxxx
x  ax
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.27646587e+03 -3.22965309e-01  1.34034376e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.27646587e+03 -3.22965309e-01  1.34034376e+03]
New Q values:  [ 1.06807480e+02  3.21492178e+03 -3.22965309e-01  1.34034376e+03]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[4028.60497614 6329.78476486    0.          930.00701399]
------
Step:6, Action:North
State  218
Old Q Values:  [4028.60497614 6329.78476486    0.          930.00701399]
New Q values:  [2575.31852337 6329.78476486    0.          930.00701399]
Reward: -1  Episode Reward:  44
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.21492178e+03 -3.22965309e-01  1.34034376e+03]
------
Step:7, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.21492178e+03 -3.22965309e-01  1.34034376e+03]
New Q values:  [ 1.06807480e+02  3.18430414e+03 -3.22965309e-01  1.34034376e+03]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[2575.31852337 6329.78476486    0.          930.00701399]
------
Step:8, Action:North
State  218
Old Q Values:  [2575.31852337 6329.78476486    0.          930.00701399]
New Q values:  [1984.81865135 6329.78476486    0.          930.00701399]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.18430414e+03 -3.22965309e-01  1.34034376e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.18430414e+03 -3.22965309e-01  1.34034376e+03]
New Q values:  [ 1.06807480e+02  3.11849823e+03 -3.22965309e-01  1.34034376e+03]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1596.85686389  6151.25525001   606.149024   -2865.34274144]
------
Step:10, Action:South
State  216
Old Q Values:  [ 1596.85686389  6151.25525001   606.149024   -2865.34274144]
New Q values:  [ 1596.85686389  6228.21420906   606.149024   -2865.34274144]
Reward: 9  Episode Reward:  50
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9018.10010647 -4059.26960032 -5588.09647059 12541.04036353]
------
Step:11, Action:West
State  288
Old Q Values:  [ 9018.10010647 -4059.26960032 -5588.09647059 12541.04036353]
New Q values:  [ 9018.10010647 -4059.26960032 -5588.09647059 35091.44635826]
Reward: -1  Episode Reward:  49
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 10371.40783416  -8521.23367799   3219.04237595 100252.10070949]
------
Step:12, Action:West
State  272
Old Q Values:  [ 10371.40783416  -8521.23367799   3219.04237595 100252.10070949]
New Q values:  [ 10371.40783416  -8521.23367799   3219.04237595 109903.63928243]
Reward: 100009  Episode Reward:  100058
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
xa..x
x...x
xg..x
xxxxx
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869 6320.41497251 1286.73657127 -120.29354603]
New Q values:  [-177.44732869 6320.41497251 1050.79195688 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748  1768.99109457]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748  1768.99109457]
New Q values:  [ -281.736      -1150.91067548   184.93866748  2603.12092958]
Reward: -1  Episode Reward:  8
xxxxx
xa .x
x.g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 6320.41497251 1050.79195688 -120.29354603]
------
Step:3, Action:South
State  109
Old Q Values:  [ -241.10880094  3356.39138947 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  4057.81726597 -2165.66138672   232.50800947]
Reward: 9  Episode Reward:  17
xxxxx
x g.x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[9.86639390e+02 1.23686189e+03 9.03286903e+03 3.33862213e+00]
------
Step:4, Action:East
State  180
Old Q Values:  [ 2478.22763776  3060.24711158 10598.04199334 -4966.32149798]
New Q values:  [ 2478.22763776  3060.24711158 11939.91554342 -4966.32149798]
Reward: 9  Episode Reward:  26
xxxxx
xg .x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ 7136.82177333  1238.89434634 25650.99582026   231.67262594]
------
Step:5, Action:East
State  196
Old Q Values:  [ 7136.82177333  1238.89434634 25650.99582026   231.67262594]
New Q values:  [ 7136.82177333  1238.89434634 12134.26259082   231.67262594]
Reward: 9  Episode Reward:  35
xxxxx
x  .x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1596.85686389  6228.21420906   606.149024   -2865.34274144]
------
Step:6, Action:South
State  210
Old Q Values:  [32132.94168667 33286.87181887   790.72804752  5103.37501425]
New Q values:  [32132.94168667 23847.58263503   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  44
xxxxx
x  .x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9018.10010647 -4059.26960032 -5588.09647059 35091.44635826]
------
Step:7, Action:West
State  288
Old Q Values:  [ 9018.10010647 -4059.26960032 -5588.09647059 35091.44635826]
New Q values:  [ 9018.10010647 -4059.26960032 -5588.09647059 14635.84422073]
Reward: 9  Episode Reward:  53
xxxxx
x  .x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  1979.55225807  1599.95824685]
------
Step:8, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  1979.55225807  1599.95824685]
New Q values:  [-2561.28592178 -5807.06396197  5181.97416945  1599.95824685]
Reward: -1  Episode Reward:  52
xxxxx
xg .x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9018.10010647 -4059.26960032 -5588.09647059 14635.84422073]
------
Step:9, Action:West
State  288
Old Q Values:  [ 9018.10010647 -4059.26960032 -5588.09647059 14635.84422073]
New Q values:  [ 9018.10010647 -4059.26960032 -5588.09647059  7408.32993912]
Reward: -1  Episode Reward:  51
xxxxx
x  .x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  5181.97416945  1599.95824685]
------
Step:10, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  5181.97416945  1599.95824685]
New Q values:  [-2561.28592178 -5807.06396197  4777.61969972  1599.95824685]
Reward: -1  Episode Reward:  50
xxxxx
x  .x
x g x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9018.10010647 -4059.26960032 -5588.09647059  7408.32993912]
------
Step:11, Action:North
State  288
Old Q Values:  [ 9018.10010647 -4059.26960032 -5588.09647059  7408.32993912]
New Q values:  [ 6244.08027326 -4059.26960032 -5588.09647059  7408.32993912]
Reward: -10001  Episode Reward:  -9951
xxxxx
x  .x
x  gx
x   x
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.11849823e+03 -3.22965309e-01  1.34034376e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.11849823e+03 -3.22965309e-01  1.34034376e+03]
New Q values:  [ 1.06807480e+02  1.08926818e+04 -3.22965309e-01  1.34034376e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32132.94168667 23847.58263503   790.72804752  5103.37501425]
------
Step:2, Action:North
State  210
Old Q Values:  [32132.94168667 23847.58263503   790.72804752  5103.37501425]
New Q values:  [16120.38121419 23847.58263503   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.08926818e+04 -3.22965309e-01  1.34034376e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  1.08926818e+04 -3.22965309e-01  1.34034376e+03]
New Q values:  [ 1.06807480e+02  1.15107475e+04 -3.22965309e-01  1.34034376e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16120.38121419 23847.58263503   790.72804752  5103.37501425]
------
Step:4, Action:South
State  210
Old Q Values:  [16120.38121419 23847.58263503   790.72804752  5103.37501425]
New Q values:  [16120.38121419 11766.93203575   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  16
xxxxx
x.. x
x . x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6244.08027326 -4059.26960032 -5588.09647059  7408.32993912]
------
Step:5, Action:North
State  288
Old Q Values:  [ 6244.08027326 -4059.26960032 -5588.09647059  7408.32993912]
New Q values:  [ 7333.14647356 -4059.26960032 -5588.09647059  7408.32993912]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x .ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16120.38121419 11766.93203575   790.72804752  5103.37501425]
------
Step:6, Action:North
State  210
Old Q Values:  [16120.38121419 11766.93203575   790.72804752  5103.37501425]
New Q values:  [25894.77601611 11766.93203575   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  14
xxxxx
x..ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 64824.07843478]
------
Step:7, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  1.15107475e+04 -3.22965309e-01  1.34034376e+03]
New Q values:  [ 1.06807480e+02  1.15107475e+04 -3.22965309e-01  1.05870242e+03]
Reward: 9  Episode Reward:  23
xxxxx
x.a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  1723.88307444  1303.87792756]
------
Step:8, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  1723.88307444  1303.87792756]
New Q values:  [  -56.91790269 -1902.20915811  4142.17748273  1303.87792756]
Reward: -1  Episode Reward:  22
xxxxx
x. ax
x .gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.15107475e+04 -3.22965309e-01  1.05870242e+03]
------
Step:9, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  1.15107475e+04 -3.22965309e-01  1.05870242e+03]
New Q values:  [ 1.06807480e+02  1.15107475e+04 -3.22965309e-01  1.20381725e+03]
Reward: -1  Episode Reward:  21
xxxxx
x.a x
x g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   184.93866748  2603.12092958]
------
Step:10, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748  2603.12092958]
New Q values:  [ -281.736      -1150.91067548   184.93866748  2071.49436903]
Reward: 9  Episode Reward:  30
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 3416.15332398  223.04911789 -252.78192178]
------
Step:11, Action:South
State  107
Old Q Values:  [-252.35169558 3416.15332398  223.04911789 -252.78192178]
New Q values:  [-252.35169558 1795.6747085   223.04911789 -252.78192178]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xag x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 357.2991616     0.         1432.71126302 -178.98      ]
------
Step:12, Action:North
State  181
Old Q Values:  [9.86639390e+02 1.23686189e+03 9.03286903e+03 3.33862213e+00]
New Q values:  [2.29018025e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
Reward: -1  Episode Reward:  28
xxxxx
xa  x
x .gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 6320.41497251 1050.79195688 -120.29354603]
------
Step:13, Action:South
State  111
Old Q Values:  [-177.44732869 6320.41497251 1050.79195688 -120.29354603]
New Q values:  [-177.44732869 7981.28591555 1050.79195688 -120.29354603]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 18179.06642183  1554.80203889]
------
Step:14, Action:East
State  185
Old Q Values:  [ 357.2991616     0.         1432.71126302 -178.98      ]
New Q values:  [ 357.2991616     0.         9804.47639086 -178.98      ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 30753.30628551 -7266.86964969  1311.30124863]
------
Step:15, Action:South
State  203
Old Q Values:  [3.60604218e+00 1.29831689e+03 2.46738310e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 1.28201863e+04 2.46738310e+03 4.59156348e+03]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x   x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971   443.99421868 40984.86516432]
------
Step:16, Action:West
State  272
Old Q Values:  [ 10371.40783416  -8521.23367799   3219.04237595 109903.63928243]
New Q values:  [ 10371.40783416  -8521.23367799   3219.04237595 113764.25471161]
Reward: 100009  Episode Reward:  100054
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4632.21771834  1352.37702619 -3346.86631277   -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [ 4632.21771834  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4568.14779751  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x..gx
xa. x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.29018025e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
------
Step:2, Action:East
State  189
Old Q Values:  [ 533.05203844 2927.0296359  5428.3016946   154.04646645]
New Q values:  [  533.05203844  2927.0296359  13805.54045365   154.04646645]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[38762.73258603  4302.20715371 10778.74055042  2453.92999194]
------
Step:3, Action:East
State  192
Old Q Values:  [38762.73258603  4302.20715371 10778.74055042  2453.92999194]
New Q values:  [38762.73258603  4302.20715371 12948.33645084  2453.92999194]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28791.46743557  6819.34537669 -4584.50430574 -1049.83093042]
------
Step:4, Action:North
State  216
Old Q Values:  [ 1596.85686389  6228.21420906   606.149024   -2865.34274144]
New Q values:  [ 1601.96115248  6228.21420906   606.149024   -2865.34274144]
Reward: 9  Episode Reward:  26
xxxxx
x.gax
x   x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 3192.72802309  660.86649319  830.6837742 ]
------
Step:5, Action:South
State  136
Old Q Values:  [ 878.22269011 3192.72802309  660.86649319  830.6837742 ]
New Q values:  [ 878.22269011 3144.95547196  660.86649319  830.6837742 ]
Reward: -1  Episode Reward:  25
xxxxx
xg. x
x  ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1601.96115248  6228.21420906   606.149024   -2865.34274144]
------
Step:6, Action:South
State  208
Old Q Values:  [28791.46743557  6819.34537669 -4584.50430574 -1049.83093042]
New Q values:  [28791.46743557  4955.63713241 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  34
xxxxx
x.g x
x   x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7333.14647356 -4059.26960032 -5588.09647059  7408.32993912]
------
Step:7, Action:West
State  288
Old Q Values:  [ 7333.14647356 -4059.26960032 -5588.09647059  7408.32993912]
New Q values:  [ 7333.14647356 -4059.26960032 -5588.09647059 37098.00838913]
Reward: 9  Episode Reward:  43
xxxxx
xg. x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 10371.40783416  -8521.23367799   3219.04237595 113764.25471161]
------
Step:8, Action:West
State  272
Old Q Values:  [ 10371.40783416  -8521.23367799   3219.04237595 113764.25471161]
New Q values:  [10371.40783416 -8521.23367799  3219.04237595 77018.04223122]
Reward: -1  Episode Reward:  42
xxxxx
x.. x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NW
[105043.13448859  37672.57242934   6661.12187543    644.94785455]
------
Step:9, Action:South
State  257
Old Q Values:  [32657.99666211 15812.0704082  13169.98702937  1875.31501677]
New Q values:  [32657.99666211 15941.62716192 13169.98702937  1875.31501677]
Reward: -301  Episode Reward:  -259
xxxxx
x.. x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[32657.99666211 15941.62716192 13169.98702937  1875.31501677]
------
Step:10, Action:North
State  257
Old Q Values:  [32657.99666211 15941.62716192 13169.98702937  1875.31501677]
New Q values:  [41851.25733533 15941.62716192 13169.98702937  1875.31501677]
Reward: -1  Episode Reward:  -260
xxxxx
x.. x
xa gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039  95962.19556828     0.        ]
------
Step:11, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039  95962.19556828     0.        ]
New Q values:  [60476.05138135 21430.9929039  44491.85946264     0.        ]
Reward: -1  Episode Reward:  -261
xxxxx
x..gx
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[-1649.89560358  3592.96905128 20358.60411776   767.35890262]
------
Step:12, Action:East
State  192
Old Q Values:  [38762.73258603  4302.20715371 12948.33645084  2453.92999194]
New Q values:  [38762.73258603  4302.20715371 13816.17481101  2453.92999194]
Reward: -1  Episode Reward:  -262
xxxxx
x.g x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[28791.46743557  4955.63713241 -4584.50430574 -1049.83093042]
------
Step:13, Action:North
State  208
Old Q Values:  [28791.46743557  4955.63713241 -4584.50430574 -1049.83093042]
New Q values:  [22493.75939965  4955.63713241 -4584.50430574 -1049.83093042]
Reward: -10001  Episode Reward:  -10263
xxxxx
x..gx
x   x
x   x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[10371.40783416 -8521.23367799  3219.04237595 77018.04223122]
------
Step:1, Action:North
State  272
Old Q Values:  [10371.40783416 -8521.23367799  3219.04237595 77018.04223122]
New Q values:  [ 9275.12327166 -8521.23367799  3219.04237595 77018.04223122]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[17070.53379332  7581.58548952 15278.89768492  2179.39995143]
------
Step:2, Action:North
State  195
Old Q Values:  [38955.61549043 16053.62807234  8240.17937465  1169.39963074]
New Q values:  [54522.32013219 16053.62807234  8240.17937465  1169.39963074]
Reward: 9  Episode Reward:  18
xxxxx
x.a.x
x. .x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 129782.24645338]
------
Step:3, Action:West
State  119
Old Q Values:  [  0.         465.4216645    0.         630.72574508]
New Q values:  [   0.          465.4216645     0.         2397.97883349]
Reward: 9  Episode Reward:  27
xxxxx
xa .x
x. gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 7134.29511819 1868.2303995     0.        ]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 7981.28591555 1050.79195688 -120.29354603]
New Q values:  [-177.44732869 5907.7750764  1050.79195688 -120.29354603]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.29018025e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
------
Step:5, Action:North
State  180
Old Q Values:  [ 2478.22763776  3060.24711158 11939.91554342 -4966.32149798]
New Q values:  [ 1548.72903536  3060.24711158 11939.91554342 -4966.32149798]
Reward: -1  Episode Reward:  35
xxxxx
xa .x
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1860.12660087 -180.6       ]
------
Step:6, Action:East
State  111
Old Q Values:  [-177.44732869 5907.7750764  1050.79195688 -120.29354603]
New Q values:  [-177.44732869 5907.7750764   967.54916271 -120.29354603]
Reward: -1  Episode Reward:  34
xxxxx
x a.x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 1826.1079332   840.78646811]
------
Step:7, Action:East
State  124
Old Q Values:  [   0.         1166.51141701 3054.93376968 1772.94838375]
New Q values:  [   0.         1166.51141701 2170.86014946 1772.94838375]
Reward: 9  Episode Reward:  43
xxxxx
x gax
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 3144.95547196  660.86649319  830.6837742 ]
------
Step:8, Action:South
State  136
Old Q Values:  [ 878.22269011 3144.95547196  660.86649319  830.6837742 ]
New Q values:  [ 878.22269011 3131.8464515   660.86649319  830.6837742 ]
Reward: 9  Episode Reward:  52
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1601.96115248  6228.21420906   606.149024   -2865.34274144]
------
Step:9, Action:South
State  208
Old Q Values:  [22493.75939965  4955.63713241 -4584.50430574 -1049.83093042]
New Q values:  [22493.75939965 73117.0573697  -4584.50430574 -1049.83093042]
Reward: 100009  Episode Reward:  100061
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799  3219.04237595 77018.04223122]
------
Step:1, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4777.61969972  1599.95824685]
New Q values:  [-2561.28592178 -5807.06396197  4777.61969972  2015.82763799]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4568.14779751  1352.37702619 -3346.86631277   -12.17474163]
------
Step:2, Action:North
State  261
Old Q Values:  [ 4568.14779751  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4542.51982918  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
xa..x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.29018025e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
------
Step:3, Action:East
State  181
Old Q Values:  [2.29018025e+03 1.23686189e+03 9.03286903e+03 3.33862213e+00]
New Q values:  [2.29018025e+03 1.23686189e+03 7.25882639e+03 3.33862213e+00]
Reward: 9  Episode Reward:  27
xxxxx
x g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ 7136.82177333  1238.89434634 12134.26259082   231.67262594]
------
Step:4, Action:East
State  192
Old Q Values:  [38762.73258603  4302.20715371 13816.17481101  2453.92999194]
New Q values:  [38762.73258603  4302.20715371 27466.98713531  2453.92999194]
Reward: 9  Episode Reward:  36
xxxxx
xg..x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22493.75939965 73117.0573697  -4584.50430574 -1049.83093042]
------
Step:5, Action:South
State  208
Old Q Values:  [22493.75939965 73117.0573697  -4584.50430574 -1049.83093042]
New Q values:  [22493.75939965 40381.62546462 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  45
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7333.14647356 -4059.26960032 -5588.09647059 37098.00838913]
------
Step:6, Action:West
State  288
Old Q Values:  [ 7333.14647356 -4059.26960032 -5588.09647059 37098.00838913]
New Q values:  [ 7333.14647356 -4059.26960032 -5588.09647059 37944.01602502]
Reward: -1  Episode Reward:  44
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799  3219.04237595 77018.04223122]
------
Step:7, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4777.61969972  2015.82763799]
New Q values:  [-2561.28592178 -5807.06396197  4777.61969972  2168.48700395]
Reward: -1  Episode Reward:  43
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4542.51982918  1352.37702619 -3346.86631277   -12.17474163]
------
Step:8, Action:North
State  260
Old Q Values:  [ 1504.08600103 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [ 4183.00906344 -5704.51612281  2149.47028662 -5679.36893145]
Reward: -1  Episode Reward:  42
xxxxx
xg..x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 1548.72903536  3060.24711158 11939.91554342 -4966.32149798]
------
Step:9, Action:East
State  180
Old Q Values:  [ 1548.72903536  3060.24711158 11939.91554342 -4966.32149798]
New Q values:  [ 1548.72903536  3060.24711158 16404.18599318 -4966.32149798]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[38762.73258603  4302.20715371 27466.98713531  2453.92999194]
------
Step:10, Action:North
State  196
Old Q Values:  [ 7136.82177333  1238.89434634 12134.26259082   231.67262594]
New Q values:  [ 4470.82817917  1238.89434634 12134.26259082   231.67262594]
Reward: 9  Episode Reward:  50
xxxxx
xga.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[   0.        5368.9982328    0.           0.       ]
------
Step:11, Action:South
State  116
Old Q Values:  [   0.        5368.9982328    0.           0.       ]
New Q values:  [   0.         5787.27807037    0.            0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ 4470.82817917  1238.89434634 12134.26259082   231.67262594]
------
Step:12, Action:East
State  196
Old Q Values:  [ 4470.82817917  1238.89434634 12134.26259082   231.67262594]
New Q values:  [ 4470.82817917  1238.89434634 16967.59267572   231.67262594]
Reward: -1  Episode Reward:  48
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22493.75939965 40381.62546462 -4584.50430574 -1049.83093042]
------
Step:13, Action:South
State  208
Old Q Values:  [22493.75939965 40381.62546462 -4584.50430574 -1049.83093042]
New Q values:  [22493.75939965 27535.25499335 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  47
xxxxx
xg .x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7333.14647356 -4059.26960032 -5588.09647059 37944.01602502]
------
Step:14, Action:West
State  288
Old Q Values:  [ 7333.14647356 -4059.26960032 -5588.09647059 37944.01602502]
New Q values:  [ 7333.14647356 -4059.26960032 -5588.09647059 16610.29231992]
Reward: -1  Episode Reward:  46
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4777.61969972  2168.48700395]
------
Step:15, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4777.61969972  2168.48700395]
New Q values:  [-2561.28592178 -5807.06396197  6893.53557586  2168.48700395]
Reward: -1  Episode Reward:  45
xxxxx
xg .x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7333.14647356 -4059.26960032 -5588.09647059 16610.29231992]
------
Step:16, Action:West
State  288
Old Q Values:  [ 7333.14647356 -4059.26960032 -5588.09647059 16610.29231992]
New Q values:  [ 7333.14647356 -4059.26960032 -5588.09647059  8711.57760073]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  6893.53557586  2168.48700395]
------
Step:17, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  6893.53557586  2168.48700395]
New Q values:  [-2561.28592178 -5807.06396197  5370.28751056  2168.48700395]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7333.14647356 -4059.26960032 -5588.09647059  8711.57760073]
------
Step:18, Action:West
State  288
Old Q Values:  [ 7333.14647356 -4059.26960032 -5588.09647059  8711.57760073]
New Q values:  [ 7333.14647356 -4059.26960032 -5588.09647059  -904.88270654]
Reward: -10001  Episode Reward:  -9958
xxxxx
x  .x
x   x
x g x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4542.51982918  1352.37702619 -3346.86631277   -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [ 4542.51982918  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 7276.12785822  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 18179.06642183  1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [  861.58025644  1357.09471455 18179.06642183  1554.80203889]
New Q values:  [  861.58025644  1357.09471455 23627.72260839  1554.80203889]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[54522.32013219 16053.62807234  8240.17937465  1169.39963074]
------
Step:3, Action:North
State  193
Old Q Values:  [-1649.89560358  3592.96905128 20358.60411776   767.35890262]
New Q values:  [38280.11569458  3592.96905128 20358.60411776   767.35890262]
Reward: 9  Episode Reward:  17
xxxxx
x.a.x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 129782.24645338]
------
Step:4, Action:West
State  119
Old Q Values:  [   0.          465.4216645     0.         2397.97883349]
New Q values:  [   0.          465.4216645     0.         2736.92405632]
Reward: 9  Episode Reward:  26
xxxxx
xa .x
x  .x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5907.7750764   967.54916271 -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 5907.7750764   967.54916271 -120.29354603]
New Q values:  [-177.44732869 9450.82681308  967.54916271 -120.29354603]
Reward: -1  Episode Reward:  25
xxxxx
x  .x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 23627.72260839  1554.80203889]
------
Step:6, Action:East
State  183
Old Q Values:  [  861.58025644  1357.09471455 23627.72260839  1554.80203889]
New Q values:  [  861.58025644  1357.09471455 12828.72936934  1554.80203889]
Reward: -1  Episode Reward:  24
xxxxx
x  .x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[   14.86214194  1304.45200084 11260.80108661  1915.70494401]
------
Step:7, Action:East
State  197
Old Q Values:  [-5833.78831344  1394.50148356 -5538.30598082   403.06255908]
New Q values:  [-5833.78831344  1394.50148356 -6341.45812961   403.06255908]
Reward: -9991  Episode Reward:  -9967
xxxxx
x  .x
x  gx
x ..x
xxxxx
xxxxx
xg.ax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 3131.8464515   660.86649319  830.6837742 ]
------
Step:1, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  1.15107475e+04 -3.22965309e-01  1.20381725e+03]
New Q values:  [ 1.06807480e+02  6.47816327e+03 -3.22965309e-01  1.20381725e+03]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 1601.96115248  6228.21420906   606.149024   -2865.34274144]
------
Step:2, Action:South
State  208
Old Q Values:  [22493.75939965 27535.25499335 -4584.50430574 -1049.83093042]
New Q values:  [22493.75939965 13219.44593941 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7333.14647356 -4059.26960032 -5588.09647059  -904.88270654]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7333.14647356 -4059.26960032 -5588.09647059  -904.88270654]
New Q values:  [ 9680.78640932 -4059.26960032 -5588.09647059  -904.88270654]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22493.75939965 13219.44593941 -4584.50430574 -1049.83093042]
------
Step:4, Action:North
State  208
Old Q Values:  [22493.75939965 13219.44593941 -4584.50430574 -1049.83093042]
New Q values:  [10940.35273986 13219.44593941 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  16
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.47816327e+03 -3.22965309e-01  1.20381725e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.47816327e+03 -3.22965309e-01  1.20381725e+03]
New Q values:  [ 1.06807480e+02  1.03590981e+04 -3.22965309e-01  1.20381725e+03]
Reward: -1  Episode Reward:  15
xxxxx
x . x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[25894.77601611 11766.93203575   790.72804752  5103.37501425]
------
Step:6, Action:North
State  210
Old Q Values:  [25894.77601611 11766.93203575   790.72804752  5103.37501425]
New Q values:  [13465.03983989 11766.93203575   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  14
xxxxx
x .ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.03590981e+04 -3.22965309e-01  1.20381725e+03]
------
Step:7, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  1.03590981e+04 -3.22965309e-01  1.20381725e+03]
New Q values:  [ 1.06807480e+02  8.18255120e+03 -3.22965309e-01  1.20381725e+03]
Reward: -1  Episode Reward:  13
xxxxx
x . x
x..ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[13465.03983989 11766.93203575   790.72804752  5103.37501425]
------
Step:8, Action:North
State  210
Old Q Values:  [13465.03983989 11766.93203575   790.72804752  5103.37501425]
New Q values:  [ 7840.18129493 11766.93203575   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  12
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  8.18255120e+03 -3.22965309e-01  1.20381725e+03]
------
Step:9, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  8.18255120e+03 -3.22965309e-01  1.20381725e+03]
New Q values:  [ 1.06807480e+02  7.23825426e+03 -3.22965309e-01  1.20381725e+03]
Reward: -1  Episode Reward:  11
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10940.35273986 13219.44593941 -4584.50430574 -1049.83093042]
------
Step:10, Action:South
State  208
Old Q Values:  [10940.35273986 13219.44593941 -4584.50430574 -1049.83093042]
New Q values:  [10940.35273986  8191.41429856 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  10
xxxxx
xg. x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9680.78640932 -4059.26960032 -5588.09647059  -904.88270654]
------
Step:11, Action:North
State  288
Old Q Values:  [ 9680.78640932 -4059.26960032 -5588.09647059  -904.88270654]
New Q values:  [ 7153.82038569 -4059.26960032 -5588.09647059  -904.88270654]
Reward: -1  Episode Reward:  9
xxxxx
x . x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[10940.35273986  8191.41429856 -4584.50430574 -1049.83093042]
------
Step:12, Action:North
State  208
Old Q Values:  [10940.35273986  8191.41429856 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408  8191.41429856 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  8
xxxxx
x .ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  7.23825426e+03 -3.22965309e-01  1.20381725e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  7.23825426e+03 -3.22965309e-01  1.20381725e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.20381725e+03]
Reward: -10001  Episode Reward:  -9993
xxxxx
x . x
x..gx
x.. x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 7276.12785822  1352.37702619 -3346.86631277   -12.17474163]
------
Step:1, Action:North
State  260
Old Q Values:  [ 4183.00906344 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [  599.85942333 -5704.51612281  2149.47028662 -5679.36893145]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
xg .x
x ..x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 7276.12785822  1352.37702619 -3346.86631277   -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [ 7276.12785822  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 6764.46995409  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 12828.72936934  1554.80203889]
------
Step:2, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243 14549.45411194     0.        ]
New Q values:  [    0.         -5536.05678243 10940.34178277     0.        ]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[17070.53379332  7581.58548952 15278.89768492  2179.39995143]
------
Step:3, Action:North
State  196
Old Q Values:  [ 4470.82817917  1238.89434634 16967.59267572   231.67262594]
New Q values:  [ 2341.56365163  1238.89434634 16967.59267572   231.67262594]
Reward: 9  Episode Reward:  17
xxxxx
x.a.x
xg .x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 1826.1079332   840.78646811]
------
Step:4, Action:East
State  126
Old Q Values:  [   0.          331.64678262 1826.1079332   840.78646811]
New Q values:  [   0.          331.64678262 1096.98834792  840.78646811]
Reward: 9  Episode Reward:  26
xxxxx
x. ax
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.20381725e+03]
------
Step:5, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.20381725e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.10023404e+02]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x  .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262 1096.98834792  840.78646811]
------
Step:6, Action:East
State  126
Old Q Values:  [   0.          331.64678262 1096.98834792  840.78646811]
New Q values:  [  0.         331.64678262 681.20236034 840.78646811]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.10023404e+02]
------
Step:7, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.10023404e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.75645302e+02]
Reward: -1  Episode Reward:  23
xxxxx
x.a x
x  .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 681.20236034 840.78646811]
------
Step:8, Action:West
State  126
Old Q Values:  [  0.         331.64678262 681.20236034 840.78646811]
New Q values:  [  0.         331.64678262 681.20236034 899.75256751]
Reward: 9  Episode Reward:  32
xxxxx
xa  x
x  .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1860.12660087 -180.6       ]
------
Step:9, Action:East
State  111
Old Q Values:  [-177.44732869 9450.82681308  967.54916271 -120.29354603]
New Q values:  [-177.44732869 9450.82681308  656.34543534 -120.29354603]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x  .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[  0.         331.64678262 681.20236034 899.75256751]
------
Step:10, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  4142.17748273  1303.87792756]
New Q values:  [  -56.91790269 -1902.20915811  4142.17748273  3356.19921495]
Reward: -1  Episode Reward:  30
xxxxx
xa  x
x  .x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 9450.82681308  656.34543534 -120.29354603]
------
Step:11, Action:South
State  111
Old Q Values:  [-177.44732869 9450.82681308  656.34543534 -120.29354603]
New Q values:  [-177.44732869 7921.39286133  656.34543534 -120.29354603]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2927.0296359  13805.54045365   154.04646645]
------
Step:12, Action:East
State  189
Old Q Values:  [  533.05203844  2927.0296359  13805.54045365   154.04646645]
New Q values:  [ 533.05203844 2927.0296359  5939.96662653  154.04646645]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  1394.50148356 -6341.45812961   403.06255908]
------
Step:13, Action:South
State  205
Old Q Values:  [   0.         1488.46719176    0.          198.38683706]
New Q values:  [   0.         1285.92441063    0.          198.38683706]
Reward: 9  Episode Reward:  37
xxxxx
x   x
x  gx
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 5.04537534e+02 2.28379178e+03 1.06376909e+03]
------
Step:14, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  5370.28751056  2168.48700395]
New Q values:  [-2561.28592178 -5807.06396197  4299.66111993  2168.48700395]
Reward: 9  Episode Reward:  46
xxxxx
x   x
x g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7153.82038569 -4059.26960032 -5588.09647059  -904.88270654]
------
Step:15, Action:North
State  288
Old Q Values:  [ 7153.82038569 -4059.26960032 -5588.09647059  -904.88270654]
New Q values:  [59324.35244384 -4059.26960032 -5588.09647059  -904.88270654]
Reward: 90009  Episode Reward:  90055
xxxxx
x   x
x  gx
x   x
xxxxx
xxxxx
x...x
x..ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 7840.18129493 11766.93203575   790.72804752  5103.37501425]
------
Step:1, Action:South
State  208
Old Q Values:  [ 6547.01737408  8191.41429856 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408 21079.27145258 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[59324.35244384 -4059.26960032 -5588.09647059  -904.88270654]
------
Step:2, Action:North
State  288
Old Q Values:  [59324.35244384 -4059.26960032 -5588.09647059  -904.88270654]
New Q values:  [30052.92241331 -4059.26960032 -5588.09647059  -904.88270654]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x..ax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408 21079.27145258 -4584.50430574 -1049.83093042]
------
Step:3, Action:South
State  208
Old Q Values:  [ 6547.01737408 21079.27145258 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408 17446.98530502 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  7
xxxxx
x...x
xg. x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[30052.92241331 -4059.26960032 -5588.09647059  -904.88270654]
------
Step:4, Action:North
State  288
Old Q Values:  [30052.92241331 -4059.26960032 -5588.09647059  -904.88270654]
New Q values:  [17254.66455683 -4059.26960032 -5588.09647059  -904.88270654]
Reward: -1  Episode Reward:  6
xxxxx
x...x
x.gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408 17446.98530502 -4584.50430574 -1049.83093042]
------
Step:5, Action:South
State  208
Old Q Values:  [ 6547.01737408 17446.98530502 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408 12154.59348906 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  5
xxxxx
x...x
x..gx
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17254.66455683 -4059.26960032 -5588.09647059  -904.88270654]
------
Step:6, Action:West
State  288
Old Q Values:  [17254.66455683 -4059.26960032 -5588.09647059  -904.88270654]
New Q values:  [17254.66455683 -4059.26960032 -5588.09647059 11938.90646668]
Reward: 9  Episode Reward:  14
xxxxx
x...x
x.. x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971   443.99421868 40984.86516432]
------
Step:7, Action:West
State  273
Old Q Values:  [ 1188.86709371  1201.72649971   443.99421868 40984.86516432]
New Q values:  [ 1188.86709371  1201.72649971   443.99421868 18422.68705195]
Reward: -1  Episode Reward:  13
xxxxx
x...x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 6764.46995409  1352.37702619 -3346.86631277   -12.17474163]
------
Step:8, Action:North
State  261
Old Q Values:  [ 6764.46995409  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4888.83589888  1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  22
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.29018025e+03 1.23686189e+03 7.25882639e+03 3.33862213e+00]
------
Step:9, Action:North
State  181
Old Q Values:  [2.29018025e+03 1.23686189e+03 7.25882639e+03 3.33862213e+00]
New Q values:  [2.35401054e+03 1.23686189e+03 7.25882639e+03 3.33862213e+00]
Reward: 9  Episode Reward:  31
xxxxx
xag.x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         4775.12813192    0.            0.        ]
------
Step:10, Action:South
State  100
Old Q Values:  [   0.         4228.78310806 1813.23031203    0.        ]
New Q values:  [   0.         6612.16904117 1813.23031203    0.        ]
Reward: -1  Episode Reward:  30
xxxxx
xg..x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 1548.72903536  3060.24711158 16404.18599318 -4966.32149798]
------
Step:11, Action:East
State  181
Old Q Values:  [2.35401054e+03 1.23686189e+03 7.25882639e+03 3.33862213e+00]
New Q values:  [2.35401054e+03 1.23686189e+03 7.99920836e+03 3.33862213e+00]
Reward: 9  Episode Reward:  39
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ 2341.56365163  1238.89434634 16967.59267572   231.67262594]
------
Step:12, Action:East
State  193
Old Q Values:  [38280.11569458  3592.96905128 20358.60411776   767.35890262]
New Q values:  [38280.11569458  3592.96905128 11789.21969382   767.35890262]
Reward: -1  Episode Reward:  38
xxxxx
x .gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408 12154.59348906 -4584.50430574 -1049.83093042]
------
Step:13, Action:South
State  208
Old Q Values:  [ 6547.01737408 12154.59348906 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408 10037.63676267 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  37
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[17254.66455683 -4059.26960032 -5588.09647059 11938.90646668]
------
Step:14, Action:North
State  288
Old Q Values:  [17254.66455683 -4059.26960032 -5588.09647059 11938.90646668]
New Q values:  [ 9912.55685153 -4059.26960032 -5588.09647059 11938.90646668]
Reward: -1  Episode Reward:  36
xxxxx
xg..x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408 10037.63676267 -4584.50430574 -1049.83093042]
------
Step:15, Action:South
State  208
Old Q Values:  [ 6547.01737408 10037.63676267 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408  7596.12664507 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  35
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9912.55685153 -4059.26960032 -5588.09647059 11938.90646668]
------
Step:16, Action:West
State  288
Old Q Values:  [ 9912.55685153 -4059.26960032 -5588.09647059 11938.90646668]
New Q values:  [ 9912.55685153 -4059.26960032 -5588.09647059 27880.37525604]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799  3219.04237595 77018.04223122]
------
Step:17, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4299.66111993  2168.48700395]
New Q values:  [-2561.28592178 -5807.06396197  4299.66111993  2333.44557125]
Reward: -1  Episode Reward:  33
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4888.83589888  1352.37702619 -3346.86631277   -12.17474163]
------
Step:18, Action:North
State  261
Old Q Values:  [ 4888.83589888  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4354.69686726  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  32
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.35401054e+03 1.23686189e+03 7.99920836e+03 3.33862213e+00]
------
Step:19, Action:East
State  181
Old Q Values:  [2.35401054e+03 1.23686189e+03 7.99920836e+03 3.33862213e+00]
New Q values:  [2.35401054e+03 1.23686189e+03 1.46831181e+04 3.33862213e+00]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[38280.11569458  3592.96905128 11789.21969382   767.35890262]
------
Step:20, Action:North
State  196
Old Q Values:  [ 2341.56365163  1238.89434634 16967.59267572   231.67262594]
New Q values:  [ 2557.84133319  1238.89434634 16967.59267572   231.67262594]
Reward: 9  Episode Reward:  40
xxxxx
x a.x
x g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 5386.05290848    0.         2594.70174865]
------
Step:21, Action:West
State  118
Old Q Values:  [1761.89752936 5386.05290848    0.         2594.70174865]
New Q values:  [1761.89752936 5386.05290848    0.         4030.68963142]
Reward: -1  Episode Reward:  39
xxxxx
xa .x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        9978.02977319 2419.89303403 -180.6       ]
------
Step:22, Action:East
State  102
Old Q Values:  [-180.6        9978.02977319 2419.89303403 -180.6       ]
New Q values:  [-180.6        9978.02977319 2583.17308616 -180.6       ]
Reward: -1  Episode Reward:  38
xxxxx
x a.x
x   x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 5386.05290848    0.         4030.68963142]
------
Step:23, Action:South
State  118
Old Q Values:  [1761.89752936 5386.05290848    0.         4030.68963142]
New Q values:  [1761.89752936 7087.72425489    0.         4030.68963142]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[16446.34363833  -200.61022961 11290.0677349      0.        ]
------
Step:24, Action:North
State  199
Old Q Values:  [   14.86214194  1304.45200084 11260.80108661  1915.70494401]
New Q values:  [  826.42207367  1304.45200084 11260.80108661  1915.70494401]
Reward: -1  Episode Reward:  36
xxxxx
x a.x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SE
[   0.          465.4216645     0.         2736.92405632]
------
Step:25, Action:West
State  118
Old Q Values:  [1761.89752936 7087.72425489    0.         4030.68963142]
New Q values:  [1761.89752936 7087.72425489    0.         3751.96438802]
Reward: -1  Episode Reward:  35
xxxxx
xa .x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 7134.29511819 1868.2303995     0.        ]
------
Step:26, Action:South
State  102
Old Q Values:  [-180.6        9978.02977319 2583.17308616 -180.6       ]
New Q values:  [-180.6        7272.71444411 2583.17308616 -180.6       ]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 10940.34178277     0.        ]
------
Step:27, Action:East
State  180
Old Q Values:  [ 1548.72903536  3060.24711158 16404.18599318 -4966.32149798]
New Q values:  [ 1548.72903536  3060.24711158 11651.35219998 -4966.32149798]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[ 2557.84133319  1238.89434634 16967.59267572   231.67262594]
------
Step:28, Action:East
State  196
Old Q Values:  [ 2557.84133319  1238.89434634 16967.59267572   231.67262594]
New Q values:  [2557.84133319 1238.89434634 9065.27506381  231.67262594]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408  7596.12664507 -4584.50430574 -1049.83093042]
------
Step:29, Action:South
State  210
Old Q Values:  [ 7840.18129493 11766.93203575   790.72804752  5103.37501425]
New Q values:  [ 7840.18129493 13070.28539111   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9912.55685153 -4059.26960032 -5588.09647059 27880.37525604]
------
Step:30, Action:North
State  288
Old Q Values:  [ 9912.55685153 -4059.26960032 -5588.09647059 27880.37525604]
New Q values:  [ 6243.26073414 -4059.26960032 -5588.09647059 27880.37525604]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408  7596.12664507 -4584.50430574 -1049.83093042]
------
Step:31, Action:South
State  208
Old Q Values:  [ 6547.01737408  7596.12664507 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408 11401.96323484 -4584.50430574 -1049.83093042]
Reward: -1  Episode Reward:  29
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6243.26073414 -4059.26960032 -5588.09647059 27880.37525604]
------
Step:32, Action:West
State  288
Old Q Values:  [ 6243.26073414 -4059.26960032 -5588.09647059 27880.37525604]
New Q values:  [ 6243.26073414 -4059.26960032 -5588.09647059 12441.44843839]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4299.66111993  2333.44557125]
------
Step:33, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4299.66111993  2333.44557125]
New Q values:  [-2561.28592178 -5807.06396197  5451.69897949  2333.44557125]
Reward: -1  Episode Reward:  27
xxxxx
x  .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6243.26073414 -4059.26960032 -5588.09647059 12441.44843839]
------
Step:34, Action:West
State  288
Old Q Values:  [ 6243.26073414 -4059.26960032 -5588.09647059 12441.44843839]
New Q values:  [ 6243.26073414 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  5451.69897949  2333.44557125]
------
Step:35, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  5451.69897949  2333.44557125]
New Q values:  [-2561.28592178 -5807.06396197  4163.52631256  2333.44557125]
Reward: -1  Episode Reward:  25
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6243.26073414 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:36, Action:North
State  288
Old Q Values:  [ 6243.26073414 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [ 6417.78991099 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  24
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[ 7840.18129493 13070.28539111   790.72804752  5103.37501425]
------
Step:37, Action:South
State  210
Old Q Values:  [ 7840.18129493 13070.28539111   790.72804752  5103.37501425]
New Q values:  [7840.18129493 7210.96087721  790.72804752 5103.37501425]
Reward: -1  Episode Reward:  23
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6417.78991099 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:38, Action:North
State  288
Old Q Values:  [ 6417.78991099 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [ 5987.10493485 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  22
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408 11401.96323484 -4584.50430574 -1049.83093042]
------
Step:39, Action:South
State  210
Old Q Values:  [7840.18129493 7210.96087721  790.72804752 5103.37501425]
New Q values:  [7840.18129493 4867.23107164  790.72804752 5103.37501425]
Reward: -1  Episode Reward:  21
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5987.10493485 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:40, Action:North
State  288
Old Q Values:  [ 5987.10493485 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [ 4746.29636242 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  20
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7840.18129493 4867.23107164  790.72804752 5103.37501425]
------
Step:41, Action:North
State  210
Old Q Values:  [7840.18129493 4867.23107164  790.72804752 5103.37501425]
New Q values:  [82588.6960484   4867.23107164   790.72804752  5103.37501425]
Reward: 100009  Episode Reward:  100029
xxxxx
x  ax
x   x
x g x
xxxxx
xxxxx
x..ax
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.75645302e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.75645302e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.47831137e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  4142.17748273  3356.19921495]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   184.93866748  2071.49436903]
New Q values:  [ -281.736      -1150.91067548   516.86887668  2071.49436903]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x.g.x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.47831137e+03]
------
Step:3, Action:West
State  136
Old Q Values:  [ 878.22269011 3131.8464515   660.86649319  830.6837742 ]
New Q values:  [  878.22269011  3131.8464515    660.86649319 -4915.89987952]
Reward: -10001  Episode Reward:  -9993
xxxxx
x.g x
x...x
x.. x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[38762.73258603  4302.20715371 27466.98713531  2453.92999194]
------
Step:1, Action:North
State  196
Old Q Values:  [2557.84133319 1238.89434634 9065.27506381  231.67262594]
New Q values:  [1649.98484399 1238.89434634 9065.27506381  231.67262594]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  2071.49436903]
------
Step:2, Action:West
State  126
Old Q Values:  [  0.         331.64678262 681.20236034 899.75256751]
New Q values:  [   0.          331.64678262  681.20236034 2741.7188854 ]
Reward: 9  Episode Reward:  18
xxxxx
xa .x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 7921.39286133  656.34543534 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 7921.39286133  656.34543534 -120.29354603]
New Q values:  [-177.44732869 7016.57595533  656.34543534 -120.29354603]
Reward: -1  Episode Reward:  17
xxxxx
x  .x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 12828.72936934  1554.80203889]
------
Step:4, Action:East
State  183
Old Q Values:  [  861.58025644  1357.09471455 12828.72936934  1554.80203889]
New Q values:  [  861.58025644  1357.09471455 10064.79483923  1554.80203889]
Reward: -1  Episode Reward:  16
xxxxx
x  .x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[16446.34363833  -200.61022961 11290.0677349      0.        ]
------
Step:5, Action:North
State  198
Old Q Values:  [16446.34363833  -200.61022961 11290.0677349      0.        ]
New Q values:  [ 7400.45312095  -200.61022961 11290.0677349      0.        ]
Reward: -1  Episode Reward:  15
xxxxx
x a.x
x  .x
xg..x
xxxxx
Step:6, Action:West
State  126
Old Q Values:  [   0.          331.64678262  681.20236034 2741.7188854 ]
New Q values:  [   0.          331.64678262  681.20236034 3201.06034076]
Reward: -1  Episode Reward:  14
xxxxx
xa .x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 7016.57595533  656.34543534 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 7016.57595533  656.34543534 -120.29354603]
New Q values:  [-177.44732869 5825.4688339   656.34543534 -120.29354603]
Reward: -1  Episode Reward:  13
xxxxx
x  .x
xa .x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[  861.58025644  1357.09471455 10064.79483923  1554.80203889]
------
Step:8, Action:East
State  189
Old Q Values:  [ 533.05203844 2927.0296359  5939.96662653  154.04646645]
New Q values:  [  533.05203844  2927.0296359  11601.37853626   154.04646645]
Reward: -1  Episode Reward:  12
xxxxx
x  .x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 30753.30628551 -7266.86964969  1311.30124863]
------
Step:9, Action:South
State  199
Old Q Values:  [  826.42207367  1304.45200084 11260.80108661  1915.70494401]
New Q values:  [  826.42207367  1212.31833426 11260.80108661  1915.70494401]
Reward: 9  Episode Reward:  21
xxxxx
x  .x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 5.04537534e+02 2.28379178e+03 1.06376909e+03]
------
Step:10, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4163.52631256  2333.44557125]
New Q values:  [-2561.28592178 -5807.06396197  4163.52631256  2245.18728868]
Reward: 9  Episode Reward:  30
xxxxx
x  .x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4354.69686726  1352.37702619 -3346.86631277   -12.17474163]
------
Step:11, Action:North
State  260
Old Q Values:  [  599.85942333 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [ 3521.44630416 -5704.51612281  2149.47028662 -5679.36893145]
Reward: -1  Episode Reward:  29
xxxxx
x  .x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 10940.34178277     0.        ]
------
Step:12, Action:East
State  183
Old Q Values:  [  861.58025644  1357.09471455 10064.79483923  1554.80203889]
New Q values:  [ 861.58025644 1357.09471455 7412.33825616 1554.80203889]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[ 7400.45312095  -200.61022961 11290.0677349      0.        ]
------
Step:13, Action:East
State  196
Old Q Values:  [1649.98484399 1238.89434634 9065.27506381  231.67262594]
New Q values:  [1649.98484399 1238.89434634 7052.09899598  231.67262594]
Reward: 9  Episode Reward:  37
xxxxx
x  .x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408 11401.96323484 -4584.50430574 -1049.83093042]
------
Step:14, Action:South
State  210
Old Q Values:  [82588.6960484   4867.23107164   790.72804752  5103.37501425]
New Q values:  [82588.6960484   3935.73914942   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  46
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4746.29636242 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:15, Action:North
State  288
Old Q Values:  [ 4746.29636242 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [ 5318.50751542 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  45
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408 11401.96323484 -4584.50430574 -1049.83093042]
------
Step:16, Action:South
State  210
Old Q Values:  [82588.6960484   3935.73914942   790.72804752  5103.37501425]
New Q values:  [82588.6960484   3557.14238053   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5318.50751542 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:17, Action:North
State  288
Old Q Values:  [ 5318.50751542 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [26903.41182069 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[82588.6960484   3557.14238053   790.72804752  5103.37501425]
------
Step:18, Action:North
State  210
Old Q Values:  [82588.6960484   3557.14238053   790.72804752  5103.37501425]
New Q values:  [112488.1019498    3557.14238053    790.72804752   5103.37501425]
Reward: 100009  Episode Reward:  100052
xxxxx
x  ax
x   x
x g x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4354.69686726  1352.37702619 -3346.86631277   -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [ 4354.69686726  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 6152.2141625   1352.37702619 -3346.86631277   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x.g.x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.35401054e+03 1.23686189e+03 1.46831181e+04 3.33862213e+00]
------
Step:2, Action:East
State  180
Old Q Values:  [ 1548.72903536  3060.24711158 11651.35219998 -4966.32149798]
New Q values:  [ 1548.72903536  3060.24711158  6775.57057879 -4966.32149798]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1649.98484399 1238.89434634 7052.09899598  231.67262594]
------
Step:3, Action:East
State  192
Old Q Values:  [38762.73258603  4302.20715371 27466.98713531  2453.92999194]
New Q values:  [38762.73258603  4302.20715371 14412.78382458  2453.92999194]
Reward: 9  Episode Reward:  17
xxxxx
x...x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408 11401.96323484 -4584.50430574 -1049.83093042]
------
Step:4, Action:South
State  210
Old Q Values:  [112488.1019498    3557.14238053    790.72804752   5103.37501425]
New Q values:  [112488.1019498    9499.28049842    790.72804752   5103.37501425]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x   x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[26903.41182069 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:5, Action:North
State  288
Old Q Values:  [26903.41182069 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [44507.19531321 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[112488.1019498    9499.28049842    790.72804752   5103.37501425]
------
Step:6, Action:North
State  210
Old Q Values:  [112488.1019498    9499.28049842    790.72804752   5103.37501425]
New Q values:  [64447.86431035  9499.28049842   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  34
xxxxx
x..ax
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 64824.07843478]
------
Step:7, Action:West
State  130
Old Q Values:  [41234.48978377  6927.8788297   -180.00807518 64824.07843478]
New Q values:  [41234.48978377  6927.8788297   -180.00807518 64692.20804913]
Reward: 9  Episode Reward:  43
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 129190.58891738]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  2071.49436903]
New Q values:  [ -281.736      -1150.91067548   516.86887668  2581.63839778]
Reward: 9  Episode Reward:  52
xxxxx
xa  x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5825.4688339   656.34543534 -120.29354603]
------
Step:9, Action:South
State  109
Old Q Values:  [ -241.10880094  4057.81726597 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  5102.94046727 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  51
xxxxx
x g x
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2927.0296359  11601.37853626   154.04646645]
------
Step:10, Action:East
State  188
Old Q Values:  [-6523.78898263  2298.3669598   1963.43704178     0.        ]
New Q values:  [-6523.78898263  2298.3669598   2341.27811661     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
xg  x
x a x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  5188.34433299 1964.0000496   610.93635926]
------
Step:11, Action:South
State  192
Old Q Values:  [38762.73258603  4302.20715371 14412.78382458  2453.92999194]
New Q values:  [38762.73258603 84831.69553085 14412.78382458  2453.92999194]
Reward: 100009  Episode Reward:  100059
xxxxx
x g x
x   x
x a x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5825.4688339   656.34543534 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 7134.29511819 1868.2303995     0.        ]
New Q values:  [ 221.30610858 5082.81952413 1868.2303995     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 861.58025644 1357.09471455 7412.33825616 1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [ 861.58025644 1357.09471455 7412.33825616 1554.80203889]
New Q values:  [ 861.58025644 1357.09471455 6348.57562845 1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  826.42207367  1212.31833426 11260.80108661  1915.70494401]
------
Step:3, Action:East
State  194
Old Q Values:  [17070.53379332  7581.58548952 15278.89768492  2179.39995143]
New Q values:  [17070.53379332  7581.58548952 25451.31836707  2179.39995143]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[64447.86431035  9499.28049842   790.72804752  5103.37501425]
------
Step:4, Action:North
State  218
Old Q Values:  [1984.81865135 6329.78476486    0.          930.00701399]
New Q values:  [1242.82087023 6329.78476486    0.          930.00701399]
Reward: 9  Episode Reward:  36
xxxxx
x .ax
x   x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.47831137e+03]
------
Step:5, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.47831137e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.37121607e+03]
Reward: 9  Episode Reward:  45
xxxxx
x a x
x   x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  2581.63839778]
------
Step:6, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  2581.63839778]
New Q values:  [ -281.736      -1150.91067548   516.86887668  1590.09333937]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1860.12660087 -180.6       ]
------
Step:7, Action:East
State  106
Old Q Values:  [ -180.6        -6764.65639938   517.05242461  -180.6       ]
New Q values:  [ -180.6        -6764.65639938   683.24897166  -180.6       ]
Reward: -1  Episode Reward:  43
xxxxx
x a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1590.09333937]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  1590.09333937]
New Q values:  [ -281.736      -1150.91067548   516.86887668  1193.47531601]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1860.12660087 -180.6       ]
------
Step:9, Action:East
State  106
Old Q Values:  [ -180.6        -6764.65639938   683.24897166  -180.6       ]
New Q values:  [ -180.6        -6764.65639938   630.74218347  -180.6       ]
Reward: -1  Episode Reward:  41
xxxxx
x a x
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1193.47531601]
------
Step:10, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2508.08870266 -3947.298441  ]
New Q values:  [-9594.56523706 -8069.05606225  2508.08870266 -6275.22592205]
Reward: -10001  Episode Reward:  -9960
xxxxx
xg  x
x   x
x.. x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1193.47531601]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  1193.47531601]
New Q values:  [ -281.736      -1150.91067548   516.86887668  1040.82810666]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1080.40897381 1860.12660087 -180.6       ]
------
Step:2, Action:East
State  110
Old Q Values:  [-239.29051573 1080.40897381 1860.12660087 -180.6       ]
New Q values:  [-239.29051573 1080.40897381 1055.69907235 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1040.82810666]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  1040.82810666]
New Q values:  [ -281.736      -1150.91067548   516.86887668  2163.37189284]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5825.4688339   656.34543534 -120.29354603]
------
Step:4, Action:South
State  110
Old Q Values:  [-239.29051573 1080.40897381 1055.69907235 -180.6       ]
New Q values:  [-239.29051573 3719.66612436 1055.69907235 -180.6       ]
Reward: 9  Episode Reward:  16
xxxxx
x  .x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 10940.34178277     0.        ]
------
Step:5, Action:East
State  183
Old Q Values:  [ 861.58025644 1357.09471455 6348.57562845 1554.80203889]
New Q values:  [ 861.58025644 1357.09471455 5931.85057185 1554.80203889]
Reward: 9  Episode Reward:  25
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[ 7400.45312095  -200.61022961 11290.0677349      0.        ]
------
Step:6, Action:East
State  199
Old Q Values:  [  826.42207367  1212.31833426 11260.80108661  1915.70494401]
New Q values:  [  826.42207367  1212.31833426 23844.07972775  1915.70494401]
Reward: 9  Episode Reward:  34
xxxxx
x  .x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[64447.86431035  9499.28049842   790.72804752  5103.37501425]
------
Step:7, Action:North
State  216
Old Q Values:  [ 1601.96115248  6228.21420906   606.149024   -2865.34274144]
New Q values:  [ 1057.54928067  6228.21420906   606.149024   -2865.34274144]
Reward: 9  Episode Reward:  43
xxxxx
x  ax
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.37121607e+03]
------
Step:8, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.37121607e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.19689799e+03]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  2163.37189284]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  2163.37189284]
New Q values:  [ -281.736      -1150.91067548   516.86887668  1980.64859444]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
xg  x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 3719.66612436 1055.69907235 -180.6       ]
------
Step:10, Action:East
State  111
Old Q Values:  [-177.44732869 5825.4688339   656.34543534 -120.29354603]
New Q values:  [-177.44732869 5825.4688339   856.13275247 -120.29354603]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x g x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1980.64859444]
------
Step:11, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  4142.17748273  3356.19921495]
New Q values:  [  -56.91790269 -1902.20915811  4142.17748273  3089.52033615]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
x  gx
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5825.4688339   856.13275247 -120.29354603]
------
Step:12, Action:South
State  111
Old Q Values:  [-177.44732869 5825.4688339   856.13275247 -120.29354603]
New Q values:  [-177.44732869 5810.00109444  856.13275247 -120.29354603]
Reward: -1  Episode Reward:  38
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2927.0296359  11601.37853626   154.04646645]
------
Step:13, Action:South
State  191
Old Q Values:  [   3.06655861 2868.45176852 2029.22435297    0.        ]
New Q values:  [   3.06655861 2992.44495616 2029.22435297    0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 6152.2141625   1352.37702619 -3346.86631277   -12.17474163]
------
Step:14, Action:North
State  261
Old Q Values:  [ 6152.2141625   1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 3358.01915185  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  36
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[   3.06655861 2992.44495616 2029.22435297    0.        ]
------
Step:15, Action:South
State  189
Old Q Values:  [  533.05203844  2927.0296359  11601.37853626   154.04646645]
New Q values:  [  533.05203844  2177.61759991 11601.37853626   154.04646645]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 3358.01915185  1352.37702619 -3346.86631277   -12.17474163]
------
Step:16, Action:North
State  261
Old Q Values:  [ 3358.01915185  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 2240.34114759  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  34
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[   3.06655861 2992.44495616 2029.22435297    0.        ]
------
Step:17, Action:South
State  191
Old Q Values:  [   3.06655861 2992.44495616 2029.22435297    0.        ]
New Q values:  [   3.06655861 1868.48032674 2029.22435297    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2240.34114759  1352.37702619 -3346.86631277   -12.17474163]
------
Step:18, Action:North
State  261
Old Q Values:  [ 2240.34114759  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4375.95001991  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  32
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2177.61759991 11601.37853626   154.04646645]
------
Step:19, Action:South
State  191
Old Q Values:  [   3.06655861 1868.48032674 2029.22435297    0.        ]
New Q values:  [   3.06655861 2059.57713667 2029.22435297    0.        ]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 4375.95001991  1352.37702619 -3346.86631277   -12.17474163]
------
Step:20, Action:North
State  261
Old Q Values:  [ 4375.95001991  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 5230.19356884  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  30
xxxxx
x   x
xag x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2177.61759991 11601.37853626   154.04646645]
------
Step:21, Action:South
State  191
Old Q Values:  [   3.06655861 2059.57713667 2029.22435297    0.        ]
New Q values:  [   3.06655861 2392.28892532 2029.22435297    0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 5230.19356884  1352.37702619 -3346.86631277   -12.17474163]
------
Step:22, Action:North
State  260
Old Q Values:  [ 3521.44630416 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [ 1848.55637054 -5704.51612281  2149.47028662 -5679.36893145]
Reward: -1  Episode Reward:  28
xxxxx
x   x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  1.46859283e+03  0.00000000e+00]
------
Step:23, Action:East
State  188
Old Q Values:  [-6523.78898263  2298.3669598   2341.27811661     0.        ]
New Q values:  [-6523.78898263  2298.3669598   2492.41454654     0.        ]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xga x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  5188.34433299 1964.0000496   610.93635926]
------
Step:24, Action:South
State  206
Old Q Values:  [   0.         2477.63911564 1644.59524509    0.        ]
New Q values:  [   0.         2245.51354002 1644.59524509    0.        ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4163.52631256  2245.18728868]
------
Step:25, Action:East
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799  3219.04237595 77018.04223122]
New Q values:  [ 9275.12327166 -8521.23367799 74645.17554434 77018.04223122]
Reward: 100009  Episode Reward:  100045
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  5102.94046727 -2165.66138672   232.50800947]
------
Step:1, Action:South
State  111
Old Q Values:  [-177.44732869 5810.00109444  856.13275247 -120.29354603]
New Q values:  [-177.44732869 6734.33585337  856.13275247 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x . x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2.35401054e+03 1.23686189e+03 1.46831181e+04 3.33862213e+00]
------
Step:2, Action:East
State  181
Old Q Values:  [2.35401054e+03 1.23686189e+03 1.46831181e+04 3.33862213e+00]
New Q values:  [2354.01053875 1236.86188545 1435.15052069    3.33862213]
Reward: -9991  Episode Reward:  -9982
xxxxx
x . x
x g.x
x...x
xxxxx
xxxxx
x...x
x...x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[44507.19531321 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:1, Action:North
State  288
Old Q Values:  [44507.19531321 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [37142.63741839 -4059.26960032 -5588.09647059  6611.48906921]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x..ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[64447.86431035  9499.28049842   790.72804752  5103.37501425]
------
Step:2, Action:North
State  210
Old Q Values:  [64447.86431035  9499.28049842   790.72804752  5103.37501425]
New Q values:  [26143.61512237  9499.28049842   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.19689799e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.19689799e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.07835378e+03]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
xg. x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1980.64859444]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  1980.64859444]
New Q values:  [ -281.736      -1150.91067548   516.86887668  1336.36185033]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1795.6747085   223.04911789 -252.78192178]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 6734.33585337  856.13275247 -120.29354603]
New Q values:  [-177.44732869 3405.33750297  856.13275247 -120.29354603]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xa.gx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2354.01053875 1236.86188545 1435.15052069    3.33862213]
------
Step:6, Action:North
State  183
Old Q Values:  [ 861.58025644 1357.09471455 5931.85057185 1554.80203889]
New Q values:  [1365.63335347 1357.09471455 5931.85057185 1554.80203889]
Reward: -1  Episode Reward:  44
xxxxx
xa  x
x . x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3405.33750297  856.13275247 -120.29354603]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 3405.33750297  856.13275247 -120.29354603]
New Q values:  [-177.44732869 3141.09017274  856.13275247 -120.29354603]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xa. x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[1365.63335347 1357.09471455 5931.85057185 1554.80203889]
------
Step:8, Action:East
State  190
Old Q Values:  [ 1.04129094e+00 -7.77507115e+03  1.46859283e+03  0.00000000e+00]
New Q values:  [ 1.04129094e+00 -7.77507115e+03  1.14108275e+03  0.00000000e+00]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x a x
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:9, Action:East
State  194
Old Q Values:  [17070.53379332  7581.58548952 25451.31836707  2179.39995143]
New Q values:  [17070.53379332  7581.58548952 18023.01188354  2179.39995143]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26143.61512237  9499.28049842   790.72804752  5103.37501425]
------
Step:10, Action:North
State  216
Old Q Values:  [ 1057.54928067  6228.21420906   606.149024   -2865.34274144]
New Q values:  [  745.92584506  6228.21420906   606.149024   -2865.34274144]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.07835378e+03]
------
Step:11, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.07835378e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.31650065e+02]
Reward: -1  Episode Reward:  49
xxxxx
x a x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1336.36185033]
------
Step:12, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  1336.36185033]
New Q values:  [ -281.736      -1150.91067548   516.86887668  1476.27179195]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3141.09017274  856.13275247 -120.29354603]
------
Step:13, Action:South
State  110
Old Q Values:  [-239.29051573 3719.66612436 1055.69907235 -180.6       ]
New Q values:  [ -239.29051573 -3765.00918629  1055.69907235  -180.6       ]
Reward: -10001  Episode Reward:  -9953
xxxxx
x   x
xg  x
x . x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2354.01053875 1236.86188545 1435.15052069    3.33862213]
------
Step:1, Action:North
State  183
Old Q Values:  [1365.63335347 1357.09471455 5931.85057185 1554.80203889]
New Q values:  [2076.49919863 1357.09471455 5931.85057185 1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
xa..x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 5082.81952413 1868.2303995     0.        ]
------
Step:2, Action:South
State  110
Old Q Values:  [ -239.29051573 -3765.00918629  1055.69907235  -180.6       ]
New Q values:  [-239.29051573 1775.49886031 1055.69907235 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
xa .x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 10940.34178277     0.        ]
------
Step:3, Action:East
State  180
Old Q Values:  [ 1548.72903536  3060.24711158  6775.57057879 -4966.32149798]
New Q values:  [ 1548.72903536  3060.24711158  4825.25793031 -4966.32149798]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1649.98484399 1238.89434634 7052.09899598  231.67262594]
------
Step:4, Action:East
State  194
Old Q Values:  [17070.53379332  7581.58548952 18023.01188354  2179.39995143]
New Q values:  [17070.53379332  7581.58548952 15057.68929013  2179.39995143]
Reward: 9  Episode Reward:  16
xxxxx
x ..x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26143.61512237  9499.28049842   790.72804752  5103.37501425]
------
Step:5, Action:North
State  216
Old Q Values:  [  745.92584506  6228.21420906   606.149024   -2865.34274144]
New Q values:  [  553.26535767  6228.21420906   606.149024   -2865.34274144]
Reward: 9  Episode Reward:  25
xxxxx
x .ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.31650065e+02]
------
Step:6, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.31650065e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  7.80941564e+02]
Reward: 9  Episode Reward:  34
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1476.27179195]
------
Step:7, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  1476.27179195]
New Q values:  [ -281.736      -1150.91067548   516.86887668  1122.55837488]
Reward: -1  Episode Reward:  33
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1775.49886031 1055.69907235 -180.6       ]
------
Step:8, Action:East
State  110
Old Q Values:  [-239.29051573 1775.49886031 1055.69907235 -180.6       ]
New Q values:  [-239.29051573 1775.49886031  758.4471414  -180.6       ]
Reward: -1  Episode Reward:  32
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668  1122.55837488]
------
Step:9, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668  1122.55837488]
New Q values:  [ -281.736      -1150.91067548   516.86887668   981.07300804]
Reward: -1  Episode Reward:  31
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1775.49886031  758.4471414  -180.6       ]
------
Step:10, Action:East
State  110
Old Q Values:  [-239.29051573 1775.49886031  758.4471414  -180.6       ]
New Q values:  [-239.29051573 1775.49886031  597.10075897 -180.6       ]
Reward: -1  Episode Reward:  30
xxxxx
x a x
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668   981.07300804]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668   981.07300804]
New Q values:  [ -281.736      -1150.91067548   516.86887668   924.47886131]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1775.49886031  597.10075897 -180.6       ]
------
Step:12, Action:East
State  111
Old Q Values:  [-177.44732869 3141.09017274  856.13275247 -120.29354603]
New Q values:  [-177.44732869 3141.09017274  619.19675938 -120.29354603]
Reward: -1  Episode Reward:  28
xxxxx
x a x
x g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668   924.47886131]
------
Step:13, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668   924.47886131]
New Q values:  [ -281.736      -1150.91067548   516.86887668   907.89395707]
Reward: -1  Episode Reward:  27
xxxxx
xa  x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1795.6747085   223.04911789 -252.78192178]
------
Step:14, Action:South
State  111
Old Q Values:  [-177.44732869 3141.09017274  619.19675938 -120.29354603]
New Q values:  [-177.44732869 4736.24962998  619.19675938 -120.29354603]
Reward: -1  Episode Reward:  26
xxxxx
x   x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2177.61759991 11601.37853626   154.04646645]
------
Step:15, Action:South
State  188
Old Q Values:  [-6523.78898263  2298.3669598   2492.41454654     0.        ]
New Q values:  [-6523.78898263  1569.58786991  2492.41454654     0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x   x
xg  x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1848.55637054 -5704.51612281  2149.47028662 -5679.36893145]
------
Step:16, Action:East
State  260
Old Q Values:  [ 1848.55637054 -5704.51612281  2149.47028662 -5679.36893145]
New Q values:  [ 1848.55637054 -5704.51612281  2114.24600842 -5679.36893145]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4163.52631256  2245.18728868]
------
Step:17, Action:East
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 74645.17554434 77018.04223122]
New Q values:  [  9275.12327166  -8521.23367799 101006.26144325  77018.04223122]
Reward: 100009  Episode Reward:  100053
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243 10940.34178277     0.        ]
------
Step:1, Action:East
State  183
Old Q Values:  [2076.49919863 1357.09471455 5931.85057185 1554.80203889]
New Q values:  [2076.49919863 1357.09471455 7499.30036674 1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[17070.53379332  7581.58548952 15057.68929013  2179.39995143]
------
Step:2, Action:North
State  196
Old Q Values:  [1649.98484399 1238.89434634 7052.09899598  231.67262594]
New Q values:  [1625.71203982 1238.89434634 7052.09899598  231.67262594]
Reward: 9  Episode Reward:  18
xxxxx
x.a.x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  681.20236034 3201.06034076]
------
Step:3, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 2170.86014946 1772.94838375]
New Q values:  [   0.         1166.51141701 2170.86014946 2245.46149368]
Reward: 9  Episode Reward:  27
xxxxx
xag.x
x  .x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  5102.94046727 -2165.66138672   232.50800947]
------
Step:4, Action:South
State  108
Old Q Values:  [-8463.16477134  3395.06906969  1637.39424494     0.        ]
New Q values:  [-8463.16477134  2805.00500697  1637.39424494     0.        ]
Reward: -1  Episode Reward:  26
xxxxx
xg .x
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 1548.72903536  3060.24711158  4825.25793031 -4966.32149798]
------
Step:5, Action:East
State  181
Old Q Values:  [2354.01053875 1236.86188545 1435.15052069    3.33862213]
New Q values:  [2354.01053875 1236.86188545 2689.08990707    3.33862213]
Reward: -1  Episode Reward:  25
xxxxx
x g.x
x a.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1625.71203982 1238.89434634 7052.09899598  231.67262594]
------
Step:6, Action:East
State  196
Old Q Values:  [1625.71203982 1238.89434634 7052.09899598  231.67262594]
New Q values:  [1625.71203982 1238.89434634 4694.70386111  231.67262594]
Reward: 9  Episode Reward:  34
xxxxx
x  .x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  553.26535767  6228.21420906   606.149024   -2865.34274144]
------
Step:7, Action:South
State  208
Old Q Values:  [ 6547.01737408 11401.96323484 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408 15708.97651945 -4584.50430574 -1049.83093042]
Reward: 9  Episode Reward:  43
xxxxx
x  .x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[37142.63741839 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:8, Action:North
State  288
Old Q Values:  [37142.63741839 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [22699.53950407 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26143.61512237  9499.28049842   790.72804752  5103.37501425]
------
Step:9, Action:North
State  216
Old Q Values:  [  553.26535767  6228.21420906   606.149024   -2865.34274144]
New Q values:  [  460.9886122   6228.21420906   606.149024   -2865.34274144]
Reward: 9  Episode Reward:  51
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  7.80941564e+02]
------
Step:10, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  7.80941564e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.84144813e+02]
Reward: -1  Episode Reward:  50
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668   907.89395707]
------
Step:11, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  4142.17748273  3089.52033615]
New Q values:  [  -56.91790269 -1902.20915811  4142.17748273  2656.08302345]
Reward: -1  Episode Reward:  49
xxxxx
xa  x
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4736.24962998  619.19675938 -120.29354603]
------
Step:12, Action:South
State  111
Old Q Values:  [-177.44732869 4736.24962998  619.19675938 -120.29354603]
New Q values:  [-177.44732869 2611.58652959  619.19675938 -120.29354603]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[   3.06655861 2392.28892532 2029.22435297    0.        ]
------
Step:13, Action:South
State  179
Old Q Values:  [82228.67666629 16101.90751562 50537.28135001     0.        ]
New Q values:  [82228.67666629 18995.54020685 50537.28135001     0.        ]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41851.25733533 15941.62716192 13169.98702937  1875.31501677]
------
Step:14, Action:North
State  261
Old Q Values:  [ 5230.19356884  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 2809.16410513  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SE
[   3.06655861 2392.28892532 2029.22435297    0.        ]
------
Step:15, Action:South
State  189
Old Q Values:  [  533.05203844  2177.61759991 11601.37853626   154.04646645]
New Q values:  [  533.05203844  1713.19627151 11601.37853626   154.04646645]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 2809.16410513  1352.37702619 -3346.86631277   -12.17474163]
------
Step:16, Action:North
State  261
Old Q Values:  [ 2809.16410513  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [ 4603.47920293  1352.37702619 -3346.86631277   -12.17474163]
Reward: -1  Episode Reward:  44
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  1713.19627151 11601.37853626   154.04646645]
------
Step:17, Action:East
State  189
Old Q Values:  [  533.05203844  1713.19627151 11601.37853626   154.04646645]
New Q values:  [  533.05203844  1713.19627151 13865.94330016   154.04646645]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x agx
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 30753.30628551 -7266.86964969  1311.30124863]
------
Step:18, Action:South
State  193
Old Q Values:  [38280.11569458  3592.96905128 11789.21969382   767.35890262]
New Q values:  [38280.11569458 66969.3937361  11789.21969382   767.35890262]
Reward: 100009  Episode Reward:  100052
xxxxx
x  gx
x   x
x a x
xxxxx
xxxxx
xa..x
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2611.58652959  619.19675938 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 5082.81952413 1868.2303995     0.        ]
New Q values:  [ 221.30610858 2845.25478177 1868.2303995     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2354.01053875 1236.86188545 2689.08990707    3.33862213]
------
Step:2, Action:East
State  183
Old Q Values:  [2076.49919863 1357.09471455 7499.30036674 1554.80203889]
New Q values:  [ 2076.49919863  1357.09471455 10152.34406502  1554.80203889]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  826.42207367  1212.31833426 23844.07972775  1915.70494401]
------
Step:3, Action:East
State  194
Old Q Values:  [17070.53379332  7581.58548952 15057.68929013  2179.39995143]
New Q values:  [17070.53379332  7581.58548952 13871.56025276  2179.39995143]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26143.61512237  9499.28049842   790.72804752  5103.37501425]
------
Step:4, Action:North
State  218
Old Q Values:  [1242.82087023 6329.78476486    0.          930.00701399]
New Q values:  [ 677.77179188 6329.78476486    0.          930.00701399]
Reward: 9  Episode Reward:  26
xxxxx
x .ax
x   x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.84144813e+02]
------
Step:5, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.84144813e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.11426112e+02]
Reward: 9  Episode Reward:  35
xxxxx
x a x
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   516.86887668   907.89395707]
------
Step:6, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2508.08870266 -6275.22592205]
New Q values:  [-9594.56523706 -8069.05606225  2508.08870266 -7669.18886673]
Reward: -10001  Episode Reward:  -9966
xxxxx
xg  x
x   x
x...x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[ 1848.55637054 -5704.51612281  2114.24600842 -5679.36893145]
------
Step:1, Action:East
State  261
Old Q Values:  [ 4603.47920293  1352.37702619 -3346.86631277   -12.17474163]
New Q values:  [4603.47920293 1352.37702619  -84.28863134  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4163.52631256  2245.18728868]
------
Step:2, Action:East
State  273
Old Q Values:  [ 1188.86709371  1201.72649971   443.99421868 18422.68705195]
New Q values:  [ 1188.86709371  1201.72649971  6992.85953869 18422.68705195]
Reward: 9  Episode Reward:  18
xxxxx
x .gx
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[22699.53950407 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:3, Action:North
State  288
Old Q Values:  [22699.53950407 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [ 7797.90875746 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -9991  Episode Reward:  -9973
xxxxx
x ..x
x..gx
x   x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[54522.32013219 16053.62807234  8240.17937465  1169.39963074]
------
Step:1, Action:North
State  193
Old Q Values:  [38280.11569458 66969.3937361  11789.21969382   767.35890262]
New Q values:  [16560.09952265 66969.3937361  11789.21969382   767.35890262]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  4142.17748273  2656.08302345]
------
Step:2, Action:East
State  121
Old Q Values:  [    0.             0.         -8209.41191864   273.64612079]
New Q values:  [    0.             0.         -8338.81083201   273.64612079]
Reward: -9991  Episode Reward:  -9982
xxxxx
x. gx
x. .x
x.. x
xxxxx
xxxxx
x..ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.11426112e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.11426112e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.45262369e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  4142.17748273  2656.08302345]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   516.86887668   907.89395707]
New Q values:  [ -281.736      -1150.91067548   641.93465758   907.89395707]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.45262369e+03]
------
Step:3, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.45262369e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.52817663e+02]
Reward: -1  Episode Reward:  7
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   907.89395707]
------
Step:4, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  4142.17748273  2656.08302345]
New Q values:  [  -56.91790269 -1902.20915811  4142.17748273  1606.53562193]
Reward: 9  Episode Reward:  16
xxxxx
xa  x
x..gx
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1795.6747085   223.04911789 -252.78192178]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 2611.58652959  619.19675938 -120.29354603]
New Q values:  [-177.44732869 4095.73783134  619.19675938 -120.29354603]
Reward: 9  Episode Reward:  25
xxxxx
x   x
xa..x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2076.49919863  1357.09471455 10152.34406502  1554.80203889]
------
Step:6, Action:East
State  183
Old Q Values:  [ 2076.49919863  1357.09471455 10152.34406502  1554.80203889]
New Q values:  [2076.49919863 1357.09471455 7453.35794648 1554.80203889]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[ 7400.45312095  -200.61022961 11290.0677349      0.        ]
------
Step:7, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.28201863e+04 2.46738310e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 1.28201863e+04 2.89128867e+03 4.59156348e+03]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x  ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 677.77179188 6329.78476486    0.          930.00701399]
------
Step:8, Action:West
State  216
Old Q Values:  [  460.9886122   6228.21420906   606.149024   -2865.34274144]
New Q values:  [ 460.9886122  6228.21420906  606.149024   8079.25478908]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 30753.30628551 -7266.86964969  1311.30124863]
------
Step:9, Action:South
State  200
Old Q Values:  [ 169.9257398  5188.34433299 1964.0000496   610.93635926]
New Q values:  [  169.9257398  32376.61616617  1964.0000496    610.93635926]
Reward: -1  Episode Reward:  41
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  9275.12327166  -8521.23367799 101006.26144325  77018.04223122]
------
Step:10, Action:East
State  272
Old Q Values:  [  9275.12327166  -8521.23367799 101006.26144325  77018.04223122]
New Q values:  [ 9275.12327166 -8521.23367799 42747.27720454 77018.04223122]
Reward: 9  Episode Reward:  50
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7797.90875746 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:11, Action:North
State  288
Old Q Values:  [ 7797.90875746 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [ 5542.33993971 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  49
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 460.9886122  6228.21420906  606.149024   8079.25478908]
------
Step:12, Action:West
State  216
Old Q Values:  [ 460.9886122  6228.21420906  606.149024   8079.25478908]
New Q values:  [  460.9886122   6228.21420906   606.149024   12944.08676548]
Reward: -1  Episode Reward:  48
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  32376.61616617  1964.0000496    610.93635926]
------
Step:13, Action:South
State  200
Old Q Values:  [  169.9257398  32376.61616617  1964.0000496    610.93635926]
New Q values:  [  169.9257398  36055.45913583  1964.0000496    610.93635926]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799 42747.27720454 77018.04223122]
------
Step:14, Action:West
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 42747.27720454 77018.04223122]
New Q values:  [  9275.12327166  -8521.23367799  42747.27720454 103367.99409309]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
x   x
xag x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[38762.73258603 84831.69553085 14412.78382458  2453.92999194]
------
Step:1, Action:South
State  192
Old Q Values:  [38762.73258603 84831.69553085 14412.78382458  2453.92999194]
New Q values:  [38762.73258603 64948.47644027 14412.78382458  2453.92999194]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  9275.12327166  -8521.23367799  42747.27720454 103367.99409309]
------
Step:2, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4163.52631256  2245.18728868]
New Q values:  [-2561.28592178 -5807.06396197  4163.52631256  2284.51867635]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4603.47920293 1352.37702619  -84.28863134  -12.17474163]
------
Step:3, Action:North
State  261
Old Q Values:  [4603.47920293 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [4076.79906512 1352.37702619  -84.28863134  -12.17474163]
Reward: -1  Episode Reward:  17
xxxxx
x...x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2076.49919863 1357.09471455 7453.35794648 1554.80203889]
------
Step:4, Action:East
State  183
Old Q Values:  [2076.49919863 1357.09471455 7453.35794648 1554.80203889]
New Q values:  [2076.49919863 1357.09471455 8101.90331659 1554.80203889]
Reward: -1  Episode Reward:  16
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[17070.53379332  7581.58548952 13871.56025276  2179.39995143]
------
Step:5, Action:North
State  196
Old Q Values:  [1625.71203982 1238.89434634 4694.70386111  231.67262594]
New Q values:  [1616.00291816 1238.89434634 4694.70386111  231.67262594]
Reward: 9  Episode Reward:  25
xxxxx
x.a.x
x g.x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  681.20236034 3201.06034076]
------
Step:6, Action:West
State  119
Old Q Values:  [   0.          465.4216645     0.         2736.92405632]
New Q values:  [   0.          465.4216645     0.         1953.74605706]
Reward: 9  Episode Reward:  34
xxxxx
xa .x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2845.25478177 1868.2303995     0.        ]
------
Step:7, Action:South
State  111
Old Q Values:  [-177.44732869 4095.73783134  619.19675938 -120.29354603]
New Q values:  [-177.44732869 2444.42210466  619.19675938 -120.29354603]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
xag.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2354.01053875 1236.86188545 2689.08990707    3.33862213]
------
Step:8, Action:North
State  181
Old Q Values:  [2354.01053875 1236.86188545 2689.08990707    3.33862213]
New Q values:  [1794.58065003 1236.86188545 2689.08990707    3.33862213]
Reward: -1  Episode Reward:  32
xxxxx
xa .x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 2845.25478177 1868.2303995     0.        ]
------
Step:9, Action:South
State  109
Old Q Values:  [ -241.10880094  5102.94046727 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  2847.30315903 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  31
xxxxx
x  gx
xa .x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1794.58065003 1236.86188545 2689.08990707    3.33862213]
------
Step:10, Action:East
State  181
Old Q Values:  [1794.58065003 1236.86188545 2689.08990707    3.33862213]
New Q values:  [1794.58065003 1236.86188545 2483.44712116    3.33862213]
Reward: -1  Episode Reward:  30
xxxxx
x g.x
x a.x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1616.00291816 1238.89434634 4694.70386111  231.67262594]
------
Step:11, Action:East
State  205
Old Q Values:  [   0.         1285.92441063    0.          198.38683706]
New Q values:  [   0.         1285.92441063 3888.62602964  198.38683706]
Reward: 9  Episode Reward:  39
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  460.9886122   6228.21420906   606.149024   12944.08676548]
------
Step:12, Action:West
State  208
Old Q Values:  [ 6547.01737408 15708.97651945 -4584.50430574 -1049.83093042]
New Q values:  [ 6547.01737408 15708.97651945 -4584.50430574   987.87878616]
Reward: -1  Episode Reward:  38
xxxxx
x g.x
x a x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1616.00291816 1238.89434634 4694.70386111  231.67262594]
------
Step:13, Action:East
State  205
Old Q Values:  [   0.         1285.92441063 3888.62602964  198.38683706]
New Q values:  [   0.         1285.92441063 5438.0764415   198.38683706]
Reward: -1  Episode Reward:  37
xxxxx
x  gx
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  460.9886122   6228.21420906   606.149024   12944.08676548]
------
Step:14, Action:West
State  208
Old Q Values:  [ 6547.01737408 15708.97651945 -4584.50430574   987.87878616]
New Q values:  [ 6547.01737408 15708.97651945 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  36
xxxxx
x  .x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  1394.50148356 -6341.45812961   403.06255908]
------
Step:15, Action:South
State  196
Old Q Values:  [1616.00291816 1238.89434634 4694.70386111  231.67262594]
New Q values:  [1616.00291816 1744.0156323  4694.70386111  231.67262594]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4163.52631256  2284.51867635]
------
Step:16, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4163.52631256  2284.51867635]
New Q values:  [-2561.28592178 -5807.06396197  3654.25724578  2284.51867635]
Reward: 9  Episode Reward:  44
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5542.33993971 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:17, Action:North
State  288
Old Q Values:  [ 5542.33993971 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [10059.42051259 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[26143.61512237  9499.28049842   790.72804752  5103.37501425]
------
Step:18, Action:North
State  210
Old Q Values:  [26143.61512237  9499.28049842   790.72804752  5103.37501425]
New Q values:  [89870.50846368  9499.28049842   790.72804752  5103.37501425]
Reward: 100009  Episode Reward:  100052
xxxxx
x  ax
x   x
x g x
xxxxx
xxxxx
x.a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   907.89395707]
------
Step:1, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   641.93465758   907.89395707]
New Q values:  [ -281.736      -1150.91067548   641.93465758   901.20724092]
Reward: 9  Episode Reward:  9
xxxxx
xa .x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1775.49886031  597.10075897 -180.6       ]
------
Step:2, Action:East
State  110
Old Q Values:  [-239.29051573 1775.49886031  597.10075897 -180.6       ]
New Q values:  [-239.29051573 1775.49886031  508.60247587 -180.6       ]
Reward: -1  Episode Reward:  8
xxxxx
x a.x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   901.20724092]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   641.93465758   901.20724092]
New Q values:  [ -281.736      -1150.91067548   641.93465758  1093.20952777]
Reward: -1  Episode Reward:  7
xxxxx
xa .x
x...x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2444.42210466  619.19675938 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 2444.42210466  619.19675938 -120.29354603]
New Q values:  [-177.44732869 3413.73983684  619.19675938 -120.29354603]
Reward: 9  Episode Reward:  16
xxxxx
x  .x
xa..x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2076.49919863 1357.09471455 8101.90331659 1554.80203889]
------
Step:5, Action:East
State  183
Old Q Values:  [2076.49919863 1357.09471455 8101.90331659 1554.80203889]
New Q values:  [2076.49919863 1357.09471455 6633.18164711 1554.80203889]
Reward: 9  Episode Reward:  25
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[ 7400.45312095  -200.61022961 11290.0677349      0.        ]
------
Step:6, Action:East
State  196
Old Q Values:  [1616.00291816 1744.0156323  4694.70386111  231.67262594]
New Q values:  [1616.00291816 1744.0156323  5766.50757409  231.67262594]
Reward: 9  Episode Reward:  34
xxxxx
x  .x
x gax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  460.9886122   6228.21420906   606.149024   12944.08676548]
------
Step:7, Action:South
State  208
Old Q Values:  [ 6547.01737408 15708.97651945 -4584.50430574   812.90195953]
New Q values:  [ 6547.01737408  9306.81676156 -4584.50430574   812.90195953]
Reward: 9  Episode Reward:  43
xxxxx
x g.x
x   x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10059.42051259 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:8, Action:North
State  288
Old Q Values:  [10059.42051259 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [ 6815.2132335  -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x gax
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408  9306.81676156 -4584.50430574   812.90195953]
------
Step:9, Action:South
State  208
Old Q Values:  [ 6547.01737408  9306.81676156 -4584.50430574   812.90195953]
New Q values:  [ 6547.01737408  5766.69067468 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
xg  x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6815.2132335  -4059.26960032 -5588.09647059  6611.48906921]
------
Step:10, Action:North
State  288
Old Q Values:  [ 6815.2132335  -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [29686.63783251 -4059.26960032 -5588.09647059  6611.48906921]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[89870.50846368  9499.28049842   790.72804752  5103.37501425]
------
Step:11, Action:North
State  216
Old Q Values:  [  460.9886122   6228.21420906   606.149024   12944.08676548]
New Q values:  [  445.64074378  6228.21420906   606.149024   12944.08676548]
Reward: 9  Episode Reward:  49
xxxxx
x  ax
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.52817663e+02]
------
Step:12, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  8.52817663e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  6.68489924e+02]
Reward: -1  Episode Reward:  48
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758  1093.20952777]
------
Step:13, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   641.93465758  1093.20952777]
New Q values:  [ -281.736      -1150.91067548   641.93465758   969.3334692 ]
Reward: -1  Episode Reward:  47
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1775.49886031  508.60247587 -180.6       ]
------
Step:14, Action:East
State  110
Old Q Values:  [-239.29051573 1775.49886031  508.60247587 -180.6       ]
New Q values:  [-239.29051573 1775.49886031  493.64103111 -180.6       ]
Reward: -1  Episode Reward:  46
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   969.3334692 ]
------
Step:15, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   641.93465758   969.3334692 ]
New Q values:  [ -281.736      -1150.91067548   641.93465758   919.78304577]
Reward: -1  Episode Reward:  45
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1775.49886031  493.64103111 -180.6       ]
------
Step:16, Action:East
State  110
Old Q Values:  [-239.29051573 1775.49886031  493.64103111 -180.6       ]
New Q values:  [-239.29051573 1775.49886031  472.79132618 -180.6       ]
Reward: -1  Episode Reward:  44
xxxxx
x a x
x   x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   919.78304577]
------
Step:17, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   641.93465758   919.78304577]
New Q values:  [ -281.736      -1150.91067548   641.93465758   899.9628764 ]
Reward: -1  Episode Reward:  43
xxxxx
xa  x
xg  x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1775.49886031  472.79132618 -180.6       ]
------
Step:18, Action:East
State  111
Old Q Values:  [-177.44732869 3413.73983684  619.19675938 -120.29354603]
New Q values:  [-177.44732869 3413.73983684  517.06756667 -120.29354603]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   899.9628764 ]
------
Step:19, Action:West
State  114
Old Q Values:  [  -180.6          3557.6642036   49543.89769946 129190.58891738]
New Q values:  [ -180.6         3557.6642036  49543.89769946 69359.18702831]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NW ghost_dir:SE
[    0.         32370.3103544  58945.17153785     0.        ]
------
Step:20, Action:East
State  111
Old Q Values:  [-177.44732869 3413.73983684  517.06756667 -120.29354603]
New Q values:  [-177.44732869 3413.73983684  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  40
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   899.9628764 ]
------
Step:21, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  4142.17748273  1606.53562193]
New Q values:  [  -56.91790269 -1902.20915811  4142.17748273  1666.13619982]
Reward: -1  Episode Reward:  39
xxxxx
xa  x
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3413.73983684  476.21588959 -120.29354603]
------
Step:22, Action:South
State  109
Old Q Values:  [ -241.10880094  2847.30315903 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  5298.10425366 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  38
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  1713.19627151 13865.94330016   154.04646645]
------
Step:23, Action:East
State  189
Old Q Values:  [  533.05203844  1713.19627151 13865.94330016   154.04646645]
New Q values:  [  533.05203844  1713.19627151 16362.41506081   154.04646645]
Reward: -1  Episode Reward:  37
xxxxx
x g x
x a x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  36055.45913583  1964.0000496    610.93635926]
------
Step:24, Action:South
State  193
Old Q Values:  [16560.09952265 66969.3937361  11789.21969382   767.35890262]
New Q values:  [16560.09952265 92319.96361003 11789.21969382   767.35890262]
Reward: 100009  Episode Reward:  100046
xxxxx
x  gx
x   x
x a x
xxxxx
xxxxx
x..gx
x..ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6547.01737408  5766.69067468 -4584.50430574   812.90195953]
------
Step:1, Action:South
State  208
Old Q Values:  [ 6547.01737408  5766.69067468 -4584.50430574   812.90195953]
New Q values:  [ 6547.01737408 11218.06761962 -4584.50430574   812.90195953]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[29686.63783251 -4059.26960032 -5588.09647059  6611.48906921]
------
Step:2, Action:West
State  288
Old Q Values:  [29686.63783251 -4059.26960032 -5588.09647059  6611.48906921]
New Q values:  [29686.63783251 -4059.26960032 -5588.09647059  8176.80174327]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x..gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  6992.85953869 18422.68705195]
------
Step:3, Action:West
State  272
Old Q Values:  [  9275.12327166  -8521.23367799  42747.27720454 103367.99409309]
New Q values:  [ 9275.12327166 -8521.23367799 42747.27720454 53907.97483783]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[41851.25733533 15941.62716192 13169.98702937  1875.31501677]
------
Step:4, Action:North
State  257
Old Q Values:  [41851.25733533 15941.62716192 13169.98702937  1875.31501677]
New Q values:  [34888.71834854 15941.62716192 13169.98702937  1875.31501677]
Reward: 9  Episode Reward:  36
xxxxx
x.g x
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039  44491.85946264     0.        ]
------
Step:5, Action:North
State  181
Old Q Values:  [1794.58065003 1236.86188545 2483.44712116    3.33862213]
New Q values:  [2155.77069959 1236.86188545 2483.44712116    3.33862213]
Reward: 9  Episode Reward:  45
xxxxx
xa.gx
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         4775.12813192    0.            0.        ]
------
Step:6, Action:South
State  103
Old Q Values:  [ 221.30610858 2845.25478177 1868.2303995     0.        ]
New Q values:  [ 221.30610858 1882.53604906 1868.2303995     0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x . x
xa.gx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2155.77069959 1236.86188545 2483.44712116    3.33862213]
------
Step:7, Action:East
State  183
Old Q Values:  [2076.49919863 1357.09471455 6633.18164711 1554.80203889]
New Q values:  [ 2076.49919863  1357.09471455 19015.3686985   1554.80203889]
Reward: 9  Episode Reward:  53
xxxxx
x . x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[54522.32013219 16053.62807234  8240.17937465  1169.39963074]
------
Step:8, Action:North
State  193
Old Q Values:  [16560.09952265 92319.96361003 11789.21969382   767.35890262]
New Q values:  [105564.11374507  92319.96361003  11789.21969382    767.35890262]
Reward: 100009  Episode Reward:  100062
xxxxx
x a x
x  gx
x   x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[105564.11374507  92319.96361003  11789.21969382    767.35890262]
------
Step:1, Action:North
State  192
Old Q Values:  [38762.73258603 64948.47644027 14412.78382458  2453.92999194]
New Q values:  [15780.48189733 64948.47644027 14412.78382458  2453.92999194]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   899.9628764 ]
------
Step:2, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2508.08870266 -7669.18886673]
New Q values:  [-9594.56523706 -8069.05606225  2508.08870266 -2390.90087924]
Reward: 9  Episode Reward:  18
xxxxx
xag.x
x.  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        2237.91555819 -764.93196255    0.        ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 3413.73983684  476.21588959 -120.29354603]
New Q values:  [-177.44732869 6279.62045298  476.21588959 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
xag x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  1713.19627151 16362.41506081   154.04646645]
------
Step:4, Action:South
State  183
Old Q Values:  [ 2076.49919863  1357.09471455 19015.3686985   1554.80203889]
New Q values:  [ 2076.49919863  1771.27760536 19015.3686985   1554.80203889]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4076.79906512 1352.37702619  -84.28863134  -12.17474163]
------
Step:5, Action:North
State  261
Old Q Values:  [4076.79906512 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [7334.7302356  1352.37702619  -84.28863134  -12.17474163]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
xa  x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2076.49919863  1771.27760536 19015.3686985   1554.80203889]
------
Step:6, Action:East
State  189
Old Q Values:  [  533.05203844  1713.19627151 16362.41506081   154.04646645]
New Q values:  [ 533.05203844 1713.19627151 6962.71646939  154.04646645]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x agx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  1394.50148356 -6341.45812961   403.06255908]
------
Step:7, Action:South
State  199
Old Q Values:  [  826.42207367  1212.31833426 23844.07972775  1915.70494401]
New Q values:  [  826.42207367  1175.46486763 23844.07972775  1915.70494401]
Reward: 9  Episode Reward:  43
xxxxx
x  .x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 5.04537534e+02 2.28379178e+03 1.06376909e+03]
------
Step:8, Action:West
State  277
Old Q Values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 1.06376909e+03]
New Q values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.62532671e+03]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[7334.7302356  1352.37702619  -84.28863134  -12.17474163]
------
Step:9, Action:North
State  261
Old Q Values:  [7334.7302356  1352.37702619  -84.28863134  -12.17474163]
New Q values:  [8637.90270379 1352.37702619  -84.28863134  -12.17474163]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2076.49919863  1771.27760536 19015.3686985   1554.80203889]
------
Step:10, Action:East
State  181
Old Q Values:  [2155.77069959 1236.86188545 2483.44712116    3.33862213]
New Q values:  [2155.77069959 1236.86188545 1411.12929353    3.33862213]
Reward: -1  Episode Reward:  40
xxxxx
x  .x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  1394.50148356 -6341.45812961   403.06255908]
------
Step:11, Action:South
State  196
Old Q Values:  [1616.00291816 1744.0156323  5766.50757409  231.67262594]
New Q values:  [1616.00291816 1793.28342666 5766.50757409  231.67262594]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  3654.25724578  2284.51867635]
------
Step:12, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  3654.25724578  2284.51867635]
New Q values:  [-2561.28592178 -5807.06396197 10373.09424807  2284.51867635]
Reward: 9  Episode Reward:  48
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[29686.63783251 -4059.26960032 -5588.09647059  8176.80174327]
------
Step:13, Action:North
State  288
Old Q Values:  [29686.63783251 -4059.26960032 -5588.09647059  8176.80174327]
New Q values:  [38835.20767211 -4059.26960032 -5588.09647059  8176.80174327]
Reward: -1  Episode Reward:  47
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[89870.50846368  9499.28049842   790.72804752  5103.37501425]
------
Step:14, Action:North
State  208
Old Q Values:  [ 6547.01737408 11218.06761962 -4584.50430574   812.90195953]
New Q values:  [82031.86936437 11218.06761962 -4584.50430574   812.90195953]
Reward: 100009  Episode Reward:  100056
xxxxx
x  ax
xg  x
x   x
xxxxx
xxxxx
x..ax
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  6.68489924e+02]
------
Step:1, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  6.68489924e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.42784832e+02]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
xg..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   641.93465758   899.9628764 ]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   641.93465758   899.9628764 ]
New Q values:  [ -281.736      -1150.91067548   641.93465758   898.03480866]
Reward: 9  Episode Reward:  18
xxxxx
xa  x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1775.49886031  472.79132618 -180.6       ]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 6279.62045298  476.21588959 -120.29354603]
New Q values:  [-177.44732869 8221.85879074  476.21588959 -120.29354603]
Reward: 9  Episode Reward:  27
xxxxx
x   x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2076.49919863  1771.27760536 19015.3686985   1554.80203889]
------
Step:4, Action:East
State  190
Old Q Values:  [ 1.04129094e+00 -7.77507115e+03  1.14108275e+03  0.00000000e+00]
New Q values:  [ 1.04129094e+00 -7.77507115e+03  3.84885342e+03  0.00000000e+00]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[ 7400.45312095  -200.61022961 11290.0677349      0.        ]
------
Step:5, Action:East
State  200
Old Q Values:  [  169.9257398  36055.45913583  1964.0000496    610.93635926]
New Q values:  [  169.9257398  36055.45913583  4674.22604949   610.93635926]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  445.64074378  6228.21420906   606.149024   12944.08676548]
------
Step:6, Action:West
State  218
Old Q Values:  [ 677.77179188 6329.78476486    0.          930.00701399]
New Q values:  [ 677.77179188 6329.78476486    0.          919.64842823]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x a x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:7, Action:East
State  200
Old Q Values:  [  169.9257398  36055.45913583  4674.22604949   610.93635926]
New Q values:  [  169.9257398  36055.45913583  5752.31644944   610.93635926]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  445.64074378  6228.21420906   606.149024   12944.08676548]
------
Step:8, Action:West
State  216
Old Q Values:  [  445.64074378  6228.21420906   606.149024   12944.08676548]
New Q values:  [  445.64074378  6228.21420906   606.149024   15993.67244694]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
x a x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  36055.45913583  5752.31644944   610.93635926]
------
Step:9, Action:South
State  204
Old Q Values:  [   0.         3600.83401675 4744.72195498  441.58769553]
New Q values:  [   0.         4557.66188112 4744.72195498  441.58769553]
Reward: 9  Episode Reward:  51
xxxxx
x g x
x   x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197 10373.09424807  2284.51867635]
------
Step:10, Action:East
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 42747.27720454 53907.97483783]
New Q values:  [ 9275.12327166 -8521.23367799 88754.87318345 53907.97483783]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
x g x
x  ax
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[38835.20767211 -4059.26960032 -5588.09647059  8176.80174327]
------
Step:1, Action:North
State  288
Old Q Values:  [38835.20767211 -4059.26960032 -5588.09647059  8176.80174327]
New Q values:  [40149.04387815 -4059.26960032 -5588.09647059  8176.80174327]
Reward: 9  Episode Reward:  9
xxxxx
xg..x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[82031.86936437 11218.06761962 -4584.50430574   812.90195953]
------
Step:2, Action:North
State  208
Old Q Values:  [82031.86936437 11218.06761962 -4584.50430574   812.90195953]
New Q values:  [33757.7016812  11218.06761962 -4584.50430574   812.90195953]
Reward: 9  Episode Reward:  18
xxxxx
x.gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  3131.8464515    660.86649319 -4915.89987952]
------
Step:3, Action:South
State  136
Old Q Values:  [  878.22269011  3131.8464515    660.86649319 -4915.89987952]
New Q values:  [  878.22269011 11379.44908496   660.86649319 -4915.89987952]
Reward: -1  Episode Reward:  17
xxxxx
xg. x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[33757.7016812  11218.06761962 -4584.50430574   812.90195953]
------
Step:4, Action:North
State  208
Old Q Values:  [33757.7016812  11218.06761962 -4584.50430574   812.90195953]
New Q values:  [13665.31612218 11218.06761962 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  16
xxxxx
x..ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.42784832e+02]
------
Step:5, Action:West
State  136
Old Q Values:  [  878.22269011 11379.44908496   660.86649319 -4915.89987952]
New Q values:  [  878.22269011 11379.44908496   660.86649319 -1208.53334101]
Reward: 9  Episode Reward:  25
xxxxx
xga x
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2508.08870266 -2390.90087924]
------
Step:6, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   641.93465758   898.03480866]
New Q values:  [ -281.736      -1150.91067548   419.00931273   898.03480866]
Reward: -1  Episode Reward:  24
xxxxx
x. ax
xg. x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.42784832e+02]
------
Step:7, Action:West
State  136
Old Q Values:  [  878.22269011 11379.44908496   660.86649319 -1208.53334101]
New Q values:  [  878.22269011 11379.44908496   660.86649319   268.41327439]
Reward: -1  Episode Reward:  23
xxxxx
xga x
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  2508.08870266 -2390.90087924]
------
Step:8, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2508.08870266 -2390.90087924]
New Q values:  [-9594.56523706 -8069.05606225  4416.47020655 -2390.90087924]
Reward: -1  Episode Reward:  22
xxxxx
x.gax
x . x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011 11379.44908496   660.86649319   268.41327439]
------
Step:9, Action:South
State  136
Old Q Values:  [  878.22269011 11379.44908496   660.86649319   268.41327439]
New Q values:  [ 878.22269011 8650.77447064  660.86649319  268.41327439]
Reward: -1  Episode Reward:  21
xxxxx
x. gx
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13665.31612218 11218.06761962 -4584.50430574   812.90195953]
------
Step:10, Action:South
State  208
Old Q Values:  [13665.31612218 11218.06761962 -4584.50430574   812.90195953]
New Q values:  [13665.31612218 16531.3402113  -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x .gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[40149.04387815 -4059.26960032 -5588.09647059  8176.80174327]
------
Step:11, Action:West
State  288
Old Q Values:  [40149.04387815 -4059.26960032 -5588.09647059  8176.80174327]
New Q values:  [40149.04387815 -4059.26960032 -5588.09647059 29902.58265234]
Reward: 9  Episode Reward:  29
xxxxx
x.  x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799 88754.87318345 53907.97483783]
------
Step:12, Action:East
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 88754.87318345 53907.97483783]
New Q values:  [ 9275.12327166 -8521.23367799 47546.06243683 53907.97483783]
Reward: -1  Episode Reward:  28
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[40149.04387815 -4059.26960032 -5588.09647059 29902.58265234]
------
Step:13, Action:North
State  288
Old Q Values:  [40149.04387815 -4059.26960032 -5588.09647059 29902.58265234]
New Q values:  [43020.17009037 -4059.26960032 -5588.09647059 29902.58265234]
Reward: -1  Episode Reward:  27
xxxxx
x.  x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[89870.50846368  9499.28049842   790.72804752  5103.37501425]
------
Step:14, Action:North
State  208
Old Q Values:  [13665.31612218 16531.3402113  -4584.50430574   812.90195953]
New Q values:  [ 5628.36189857 16531.3402113  -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.42784832e+02]
------
Step:15, Action:West
State  130
Old Q Values:  [41234.48978377  6927.8788297   -180.00807518 64692.20804913]
New Q values:  [41234.48978377  6927.8788297   -180.00807518 46684.03932814]
Reward: -1  Episode Reward:  25
xxxxx
x.a x
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  49543.89769946 69359.18702831]
------
Step:16, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   419.00931273   898.03480866]
New Q values:  [ -281.736      -1150.91067548   419.00931273   903.31633601]
Reward: 9  Episode Reward:  34
xxxxx
xa  x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1795.6747085   223.04911789 -252.78192178]
------
Step:17, Action:South
State  107
Old Q Values:  [-252.35169558 1795.6747085   223.04911789 -252.78192178]
New Q values:  [-252.35169558 6422.28049295  223.04911789 -252.78192178]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xa. x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2076.49919863  1771.27760536 19015.3686985   1554.80203889]
------
Step:18, Action:East
State  185
Old Q Values:  [ 357.2991616     0.         9804.47639086 -178.98      ]
New Q values:  [  357.2991616     0.        13153.182442   -178.98     ]
Reward: 9  Episode Reward:  42
xxxxx
x   x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 30753.30628551 -7266.86964969  1311.30124863]
------
Step:19, Action:South
State  201
Old Q Values:  [  613.33320563 30753.30628551 -7266.86964969  1311.30124863]
New Q values:  [  613.33320563 17827.52862979 -7266.86964969  1311.30124863]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  6992.85953869 18422.68705195]
------
Step:20, Action:West
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 47546.06243683 53907.97483783]
New Q values:  [ 9275.12327166 -8521.23367799 47546.06243683 92035.20543969]
Reward: 100009  Episode Reward:  100050
xxxxx
x g x
x   x
xa  x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[8637.90270379 1352.37702619  -84.28863134  -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [8637.90270379 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [9165.17169106 1352.37702619  -84.28863134  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa. x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2076.49919863  1771.27760536 19015.3686985   1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [ 2076.49919863  1771.27760536 19015.3686985   1554.80203889]
New Q values:  [ 2076.49919863  1771.27760536 12732.7076174   1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[17070.53379332  7581.58548952 13871.56025276  2179.39995143]
------
Step:3, Action:North
State  195
Old Q Values:  [54522.32013219 16053.62807234  8240.17937465  1169.39963074]
New Q values:  [60749.00198889 16053.62807234  8240.17937465  1169.39963074]
Reward: 9  Episode Reward:  27
xxxxx
x.a.x
x   x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 129782.24645338]
------
Step:4, Action:West
State  118
Old Q Values:  [1761.89752936 7087.72425489    0.         3751.96438802]
New Q values:  [1761.89752936 7087.72425489    0.         2070.94656993]
Reward: 9  Episode Reward:  36
xxxxx
xa .x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 1882.53604906 1868.2303995     0.        ]
------
Step:5, Action:South
State  110
Old Q Values:  [-239.29051573 1775.49886031  472.79132618 -180.6       ]
New Q values:  [-239.29051573 1864.2555708   472.79132618 -180.6       ]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
xa  x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  3.84885342e+03  0.00000000e+00]
------
Step:6, Action:East
State  188
Old Q Values:  [-6523.78898263  1569.58786991  2492.41454654     0.        ]
New Q values:  [-6523.78898263  1569.58786991  2726.31809084     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
xga x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1616.00291816 1793.28342666 5766.50757409  231.67262594]
------
Step:7, Action:East
State  198
Old Q Values:  [ 7400.45312095  -200.61022961 11290.0677349      0.        ]
New Q values:  [7400.45312095 -200.61022961 6414.36252342    0.        ]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x  ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 677.77179188 6329.78476486    0.          919.64842823]
------
Step:8, Action:South
State  210
Old Q Values:  [89870.50846368  9499.28049842   790.72804752  5103.37501425]
New Q values:  [89870.50846368 16711.16322648   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  42
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[43020.17009037 -4059.26960032 -5588.09647059 29902.58265234]
------
Step:9, Action:North
State  288
Old Q Values:  [43020.17009037 -4059.26960032 -5588.09647059 29902.58265234]
New Q values:  [44168.62057525 -4059.26960032 -5588.09647059 29902.58265234]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
x  ax
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[89870.50846368 16711.16322648   790.72804752  5103.37501425]
------
Step:10, Action:North
State  216
Old Q Values:  [  445.64074378  6228.21420906   606.149024   15993.67244694]
New Q values:  [  346.49174721  6228.21420906   606.149024   15993.67244694]
Reward: 9  Episode Reward:  50
xxxxx
x  ax
x  gx
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.42784832e+02]
------
Step:11, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  5.42784832e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  4.87508834e+02]
Reward: -1  Episode Reward:  49
xxxxx
x a x
x g x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   419.00931273   903.31633601]
------
Step:12, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4416.47020655 -2390.90087924]
New Q values:  [-9594.56523706 -8069.05606225  4416.47020655   632.4709244 ]
Reward: -1  Episode Reward:  48
xxxxx
xag x
x   x
x . x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  5298.10425366 -2165.66138672   232.50800947]
------
Step:13, Action:South
State  111
Old Q Values:  [-177.44732869 8221.85879074  476.21588959 -120.29354603]
New Q values:  [-177.44732869 5376.95845711  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 1713.19627151 6962.71646939  154.04646645]
------
Step:14, Action:South
State  189
Old Q Values:  [ 533.05203844 1713.19627151 6962.71646939  154.04646645]
New Q values:  [ 533.05203844 3434.23001592 6962.71646939  154.04646645]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[9165.17169106 1352.37702619  -84.28863134  -12.17474163]
------
Step:15, Action:North
State  261
Old Q Values:  [9165.17169106 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [5754.28361724 1352.37702619  -84.28863134  -12.17474163]
Reward: -1  Episode Reward:  45
xxxxx
x   x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 3434.23001592 6962.71646939  154.04646645]
------
Step:16, Action:South
State  189
Old Q Values:  [ 533.05203844 3434.23001592 6962.71646939  154.04646645]
New Q values:  [ 533.05203844 3099.37709154 6962.71646939  154.04646645]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x  gx
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5754.28361724 1352.37702619  -84.28863134  -12.17474163]
------
Step:17, Action:North
State  261
Old Q Values:  [5754.28361724 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [4389.92838771 1352.37702619  -84.28863134  -12.17474163]
Reward: -1  Episode Reward:  43
xxxxx
x   x
xag x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 3099.37709154 6962.71646939  154.04646645]
------
Step:18, Action:South
State  189
Old Q Values:  [ 533.05203844 3099.37709154 6962.71646939  154.04646645]
New Q values:  [ 533.05203844 2556.12935293 6962.71646939  154.04646645]
Reward: -1  Episode Reward:  42
xxxxx
x g x
x   x
xa. x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4389.92838771 1352.37702619  -84.28863134  -12.17474163]
------
Step:19, Action:North
State  261
Old Q Values:  [4389.92838771 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [3844.1862959  1352.37702619  -84.28863134  -12.17474163]
Reward: -1  Episode Reward:  41
xxxxx
x  gx
xa  x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 2556.12935293 6962.71646939  154.04646645]
------
Step:20, Action:East
State  189
Old Q Values:  [ 533.05203844 2556.12935293 6962.71646939  154.04646645]
New Q values:  [  533.05203844  2556.12935293 13601.12432851   154.04646645]
Reward: -1  Episode Reward:  40
xxxxx
x g x
x a x
x . x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  36055.45913583  5752.31644944   610.93635926]
------
Step:21, Action:South
State  193
Old Q Values:  [105564.11374507  92319.96361003  11789.21969382    767.35890262]
New Q values:  [105564.11374507 102460.1915596   11789.21969382    767.35890262]
Reward: 100009  Episode Reward:  100049
xxxxx
x  gx
x   x
x a x
xxxxx
xxxxx
x...x
x...x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799 47546.06243683 92035.20543969]
------
Step:1, Action:East
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 47546.06243683 92035.20543969]
New Q values:  [ 9275.12327166 -8521.23367799 32274.41114731 92035.20543969]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[44168.62057525 -4059.26960032 -5588.09647059 29902.58265234]
------
Step:2, Action:North
State  288
Old Q Values:  [44168.62057525 -4059.26960032 -5588.09647059 29902.58265234]
New Q values:  [44634.00076921 -4059.26960032 -5588.09647059 29902.58265234]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[89870.50846368 16711.16322648   790.72804752  5103.37501425]
------
Step:3, Action:North
State  210
Old Q Values:  [89870.50846368 16711.16322648   790.72804752  5103.37501425]
New Q values:  [49958.81518392 16711.16322648   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x.. x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  6927.8788297   -180.00807518 46684.03932814]
------
Step:4, Action:West
State  130
Old Q Values:  [41234.48978377  6927.8788297   -180.00807518 46684.03932814]
New Q values:  [41234.48978377  6927.8788297   -180.00807518 39486.77183975]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x.g x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  49543.89769946 69359.18702831]
------
Step:5, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4416.47020655   632.4709244 ]
New Q values:  [-9594.56523706 -8069.05606225  4416.47020655   929.76303722]
Reward: 9  Episode Reward:  45
xxxxx
xag x
x.. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:NE
[-180.6        2237.91555819 -764.93196255    0.        ]
------
Step:6, Action:South
State  109
Old Q Values:  [ -241.10880094  5298.10425366 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  2771.37291134 -2165.66138672   232.50800947]
Reward: 9  Episode Reward:  54
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2155.77069959 1236.86188545 1411.12929353    3.33862213]
------
Step:7, Action:North
State  181
Old Q Values:  [2155.77069959 1236.86188545 1411.12929353    3.33862213]
New Q values:  [1693.12015324 1236.86188545 1411.12929353    3.33862213]
Reward: -1  Episode Reward:  53
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2771.37291134 -2165.66138672   232.50800947]
------
Step:8, Action:South
State  99
Old Q Values:  [    0.         32370.3103544  58945.17153785     0.        ]
New Q values:  [    0.         31090.33955617 58945.17153785     0.        ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039  44491.85946264     0.        ]
------
Step:9, Action:North
State  181
Old Q Values:  [1693.12015324 1236.86188545 1411.12929353    3.33862213]
New Q values:  [1508.0599347  1236.86188545 1411.12929353    3.33862213]
Reward: -1  Episode Reward:  51
xxxxx
xag x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2771.37291134 -2165.66138672   232.50800947]
------
Step:10, Action:South
State  99
Old Q Values:  [    0.         31090.33955617 58945.17153785     0.        ]
New Q values:  [    0.         30578.35123687 58945.17153785     0.        ]
Reward: -1  Episode Reward:  50
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[60476.05138135 21430.9929039  44491.85946264     0.        ]
------
Step:11, Action:North
State  180
Old Q Values:  [ 1548.72903536  3060.24711158  4825.25793031 -4966.32149798]
New Q values:  [ 1178.16828539  3060.24711158  4825.25793031 -4966.32149798]
Reward: -1  Episode Reward:  49
xxxxx
xa  x
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1864.2555708   472.79132618 -180.6       ]
------
Step:12, Action:East
State  108
Old Q Values:  [-8463.16477134  2805.00500697  1637.39424494     0.        ]
New Q values:  [-8463.16477134  2805.00500697  1979.29875994     0.        ]
Reward: -1  Episode Reward:  48
xxxxx
xga x
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4416.47020655   929.76303722]
------
Step:13, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4416.47020655   929.76303722]
New Q values:  [-9594.56523706 -8069.05606225  4361.22042381   929.76303722]
Reward: -1  Episode Reward:  47
xxxxx
x gax
x . x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 8650.77447064  660.86649319  268.41327439]
------
Step:14, Action:South
State  130
Old Q Values:  [41234.48978377  6927.8788297   -180.00807518 39486.77183975]
New Q values:  [41234.48978377  7729.95359527  -180.00807518 39486.77183975]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5628.36189857 16531.3402113  -4584.50430574   812.90195953]
------
Step:15, Action:South
State  210
Old Q Values:  [49958.81518392 16711.16322648   790.72804752  5103.37501425]
New Q values:  [49958.81518392 20074.06552135   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x . x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[44634.00076921 -4059.26960032 -5588.09647059 29902.58265234]
------
Step:16, Action:North
State  288
Old Q Values:  [44634.00076921 -4059.26960032 -5588.09647059 29902.58265234]
New Q values:  [32840.64486286 -4059.26960032 -5588.09647059 29902.58265234]
Reward: -1  Episode Reward:  44
xxxxx
x   x
x .ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[49958.81518392 20074.06552135   790.72804752  5103.37501425]
------
Step:17, Action:North
State  210
Old Q Values:  [49958.81518392 20074.06552135   790.72804752  5103.37501425]
New Q values:  [20129.17872369 20074.06552135   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  4.87508834e+02]
------
Step:18, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  4.87508834e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  4.65398434e+02]
Reward: -1  Episode Reward:  42
xxxxx
x a x
x . x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   419.00931273   903.31633601]
------
Step:19, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   419.00931273   903.31633601]
New Q values:  [ -281.736      -1150.91067548   419.00931273  1973.81407154]
Reward: -1  Episode Reward:  41
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5376.95845711  476.21588959 -120.29354603]
------
Step:20, Action:South
State  111
Old Q Values:  [-177.44732869 5376.95845711  476.21588959 -120.29354603]
New Q values:  [-177.44732869 5969.99566806  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  40
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2076.49919863  1771.27760536 12732.7076174   1554.80203889]
------
Step:21, Action:East
State  177
Old Q Values:  [60476.05138135 21430.9929039  44491.85946264     0.        ]
New Q values:  [ 60476.05138135  21430.9929039  109471.37790858      0.        ]
Reward: 100009  Episode Reward:  100049
xxxxx
x   x
x agx
x   x
xxxxx
xxxxx
x...x
x.a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[17070.53379332  7581.58548952 13871.56025276  2179.39995143]
------
Step:1, Action:North
State  194
Old Q Values:  [17070.53379332  7581.58548952 13871.56025276  2179.39995143]
New Q values:  [ 7793.93161956  7581.58548952 13871.56025276  2179.39995143]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x. .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  681.20236034 3201.06034076]
------
Step:2, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   419.00931273  1973.81407154]
New Q values:  [ -281.736      -1150.91067548   419.00931273  1354.20229986]
Reward: 9  Episode Reward:  18
xxxxx
xa .x
x. .x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1864.2555708   472.79132618 -180.6       ]
------
Step:3, Action:South
State  110
Old Q Values:  [-239.29051573 1864.2555708   472.79132618 -180.6       ]
New Q values:  [ -239.29051573 -3801.32039259   472.79132618  -180.6       ]
Reward: -9991  Episode Reward:  -9973
xxxxx
x  .x
xg .x
x ..x
xxxxx
xxxxx
x...x
x..gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  6992.85953869 18422.68705195]
------
Step:1, Action:West
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 32274.41114731 92035.20543969]
New Q values:  [ 9275.12327166 -8521.23367799 32274.41114731 37972.73806465]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3844.1862959  1352.37702619  -84.28863134  -12.17474163]
------
Step:2, Action:North
State  260
Old Q Values:  [ 1848.55637054 -5704.51612281  2114.24600842 -5679.36893145]
New Q values:  [-3807.60007269 -5704.51612281  2114.24600842 -5679.36893145]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
xg. x
x  .x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5969.99566806  476.21588959 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 1882.53604906 1868.2303995     0.        ]
New Q values:  [ 221.30610858 4578.22670484 1868.2303995     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa..x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2076.49919863  1771.27760536 12732.7076174   1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [ 2076.49919863  1771.27760536 12732.7076174   1554.80203889]
New Q values:  [ 2076.49919863  1771.27760536 12251.70696528  1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  826.42207367  1175.46486763 23844.07972775  1915.70494401]
------
Step:3, Action:East
State  201
Old Q Values:  [  613.33320563 17827.52862979 -7266.86964969  1311.30124863]
New Q values:  [  613.33320563 17827.52862979 -3941.94579649  1311.30124863]
Reward: -9991  Episode Reward:  -9973
xxxxx
x ..x
x  gx
x.. x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3844.1862959  1352.37702619  -84.28863134  -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [3844.1862959  1352.37702619  -84.28863134  -12.17474163]
New Q values:  [1995.49249877 1352.37702619  -84.28863134  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1508.0599347  1236.86188545 1411.12929353    3.33862213]
------
Step:2, Action:North
State  181
Old Q Values:  [1508.0599347  1236.86188545 1411.12929353    3.33862213]
New Q values:  [1440.03584728 1236.86188545 1411.12929353    3.33862213]
Reward: 9  Episode Reward:  18
xxxxx
xa.gx
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2771.37291134 -2165.66138672   232.50800947]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 5969.99566806  476.21588959 -120.29354603]
New Q values:  [-177.44732869 2819.40902141  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1440.03584728 1236.86188545 1411.12929353    3.33862213]
------
Step:4, Action:North
State  183
Old Q Values:  [ 2076.49919863  1771.27760536 12251.70696528  1554.80203889]
New Q values:  [ 1675.82238587  1771.27760536 12251.70696528  1554.80203889]
Reward: -1  Episode Reward:  16
xxxxx
xa. x
x ..x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 2819.40902141  476.21588959 -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 2819.40902141  476.21588959 -120.29354603]
New Q values:  [-177.44732869 4802.67569815  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  15
xxxxx
x . x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1675.82238587  1771.27760536 12251.70696528  1554.80203889]
------
Step:6, Action:East
State  181
Old Q Values:  [1440.03584728 1236.86188545 1411.12929353    3.33862213]
New Q values:  [1.44003585e+03 1.23686189e+03 1.40543946e+04 3.33862213e+00]
Reward: -9991  Episode Reward:  -9976
xxxxx
x . x
x g.x
x ..x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5628.36189857 16531.3402113  -4584.50430574   812.90195953]
------
Step:1, Action:South
State  208
Old Q Values:  [ 5628.36189857 16531.3402113  -4584.50430574   812.90195953]
New Q values:  [ 5628.36189857 16470.12954338 -4584.50430574   812.90195953]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[32840.64486286 -4059.26960032 -5588.09647059 29902.58265234]
------
Step:2, Action:North
State  288
Old Q Values:  [32840.64486286 -4059.26960032 -5588.09647059 29902.58265234]
New Q values:  [18076.69680816 -4059.26960032 -5588.09647059 29902.58265234]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x .ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 5628.36189857 16470.12954338 -4584.50430574   812.90195953]
------
Step:3, Action:South
State  208
Old Q Values:  [ 5628.36189857 16470.12954338 -4584.50430574   812.90195953]
New Q values:  [ 5628.36189857 15558.22661305 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18076.69680816 -4059.26960032 -5588.09647059 29902.58265234]
------
Step:4, Action:West
State  288
Old Q Values:  [18076.69680816 -4059.26960032 -5588.09647059 29902.58265234]
New Q values:  [18076.69680816 -4059.26960032 -5588.09647059 17358.25448033]
Reward: -9991  Episode Reward:  -9984
xxxxx
x...x
x . x
x.g x
xxxxx
xxxxx
x...x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[20129.17872369 20074.06552135   790.72804752  5103.37501425]
------
Step:1, Action:North
State  208
Old Q Values:  [ 5628.36189857 15558.22661305 -4584.50430574   812.90195953]
New Q values:  [ 2396.36428972 15558.22661305 -4584.50430574   812.90195953]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  4.65398434e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  4.65398434e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.43421262e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  4142.17748273  1666.13619982]
------
Step:3, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  4142.17748273  1666.13619982]
New Q values:  [  -56.91790269 -1902.20915811  2086.53477866  1666.13619982]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x..gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.43421262e+03]
------
Step:4, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.43421262e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  9.79345737e+02]
Reward: -1  Episode Reward:  16
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   419.00931273  1354.20229986]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   419.00931273  1354.20229986]
New Q values:  [ -281.736      -1150.91067548   419.00931273  2473.76506783]
Reward: 9  Episode Reward:  25
xxxxx
xa  x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 6422.28049295  223.04911789 -252.78192178]
------
Step:6, Action:South
State  111
Old Q Values:  [-177.44732869 4802.67569815  476.21588959 -120.29354603]
New Q values:  [-177.44732869 5601.98236884  476.21588959 -120.29354603]
Reward: 9  Episode Reward:  34
xxxxx
x   x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1675.82238587  1771.27760536 12251.70696528  1554.80203889]
------
Step:7, Action:East
State  187
Old Q Values:  [836.45017667   0.         943.76639605   0.        ]
New Q values:  [836.45017667   0.         931.15218105   0.        ]
Reward: 9  Episode Reward:  43
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:8, Action:East
State  200
Old Q Values:  [  169.9257398  36055.45913583  5752.31644944   610.93635926]
New Q values:  [  169.9257398  36055.45913583  7098.42831386   610.93635926]
Reward: -1  Episode Reward:  42
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  346.49174721  6228.21420906   606.149024   15993.67244694]
------
Step:9, Action:South
State  216
Old Q Values:  [  346.49174721  6228.21420906   606.149024   15993.67244694]
New Q values:  [  346.49174721  7913.69472607   606.149024   15993.67244694]
Reward: -1  Episode Reward:  41
xxxxx
x g x
x   x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18076.69680816 -4059.26960032 -5588.09647059 17358.25448033]
------
Step:10, Action:North
State  288
Old Q Values:  [18076.69680816 -4059.26960032 -5588.09647059 17358.25448033]
New Q values:  [12028.18045735 -4059.26960032 -5588.09647059 17358.25448033]
Reward: -1  Episode Reward:  40
xxxxx
xg  x
x  ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  346.49174721  7913.69472607   606.149024   15993.67244694]
------
Step:11, Action:West
State  216
Old Q Values:  [  346.49174721  7913.69472607   606.149024   15993.67244694]
New Q values:  [  346.49174721  7913.69472607   606.149024   17213.50671953]
Reward: -1  Episode Reward:  39
xxxxx
x g x
x a x
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  36055.45913583  7098.42831386   610.93635926]
------
Step:12, Action:South
State  200
Old Q Values:  [  169.9257398  36055.45913583  7098.42831386   610.93635926]
New Q values:  [  169.9257398  25819.40507373  7098.42831386   610.93635926]
Reward: 9  Episode Reward:  48
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799 32274.41114731 37972.73806465]
------
Step:13, Action:West
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 32274.41114731 37972.73806465]
New Q values:  [  9275.12327166  -8521.23367799  32274.41114731 106707.43557244]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
xg  x
xa  x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2396.36428972 15558.22661305 -4584.50430574   812.90195953]
------
Step:1, Action:South
State  210
Old Q Values:  [20129.17872369 20074.06552135   790.72804752  5103.37501425]
New Q values:  [20129.17872369 13242.50255264   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12028.18045735 -4059.26960032 -5588.09647059 17358.25448033]
------
Step:2, Action:West
State  288
Old Q Values:  [12028.18045735 -4059.26960032 -5588.09647059 17358.25448033]
New Q values:  [12028.18045735 -4059.26960032 -5588.09647059 32960.93246386]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
x . x
x.g x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1995.49249877 1352.37702619  -84.28863134  -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [1995.49249877 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [5019.91539436 1352.37702619  -84.28863134  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.44003585e+03 1.23686189e+03 1.40543946e+04 3.33862213e+00]
------
Step:2, Action:East
State  183
Old Q Values:  [ 1675.82238587  1771.27760536 12251.70696528  1554.80203889]
New Q values:  [ 1675.82238587  1771.27760536 23130.78338278  1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x a.x
x .gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[60749.00198889 16053.62807234  8240.17937465  1169.39963074]
------
Step:3, Action:North
State  194
Old Q Values:  [ 7793.93161956  7581.58548952 13871.56025276  2179.39995143]
New Q values:  [ 4083.29075005  7581.58548952 13871.56025276  2179.39995143]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x  .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  681.20236034 3201.06034076]
------
Step:4, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  2086.53477866  1666.13619982]
New Q values:  [  -56.91790269 -1902.20915811  2086.53477866  2352.44919058]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x  .x
x .gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5601.98236884  476.21588959 -120.29354603]
------
Step:5, Action:South
State  111
Old Q Values:  [-177.44732869 5601.98236884  476.21588959 -120.29354603]
New Q values:  [-177.44732869 6320.53024609  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2556.12935293 13601.12432851   154.04646645]
------
Step:6, Action:East
State  189
Old Q Values:  [  533.05203844  2556.12935293 13601.12432851   154.04646645]
New Q values:  [ 533.05203844 2556.12935293 1169.80200363  154.04646645]
Reward: -10001  Episode Reward:  -9966
xxxxx
x   x
x g.x
x ..x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.44003585e+03 1.23686189e+03 1.40543946e+04 3.33862213e+00]
------
Step:1, Action:East
State  189
Old Q Values:  [ 533.05203844 2556.12935293 1169.80200363  154.04646645]
New Q values:  [ 533.05203844 2556.12935293 5821.57939039  154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 17827.52862979 -3941.94579649  1311.30124863]
------
Step:2, Action:South
State  193
Old Q Values:  [105564.11374507 102460.1915596   11789.21969382    767.35890262]
New Q values:  [105564.11374507  46516.28273943  11789.21969382    767.35890262]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  6992.85953869 18422.68705195]
------
Step:3, Action:West
State  277
Old Q Values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.62532671e+03]
New Q values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.56150530e+03]
Reward: 9  Episode Reward:  27
xxxxx
x. .x
x  .x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5019.91539436 1352.37702619  -84.28863134  -12.17474163]
------
Step:4, Action:North
State  261
Old Q Values:  [5019.91539436 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [8946.60117258 1352.37702619  -84.28863134  -12.17474163]
Reward: -1  Episode Reward:  26
xxxxx
x. .x
xa .x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 1675.82238587  1771.27760536 23130.78338278  1554.80203889]
------
Step:5, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243 10940.34178277     0.        ]
New Q values:  [    0.         -5536.05678243  6595.67264939     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x. .x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[7400.45312095 -200.61022961 6414.36252342    0.        ]
------
Step:6, Action:North
State  196
Old Q Values:  [1616.00291816 1793.28342666 5766.50757409  231.67262594]
New Q values:  [1606.11926949 1793.28342666 5766.50757409  231.67262594]
Reward: -1  Episode Reward:  24
xxxxx
x.a.x
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  681.20236034 3201.06034076]
------
Step:7, Action:West
State  124
Old Q Values:  [   0.         1166.51141701 2170.86014946 2245.46149368]
New Q values:  [    0.          1166.51141701  2170.86014946 -4254.91390044]
Reward: -9991  Episode Reward:  -9967
xxxxx
xg .x
x  .x
x  .x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12028.18045735 -4059.26960032 -5588.09647059 32960.93246386]
------
Step:1, Action:West
State  288
Old Q Values:  [12028.18045735 -4059.26960032 -5588.09647059 32960.93246386]
New Q values:  [12028.18045735 -4059.26960032 -5588.09647059 18716.57910113]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  6992.85953869 18422.68705195]
------
Step:2, Action:West
State  273
Old Q Values:  [ 1188.86709371  1201.72649971  6992.85953869 18422.68705195]
New Q values:  [ 1188.86709371  1201.72649971  6992.85953869 17841.09032534]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[34888.71834854 15941.62716192 13169.98702937  1875.31501677]
------
Step:3, Action:North
State  261
Old Q Values:  [8946.60117258 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [7800.35886388 1352.37702619  -84.28863134  -12.17474163]
Reward: 9  Episode Reward:  27
xxxxx
x. gx
xa..x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.44003585e+03 1.23686189e+03 1.40543946e+04 3.33862213e+00]
------
Step:4, Action:East
State  181
Old Q Values:  [1.44003585e+03 1.23686189e+03 1.40543946e+04 3.33862213e+00]
New Q values:  [1.44003585e+03 1.23686189e+03 7.35711013e+03 3.33862213e+00]
Reward: 9  Episode Reward:  36
xxxxx
x.g.x
x a.x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1606.11926949 1793.28342666 5766.50757409  231.67262594]
------
Step:5, Action:East
State  193
Old Q Values:  [105564.11374507  46516.28273943  11789.21969382    767.35890262]
New Q values:  [105564.11374507  46516.28273943   9388.55586144    767.35890262]
Reward: 9  Episode Reward:  45
xxxxx
x. gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2396.36428972 15558.22661305 -4584.50430574   812.90195953]
------
Step:6, Action:South
State  208
Old Q Values:  [ 2396.36428972 15558.22661305 -4584.50430574   812.90195953]
New Q values:  [ 2396.36428972 11837.66437556 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  44
xxxxx
x.g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12028.18045735 -4059.26960032 -5588.09647059 18716.57910113]
------
Step:7, Action:West
State  288
Old Q Values:  [12028.18045735 -4059.26960032 -5588.09647059 18716.57910113]
New Q values:  [12028.18045735 -4059.26960032 -5588.09647059 10597.95991487]
Reward: -1  Episode Reward:  43
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197 10373.09424807  2284.51867635]
------
Step:8, Action:East
State  272
Old Q Values:  [  9275.12327166  -8521.23367799  32274.41114731 106707.43557244]
New Q values:  [  9275.12327166  -8521.23367799  16517.61859613 106707.43557244]
Reward: -1  Episode Reward:  42
xxxxx
x. .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12028.18045735 -4059.26960032 -5588.09647059 10597.95991487]
------
Step:9, Action:North
State  288
Old Q Values:  [12028.18045735 -4059.26960032 -5588.09647059 10597.95991487]
New Q values:  [ 8361.97149561 -4059.26960032 -5588.09647059 10597.95991487]
Reward: -1  Episode Reward:  41
xxxxx
x. .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2396.36428972 11837.66437556 -4584.50430574   812.90195953]
------
Step:10, Action:South
State  210
Old Q Values:  [20129.17872369 13242.50255264   790.72804752  5103.37501425]
New Q values:  [20129.17872369  8475.78899552   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  40
xxxxx
x. .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8361.97149561 -4059.26960032 -5588.09647059 10597.95991487]
------
Step:11, Action:North
State  288
Old Q Values:  [ 8361.97149561 -4059.26960032 -5588.09647059 10597.95991487]
New Q values:  [ 6895.48791091 -4059.26960032 -5588.09647059 10597.95991487]
Reward: -1  Episode Reward:  39
xxxxx
x. .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2396.36428972 11837.66437556 -4584.50430574   812.90195953]
------
Step:12, Action:South
State  208
Old Q Values:  [ 2396.36428972 11837.66437556 -4584.50430574   812.90195953]
New Q values:  [ 2396.36428972  7913.85372469 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  38
xxxxx
x. .x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6895.48791091 -4059.26960032 -5588.09647059 10597.95991487]
------
Step:13, Action:West
State  288
Old Q Values:  [ 6895.48791091 -4059.26960032 -5588.09647059 10597.95991487]
New Q values:  [ 6895.48791091 -4059.26960032 -5588.09647059 36250.81463768]
Reward: -1  Episode Reward:  37
xxxxx
x. .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  9275.12327166  -8521.23367799  16517.61859613 106707.43557244]
------
Step:14, Action:West
State  272
Old Q Values:  [  9275.12327166  -8521.23367799  16517.61859613 106707.43557244]
New Q values:  [ 9275.12327166 -8521.23367799 16517.61859613 53148.98973354]
Reward: -1  Episode Reward:  36
xxxxx
x. .x
x   x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[34888.71834854 15941.62716192 13169.98702937  1875.31501677]
------
Step:15, Action:North
State  256
Old Q Values:  [105043.13448859  37672.57242934   6661.12187543    644.94785455]
New Q values:  [71061.27605404 37672.57242934  6661.12187543   644.94785455]
Reward: -1  Episode Reward:  35
xxxxx
x. .x
xa  x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SW
[    0.             0.         96815.40752869     0.        ]
------
Step:16, Action:East
State  176
Old Q Values:  [ 95513.49952646   1621.55095326 107436.54432545      0.        ]
New Q values:  [95513.49952646  1621.55095326 62458.56066226     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x. .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[15780.48189733 64948.47644027 14412.78382458  2453.92999194]
------
Step:17, Action:South
State  192
Old Q Values:  [15780.48189733 64948.47644027 14412.78382458  2453.92999194]
New Q values:  [15780.48189733 41923.48749617 14412.78382458  2453.92999194]
Reward: -1  Episode Reward:  33
xxxxx
x. .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799 16517.61859613 53148.98973354]
------
Step:18, Action:West
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 16517.61859613 53148.98973354]
New Q values:  [ 9275.12327166 -8521.23367799 16517.61859613 31725.61139798]
Reward: -1  Episode Reward:  32
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[34888.71834854 15941.62716192 13169.98702937  1875.31501677]
------
Step:19, Action:North
State  257
Old Q Values:  [34888.71834854 15941.62716192 13169.98702937  1875.31501677]
New Q values:  [46796.30071199 15941.62716192 13169.98702937  1875.31501677]
Reward: -1  Episode Reward:  31
xxxxx
x. .x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[ 60476.05138135  21430.9929039  109471.37790858      0.        ]
------
Step:20, Action:North
State  183
Old Q Values:  [ 1675.82238587  1771.27760536 23130.78338278  1554.80203889]
New Q values:  [ 2049.1969658   1771.27760536 23130.78338278  1554.80203889]
Reward: 9  Episode Reward:  40
xxxxx
xa .x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 4578.22670484 1868.2303995     0.        ]
------
Step:21, Action:South
State  103
Old Q Values:  [ 221.30610858 4578.22670484 1868.2303995     0.        ]
New Q values:  [ 221.30610858 8769.92569677 1868.2303995     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x  .x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 23130.78338278  1554.80203889]
------
Step:22, Action:East
State  181
Old Q Values:  [1.44003585e+03 1.23686189e+03 7.35711013e+03 3.33862213e+00]
New Q values:  [1.44003585e+03 1.23686189e+03 3.36059450e+03 3.33862213e+00]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  1394.50148356 -6341.45812961   403.06255908]
------
Step:23, Action:South
State  196
Old Q Values:  [1606.11926949 1793.28342666 5766.50757409  231.67262594]
New Q values:  [1606.11926949 3828.64164508 5766.50757409  231.67262594]
Reward: -1  Episode Reward:  37
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197 10373.09424807  2284.51867635]
------
Step:24, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197 10373.09424807  2284.51867635]
New Q values:  [-2561.28592178 -5807.06396197 15023.88209053  2284.51867635]
Reward: -1  Episode Reward:  36
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6895.48791091 -4059.26960032 -5588.09647059 36250.81463768]
------
Step:25, Action:North
State  288
Old Q Values:  [ 6895.48791091 -4059.26960032 -5588.09647059 36250.81463768]
New Q values:  [ 5131.75128177 -4059.26960032 -5588.09647059 36250.81463768]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2396.36428972  7913.85372469 -4584.50430574   812.90195953]
------
Step:26, Action:South
State  208
Old Q Values:  [ 2396.36428972  7913.85372469 -4584.50430574   812.90195953]
New Q values:  [ 2396.36428972 14040.18588118 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  34
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5131.75128177 -4059.26960032 -5588.09647059 36250.81463768]
------
Step:27, Action:West
State  288
Old Q Values:  [ 5131.75128177 -4059.26960032 -5588.09647059 36250.81463768]
New Q values:  [ 5131.75128177 -4059.26960032 -5588.09647059 19006.89048223]
Reward: -1  Episode Reward:  33
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197 15023.88209053  2284.51867635]
------
Step:28, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197 15023.88209053  2284.51867635]
New Q values:  [-2561.28592178 -5807.06396197 11711.01998088  2284.51867635]
Reward: -1  Episode Reward:  32
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5131.75128177 -4059.26960032 -5588.09647059 19006.89048223]
------
Step:29, Action:West
State  288
Old Q Values:  [ 5131.75128177 -4059.26960032 -5588.09647059 19006.89048223]
New Q values:  [ 5131.75128177 -4059.26960032 -5588.09647059 11115.46218716]
Reward: -1  Episode Reward:  31
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197 11711.01998088  2284.51867635]
------
Step:30, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197 11711.01998088  2284.51867635]
New Q values:  [-2561.28592178 -5807.06396197  8018.4466485   2284.51867635]
Reward: -1  Episode Reward:  30
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5131.75128177 -4059.26960032 -5588.09647059 11115.46218716]
------
Step:31, Action:West
State  288
Old Q Values:  [ 5131.75128177 -4059.26960032 -5588.09647059 11115.46218716]
New Q values:  [ 5131.75128177 -4059.26960032 -5588.09647059  9797.91197247]
Reward: -1  Episode Reward:  29
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  6992.85953869 17841.09032534]
------
Step:32, Action:West
State  277
Old Q Values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.56150530e+03]
New Q values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 3.36410978e+03]
Reward: -1  Episode Reward:  28
xxxxx
x  .x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[7800.35886388 1352.37702619  -84.28863134  -12.17474163]
------
Step:33, Action:North
State  261
Old Q Values:  [7800.35886388 1352.37702619  -84.28863134  -12.17474163]
New Q values:  [10058.77856039  1352.37702619   -84.28863134   -12.17474163]
Reward: -1  Episode Reward:  27
xxxxx
x  .x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 23130.78338278  1554.80203889]
------
Step:34, Action:East
State  183
Old Q Values:  [ 2049.1969658   1771.27760536 23130.78338278  1554.80203889]
New Q values:  [ 2049.1969658   1771.27760536 11471.8492894   1554.80203889]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[7400.45312095 -200.61022961 6414.36252342    0.        ]
------
Step:35, Action:North
State  199
Old Q Values:  [  826.42207367  1175.46486763 23844.07972775  1915.70494401]
New Q values:  [  916.09264659  1175.46486763 23844.07972775  1915.70494401]
Reward: -1  Episode Reward:  25
xxxxx
x a.x
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SE
[   0.          465.4216645     0.         1953.74605706]
------
Step:36, Action:West
State  119
Old Q Values:  [   0.          465.4216645     0.         1953.74605706]
New Q values:  [   0.          465.4216645     0.         3411.87613185]
Reward: -1  Episode Reward:  24
xxxxx
xa .x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 8769.92569677 1868.2303995     0.        ]
------
Step:37, Action:South
State  97
Old Q Values:  [    0.         51365.03378694     0.             0.        ]
New Q values:  [    0.         53386.82688735     0.             0.        ]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[ 60476.05138135  21430.9929039  109471.37790858      0.        ]
------
Step:38, Action:East
State  181
Old Q Values:  [1.44003585e+03 1.23686189e+03 3.36059450e+03 3.33862213e+00]
New Q values:  [1440.03584728 1236.86188545 1761.98824422    3.33862213]
Reward: -1  Episode Reward:  22
xxxxx
x  .x
x agx
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  1394.50148356 -6341.45812961   403.06255908]
------
Step:39, Action:South
State  193
Old Q Values:  [105564.11374507  46516.28273943   9388.55586144    767.35890262]
New Q values:  [105564.11374507  23958.24019337   9388.55586144    767.35890262]
Reward: -1  Episode Reward:  21
xxxxx
x  gx
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  6992.85953869 17841.09032534]
------
Step:40, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  8018.4466485   2284.51867635]
New Q values:  [-2561.28592178 -5807.06396197  8018.4466485   3930.84103866]
Reward: -1  Episode Reward:  20
xxxxx
x g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[10058.77856039  1352.37702619   -84.28863134   -12.17474163]
------
Step:41, Action:North
State  260
Old Q Values:  [-3807.60007269 -5704.51612281  2114.24600842 -5679.36893145]
New Q values:  [  -76.06264998 -5704.51612281  2114.24600842 -5679.36893145]
Reward: -1  Episode Reward:  19
xxxxx
xg .x
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NW
[ 1178.16828539  3060.24711158  4825.25793031 -4966.32149798]
------
Step:42, Action:East
State  180
Old Q Values:  [ 1178.16828539  3060.24711158  4825.25793031 -4966.32149798]
New Q values:  [ 1178.16828539  3060.24711158  3659.45544435 -4966.32149798]
Reward: -1  Episode Reward:  18
xxxxx
x  .x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1606.11926949 3828.64164508 5766.50757409  231.67262594]
------
Step:43, Action:East
State  196
Old Q Values:  [1606.11926949 3828.64164508 5766.50757409  231.67262594]
New Q values:  [1606.11926949 3828.64164508 6518.05879399  231.67262594]
Reward: -1  Episode Reward:  17
xxxxx
x  .x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2396.36428972 14040.18588118 -4584.50430574   812.90195953]
------
Step:44, Action:South
State  210
Old Q Values:  [20129.17872369  8475.78899552   790.72804752  5103.37501425]
New Q values:  [20129.17872369  6329.08918995   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  16
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5131.75128177 -4059.26960032 -5588.09647059  9797.91197247]
------
Step:45, Action:North
State  288
Old Q Values:  [ 5131.75128177 -4059.26960032 -5588.09647059  9797.91197247]
New Q values:  [ 8090.85412981 -4059.26960032 -5588.09647059  9797.91197247]
Reward: -1  Episode Reward:  15
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[20129.17872369  6329.08918995   790.72804752  5103.37501425]
------
Step:46, Action:North
State  210
Old Q Values:  [20129.17872369  6329.08918995   790.72804752  5103.37501425]
New Q values:  [80427.4184246   6329.08918995   790.72804752  5103.37501425]
Reward: 100009  Episode Reward:  100024
xxxxx
x  ax
x   x
x g x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799 16517.61859613 31725.61139798]
------
Step:1, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  8018.4466485   3930.84103866]
New Q values:  [-2561.28592178 -5807.06396197  8018.4466485   2212.01021799]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[  -76.06264998 -5704.51612281  2114.24600842 -5679.36893145]
------
Step:2, Action:East
State  261
Old Q Values:  [10058.77856039  1352.37702619   -84.28863134   -12.17474163]
New Q values:  [10058.77856039  1352.37702619  2371.21854201   -12.17474163]
Reward: -1  Episode Reward:  8
xxxxx
x ..x
x.g.x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  8018.4466485   2212.01021799]
------
Step:3, Action:East
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799 16517.61859613 31725.61139798]
New Q values:  [ 9275.12327166 -8521.23367799  9551.82103019 31725.61139798]
Reward: 9  Episode Reward:  17
xxxxx
x ..x
x...x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8090.85412981 -4059.26960032 -5588.09647059  9797.91197247]
------
Step:4, Action:North
State  288
Old Q Values:  [ 8090.85412981 -4059.26960032 -5588.09647059  9797.91197247]
New Q values:  [ 7453.79741628 -4059.26960032 -5588.09647059  9797.91197247]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
x.gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 2396.36428972 14040.18588118 -4584.50430574   812.90195953]
------
Step:5, Action:South
State  208
Old Q Values:  [ 2396.36428972 14040.18588118 -4584.50430574   812.90195953]
New Q values:  [ 2396.36428972  8554.84794421 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  25
xxxxx
x ..x
x..gx
x  ax
xxxxx
Step:6, Action:East
State  288
Old Q Values:  [ 7453.79741628 -4059.26960032 -5588.09647059  9797.91197247]
New Q values:  [ 7453.79741628 -4059.26960032   523.5350035   9797.91197247]
Reward: -301  Episode Reward:  -276
xxxxx
x .gx
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7453.79741628 -4059.26960032   523.5350035   9797.91197247]
------
Step:7, Action:West
State  288
Old Q Values:  [ 7453.79741628 -4059.26960032   523.5350035   9797.91197247]
New Q values:  [ 7453.79741628 -4059.26960032   523.5350035  13436.24820838]
Reward: -1  Episode Reward:  -277
xxxxx
x g.x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799  9551.82103019 31725.61139798]
------
Step:8, Action:West
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799  9551.82103019 31725.61139798]
New Q values:  [ 9275.12327166 -8521.23367799  9551.82103019 15707.27812731]
Reward: -1  Episode Reward:  -278
xxxxx
x ..x
x.g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[10058.77856039  1352.37702619  2371.21854201   -12.17474163]
------
Step:9, Action:North
State  260
Old Q Values:  [  -76.06264998 -5704.51612281  2114.24600842 -5679.36893145]
New Q values:  [-4927.18842669 -5704.51612281  2114.24600842 -5679.36893145]
Reward: -9991  Episode Reward:  -10269
xxxxx
x ..x
xg. x
x   x
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-4927.18842669 -5704.51612281  2114.24600842 -5679.36893145]
------
Step:1, Action:East
State  261
Old Q Values:  [10058.77856039  1352.37702619  2371.21854201   -12.17474163]
New Q values:  [10058.77856039  1352.37702619  3359.42141136   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  8018.4466485   2212.01021799]
------
Step:2, Action:East
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799  9551.82103019 15707.27812731]
New Q values:  [ 9275.12327166 -8521.23367799  7857.00287459 15707.27812731]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x...x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7453.79741628 -4059.26960032   523.5350035  13436.24820838]
------
Step:3, Action:West
State  288
Old Q Values:  [ 7453.79741628 -4059.26960032   523.5350035  13436.24820838]
New Q values:  [ 7453.79741628 -4059.26960032   523.5350035  10086.08272154]
Reward: -1  Episode Reward:  17
xxxxx
x ..x
xg..x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799  7857.00287459 15707.27812731]
------
Step:4, Action:West
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799  7857.00287459 15707.27812731]
New Q values:  [ 9275.12327166 -8521.23367799  7857.00287459   916.58505345]
Reward: -10001  Episode Reward:  -9984
xxxxx
x ..x
x...x
xg  x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[10058.77856039  1352.37702619  3359.42141136   -12.17474163]
------
Step:1, Action:North
State  260
Old Q Values:  [-4927.18842669 -5704.51612281  2114.24600842 -5679.36893145]
New Q values:  [-6867.63873737 -5704.51612281  2114.24600842 -5679.36893145]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
xg .x
x ..x
xxxxx
xxxxx
x...x
x.g.x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[10058.77856039  1352.37702619  3359.42141136   -12.17474163]
------
Step:1, Action:North
State  260
Old Q Values:  [-6867.63873737 -5704.51612281  2114.24600842 -5679.36893145]
New Q values:  [-7643.81886164 -5704.51612281  2114.24600842 -5679.36893145]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
xg .x
x ..x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[80427.4184246   6329.08918995   790.72804752  5103.37501425]
------
Step:1, Action:North
State  208
Old Q Values:  [ 2396.36428972  8554.84794421 -4584.50430574   812.90195953]
New Q values:  [ 1257.7494371   8554.84794421 -4584.50430574   812.90195953]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  9.79345737e+02]
------
Step:2, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  9.79345737e+02]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.13926782e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
xg. x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   419.00931273  2473.76506783]
------
Step:3, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4361.22042381   929.76303722]
New Q values:  [-9594.56523706 -8069.05606225  4361.22042381 -4318.40133077]
Reward: -9991  Episode Reward:  -9973
xxxxx
xg  x
x.. x
x. .x
xxxxx
xxxxx
x...x
xg.ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1257.7494371   8554.84794421 -4584.50430574   812.90195953]
------
Step:1, Action:South
State  210
Old Q Values:  [80427.4184246   6329.08918995   790.72804752  5103.37501425]
New Q values:  [80427.4184246   5562.86049244   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x . x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7453.79741628 -4059.26960032   523.5350035  10086.08272154]
------
Step:2, Action:West
State  288
Old Q Values:  [ 7453.79741628 -4059.26960032   523.5350035  10086.08272154]
New Q values:  [ 7453.79741628 -4059.26960032   523.5350035    822.37007012]
Reward: -9991  Episode Reward:  -9982
xxxxx
x...x
x . x
x.g x
xxxxx
xxxxx
xg..x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7453.79741628 -4059.26960032   523.5350035    822.37007012]
------
Step:1, Action:North
State  288
Old Q Values:  [ 7453.79741628 -4059.26960032   523.5350035    822.37007012]
New Q values:  [ 5553.37334978 -4059.26960032   523.5350035    822.37007012]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1257.7494371   8554.84794421 -4584.50430574   812.90195953]
------
Step:2, Action:South
State  208
Old Q Values:  [ 1257.7494371   8554.84794421 -4584.50430574   812.90195953]
New Q values:  [ 1257.7494371   5087.35118262 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  8
xxxxx
xg..x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5553.37334978 -4059.26960032   523.5350035    822.37007012]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5553.37334978 -4059.26960032   523.5350035    822.37007012]
New Q values:  [ 3746.9546947  -4059.26960032   523.5350035    822.37007012]
Reward: -1  Episode Reward:  7
xxxxx
x ..x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1257.7494371   5087.35118262 -4584.50430574   812.90195953]
------
Step:4, Action:South
State  208
Old Q Values:  [ 1257.7494371   5087.35118262 -4584.50430574   812.90195953]
New Q values:  [ 1257.7494371   3158.42688146 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  6
xxxxx
x ..x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3746.9546947  -4059.26960032   523.5350035    822.37007012]
------
Step:5, Action:North
State  288
Old Q Values:  [ 3746.9546947  -4059.26960032   523.5350035    822.37007012]
New Q values:  [ 2445.70994231 -4059.26960032   523.5350035    822.37007012]
Reward: -1  Episode Reward:  5
xxxxx
x ..x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1257.7494371   3158.42688146 -4584.50430574   812.90195953]
------
Step:6, Action:South
State  210
Old Q Values:  [80427.4184246   5562.86049244   790.72804752  5103.37501425]
New Q values:  [80427.4184246   2958.25717967   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  4
xxxxx
x ..x
x.. x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2445.70994231 -4059.26960032   523.5350035    822.37007012]
------
Step:7, Action:North
State  288
Old Q Values:  [ 2445.70994231 -4059.26960032   523.5350035    822.37007012]
New Q values:  [ 1925.21204136 -4059.26960032   523.5350035    822.37007012]
Reward: -1  Episode Reward:  3
xxxxx
x ..x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1257.7494371   3158.42688146 -4584.50430574   812.90195953]
------
Step:8, Action:South
State  208
Old Q Values:  [ 1257.7494371   3158.42688146 -4584.50430574   812.90195953]
New Q values:  [ 1257.7494371   1840.33436499 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  2
xxxxx
x ..x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1925.21204136 -4059.26960032   523.5350035    822.37007012]
------
Step:9, Action:North
State  288
Old Q Values:  [ 1925.21204136 -4059.26960032   523.5350035    822.37007012]
New Q values:  [ 1321.58512604 -4059.26960032   523.5350035    822.37007012]
Reward: -1  Episode Reward:  1
xxxxx
x g.x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1257.7494371   1840.33436499 -4584.50430574   812.90195953]
------
Step:10, Action:South
State  208
Old Q Values:  [ 1257.7494371   1840.33436499 -4584.50430574   812.90195953]
New Q values:  [ 1257.7494371   1132.00928381 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  0
xxxxx
xg..x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1321.58512604 -4059.26960032   523.5350035    822.37007012]
------
Step:11, Action:North
State  288
Old Q Values:  [ 1321.58512604 -4059.26960032   523.5350035    822.37007012]
New Q values:  [  905.35888155 -4059.26960032   523.5350035    822.37007012]
Reward: -1  Episode Reward:  -1
xxxxx
x ..x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1257.7494371   1132.00928381 -4584.50430574   812.90195953]
------
Step:12, Action:North
State  210
Old Q Values:  [80427.4184246   2958.25717967   790.72804752  5103.37501425]
New Q values:  [32518.14771443  2958.25717967   790.72804752  5103.37501425]
Reward: 9  Episode Reward:  8
xxxxx
x .ax
x.. x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.13926782e+03]
------
Step:13, Action:West
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.13926782e+03]
New Q values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.20323665e+03]
Reward: 9  Episode Reward:  17
xxxxx
x a x
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   419.00931273  2473.76506783]
------
Step:14, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   419.00931273  2473.76506783]
New Q values:  [ -281.736      -1150.91067548   419.00931273  2915.59017502]
Reward: -1  Episode Reward:  16
xxxxx
xa  x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 6422.28049295  223.04911789 -252.78192178]
------
Step:15, Action:South
State  107
Old Q Values:  [-252.35169558 6422.28049295  223.04911789 -252.78192178]
New Q values:  [-252.35169558 6015.866984    223.04911789 -252.78192178]
Reward: 9  Episode Reward:  25
xxxxx
x   x
xa. x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 11471.8492894   1554.80203889]
------
Step:16, Action:East
State  185
Old Q Values:  [  357.2991616     0.        13153.182442   -178.98     ]
New Q values:  [ 357.2991616     0.         7012.49449892 -178.98      ]
Reward: -9991  Episode Reward:  -9966
xxxxx
x   x
x g x
x.. x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4361.22042381 -4318.40133077]
------
Step:1, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4361.22042381 -4318.40133077]
New Q values:  [-9594.56523706 -8069.05606225  4345.12051072 -4318.40133077]
Reward: 9  Episode Reward:  9
xxxxx
x gax
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 8650.77447064  660.86649319  268.41327439]
------
Step:2, Action:South
State  138
Old Q Values:  [ 1.06807480e+02 -6.47874006e+02 -3.22965309e-01  1.20323665e+03]
New Q values:  [ 1.06807480e+02  4.91030241e+03 -3.22965309e-01  1.20323665e+03]
Reward: 9  Episode Reward:  18
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  346.49174721  7913.69472607   606.149024   17213.50671953]
------
Step:3, Action:South
State  208
Old Q Values:  [ 1257.7494371   1132.00928381 -4584.50430574   812.90195953]
New Q values:  [ 1257.7494371    729.81137799 -4584.50430574   812.90195953]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  905.35888155 -4059.26960032   523.5350035    822.37007012]
------
Step:4, Action:West
State  288
Old Q Values:  [  905.35888155 -4059.26960032   523.5350035    822.37007012]
New Q values:  [  905.35888155 -4059.26960032   523.5350035   5686.67512565]
Reward: 9  Episode Reward:  36
xxxxx
x   x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  6992.85953869 17841.09032534]
------
Step:5, Action:West
State  273
Old Q Values:  [ 1188.86709371  1201.72649971  6992.85953869 17841.09032534]
New Q values:  [ 1188.86709371  1201.72649971  6992.85953869 21180.72634373]
Reward: 9  Episode Reward:  45
xxxxx
x   x
x..gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[46796.30071199 15941.62716192 13169.98702937  1875.31501677]
------
Step:6, Action:North
State  261
Old Q Values:  [10058.77856039  1352.37702619  3359.42141136   -12.17474163]
New Q values:  [4557.50789742 1352.37702619 3359.42141136  -12.17474163]
Reward: 9  Episode Reward:  54
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1440.03584728 1236.86188545 1761.98824422    3.33862213]
------
Step:7, Action:East
State  177
Old Q Values:  [ 60476.05138135  21430.9929039  109471.37790858      0.        ]
New Q values:  [ 60476.05138135  21430.9929039  116370.99741228      0.        ]
Reward: 100009  Episode Reward:  100063
xxxxx
x g x
x a x
x   x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  2086.53477866  2352.44919058]
------
Step:1, Action:West
State  121
Old Q Values:  [    0.             0.         -8338.81083201   273.64612079]
New Q values:  [    0.             0.         -8338.81083201   946.27032172]
Reward: 9  Episode Reward:  9
xxxxx
xa gx
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  2771.37291134 -2165.66138672   232.50800947]
------
Step:2, Action:South
State  111
Old Q Values:  [-177.44732869 6320.53024609  476.21588959 -120.29354603]
New Q values:  [-177.44732869 3062.2085717   476.21588959 -120.29354603]
Reward: 9  Episode Reward:  18
xxxxx
x  .x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1440.03584728 1236.86188545 1761.98824422    3.33862213]
------
Step:3, Action:East
State  183
Old Q Values:  [ 2049.1969658   1771.27760536 11471.8492894   1554.80203889]
New Q values:  [2049.1969658  1771.27760536 8440.19560677 1554.80203889]
Reward: 9  Episode Reward:  27
xxxxx
x  .x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 1.28201863e+04 2.89128867e+03 4.59156348e+03]
------
Step:4, Action:South
State  197
Old Q Values:  [-5833.78831344  1394.50148356 -6341.45812961   403.06255908]
New Q values:  [-5833.78831344  1572.43352738 -6341.45812961   403.06255908]
Reward: 9  Episode Reward:  36
xxxxx
x  .x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 5.04537534e+02 2.28379178e+03 3.36410978e+03]
------
Step:5, Action:West
State  277
Old Q Values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 3.36410978e+03]
New Q values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.71829628e+03]
Reward: 9  Episode Reward:  45
xxxxx
x  .x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4557.50789742 1352.37702619 3359.42141136  -12.17474163]
------
Step:6, Action:North
State  261
Old Q Values:  [4557.50789742 1352.37702619 3359.42141136  -12.17474163]
New Q values:  [2350.99963223 1352.37702619 3359.42141136  -12.17474163]
Reward: -1  Episode Reward:  44
xxxxx
x  .x
xa gx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1440.03584728 1236.86188545 1761.98824422    3.33862213]
------
Step:7, Action:East
State  183
Old Q Values:  [2049.1969658  1771.27760536 8440.19560677 1554.80203889]
New Q values:  [ 2049.1969658   1771.27760536 10528.70216103  1554.80203889]
Reward: -1  Episode Reward:  43
xxxxx
x  .x
x a x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SE
[  916.09264659  1175.46486763 23844.07972775  1915.70494401]
------
Step:8, Action:East
State  197
Old Q Values:  [-5833.78831344  1572.43352738 -6341.45812961   403.06255908]
New Q values:  [-5833.78831344  1572.43352738 -8159.85842071   403.06255908]
Reward: -10001  Episode Reward:  -9958
xxxxx
x  .x
x  gx
x  .x
xxxxx
xxxxx
x...x
x..gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[2350.99963223 1352.37702619 3359.42141136  -12.17474163]
------
Step:1, Action:East
State  261
Old Q Values:  [2350.99963223 1352.37702619 3359.42141136  -12.17474163]
New Q values:  [2350.99963223 1352.37702619 4131.70554604  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799  7857.00287459   916.58505345]
------
Step:2, Action:East
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799  7857.00287459   916.58505345]
New Q values:  [ 9275.12327166 -8521.23367799  4854.20368753   916.58505345]
Reward: 9  Episode Reward:  18
xxxxx
x.g.x
x.. x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  905.35888155 -4059.26960032   523.5350035   5686.67512565]
------
Step:3, Action:West
State  288
Old Q Values:  [  905.35888155 -4059.26960032   523.5350035   5686.67512565]
New Q values:  [  905.35888155 -4059.26960032   523.5350035   5056.60703176]
Reward: -1  Episode Reward:  17
xxxxx
xg..x
x.. x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 9275.12327166 -8521.23367799  4854.20368753   916.58505345]
------
Step:4, Action:North
State  272
Old Q Values:  [ 9275.12327166 -8521.23367799  4854.20368753   916.58505345]
New Q values:  [16292.49555751 -8521.23367799  4854.20368753   916.58505345]
Reward: 9  Episode Reward:  26
xxxxx
x...x
xga x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[15780.48189733 41923.48749617 14412.78382458  2453.92999194]
------
Step:5, Action:South
State  192
Old Q Values:  [15780.48189733 41923.48749617 14412.78382458  2453.92999194]
New Q values:  [15780.48189733 21656.54366572 14412.78382458  2453.92999194]
Reward: -1  Episode Reward:  25
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[16292.49555751 -8521.23367799  4854.20368753   916.58505345]
------
Step:6, Action:East
State  273
Old Q Values:  [ 1188.86709371  1201.72649971  6992.85953869 21180.72634373]
New Q values:  [ 1188.86709371  1201.72649971  4313.525925   21180.72634373]
Reward: -1  Episode Reward:  24
xxxxx
x...x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  905.35888155 -4059.26960032   523.5350035   5056.60703176]
------
Step:7, Action:West
State  288
Old Q Values:  [  905.35888155 -4059.26960032   523.5350035   5056.60703176]
New Q values:  [  905.35888155 -4059.26960032   523.5350035   6909.79147996]
Reward: -1  Episode Reward:  23
xxxxx
x...x
x.g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[16292.49555751 -8521.23367799  4854.20368753   916.58505345]
------
Step:8, Action:East
State  273
Old Q Values:  [ 1188.86709371  1201.72649971  4313.525925   21180.72634373]
New Q values:  [ 1188.86709371  1201.72649971  3797.74781399 21180.72634373]
Reward: -1  Episode Reward:  22
xxxxx
x...x
x. gx
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  905.35888155 -4059.26960032   523.5350035   6909.79147996]
------
Step:9, Action:West
State  288
Old Q Values:  [  905.35888155 -4059.26960032   523.5350035   6909.79147996]
New Q values:  [  905.35888155 -4059.26960032   523.5350035   9117.5344951 ]
Reward: -1  Episode Reward:  21
xxxxx
x..gx
x.  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[ 1188.86709371  1201.72649971  3797.74781399 21180.72634373]
------
Step:10, Action:West
State  272
Old Q Values:  [16292.49555751 -8521.23367799  4854.20368753   916.58505345]
New Q values:  [16292.49555751 -8521.23367799  4854.20368753 14404.92423498]
Reward: -1  Episode Reward:  20
xxxxx
x.g.x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[46796.30071199 15941.62716192 13169.98702937  1875.31501677]
------
Step:11, Action:North
State  261
Old Q Values:  [2350.99963223 1352.37702619 4131.70554604  -12.17474163]
New Q values:  [1474.39632616 1352.37702619 4131.70554604  -12.17474163]
Reward: 9  Episode Reward:  29
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1440.03584728 1236.86188545 1761.98824422    3.33862213]
------
Step:12, Action:North
State  181
Old Q Values:  [1440.03584728 1236.86188545 1761.98824422    3.33862213]
New Q values:  [3212.39204794 1236.86188545 1761.98824422    3.33862213]
Reward: 9  Episode Reward:  38
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 8769.92569677 1868.2303995     0.        ]
------
Step:13, Action:South
State  103
Old Q Values:  [ 221.30610858 8769.92569677 1868.2303995     0.        ]
New Q values:  [ 221.30610858 4471.08789309 1868.2303995     0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[3212.39204794 1236.86188545 1761.98824422    3.33862213]
------
Step:14, Action:North
State  180
Old Q Values:  [ 1178.16828539  3060.24711158  3659.45544435 -4966.32149798]
New Q values:  [ 2652.48164739  3060.24711158  3659.45544435 -4966.32149798]
Reward: -1  Episode Reward:  36
xxxxx
xa..x
xg  x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        7272.71444411 2583.17308616 -180.6       ]
------
Step:15, Action:East
State  100
Old Q Values:  [   0.         6612.16904117 1813.23031203    0.        ]
New Q values:  [   0.         6612.16904117 2466.87554592    0.        ]
Reward: 9  Episode Reward:  45
xxxxx
xga.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:NW
[   0.         5787.27807037    0.            0.        ]
------
Step:16, Action:South
State  116
Old Q Values:  [   0.         5787.27807037    0.            0.        ]
New Q values:  [   0.         4269.72886634    0.            0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1606.11926949 3828.64164508 6518.05879399  231.67262594]
------
Step:17, Action:East
State  196
Old Q Values:  [1606.11926949 3828.64164508 6518.05879399  231.67262594]
New Q values:  [1606.11926949 3828.64164508 2983.94834873  231.67262594]
Reward: -1  Episode Reward:  43
xxxxx
xg .x
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1257.7494371    729.81137799 -4584.50430574   812.90195953]
------
Step:18, Action:North
State  208
Old Q Values:  [ 1257.7494371    729.81137799 -4584.50430574   812.90195953]
New Q values:  [72878.84670997   729.81137799 -4584.50430574   812.90195953]
Reward: 100009  Episode Reward:  100052
xxxxx
x  ax
xg  x
x   x
xxxxx
xxxxx
x...x
x.agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[105564.11374507  23958.24019337   9388.55586144    767.35890262]
------
Step:1, Action:North
State  192
Old Q Values:  [15780.48189733 21656.54366572 14412.78382458  2453.92999194]
New Q values:  [ 7192.26981144 21656.54366572 14412.78382458  2453.92999194]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   419.00931273  2915.59017502]
------
Step:2, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  2086.53477866  2352.44919058]
New Q values:  [  -56.91790269 -1902.20915811  2086.53477866  2751.13977143]
Reward: 9  Episode Reward:  18
xxxxx
xa .x
x. gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 6015.866984    223.04911789 -252.78192178]
------
Step:3, Action:South
State  109
Old Q Values:  [ -241.10880094  2771.37291134 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  2860.42298165 -2165.66138672   232.50800947]
Reward: 9  Episode Reward:  27
xxxxx
x  gx
xa  x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[ 533.05203844 2556.12935293 5821.57939039  154.04646645]
------
Step:4, Action:East
State  189
Old Q Values:  [ 533.05203844 2556.12935293 5821.57939039  154.04646645]
New Q values:  [  533.05203844  2556.12935293 10073.85327827   154.04646645]
Reward: -1  Episode Reward:  26
xxxxx
x g.x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  25819.40507373  7098.42831386   610.93635926]
------
Step:5, Action:South
State  196
Old Q Values:  [1606.11926949 3828.64164508 2983.94834873  231.67262594]
New Q values:  [1606.11926949 3942.39065258 2983.94834873  231.67262594]
Reward: 9  Episode Reward:  35
xxxxx
xg .x
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  8018.4466485   2212.01021799]
------
Step:6, Action:East
State  272
Old Q Values:  [16292.49555751 -8521.23367799  4854.20368753 14404.92423498]
New Q values:  [16292.49555751 -8521.23367799  4682.34182354 14404.92423498]
Reward: 9  Episode Reward:  44
xxxxx
x  .x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  905.35888155 -4059.26960032   523.5350035   9117.5344951 ]
------
Step:7, Action:West
State  288
Old Q Values:  [  905.35888155 -4059.26960032   523.5350035   9117.5344951 ]
New Q values:  [  905.35888155 -4059.26960032   523.5350035   8534.1624653 ]
Reward: -1  Episode Reward:  43
xxxxx
xg .x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[16292.49555751 -8521.23367799  4682.34182354 14404.92423498]
------
Step:8, Action:North
State  272
Old Q Values:  [16292.49555751 -8521.23367799  4682.34182354 14404.92423498]
New Q values:  [ 7699.11541878 -8521.23367799  4682.34182354 14404.92423498]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1606.11926949 3942.39065258 2983.94834873  231.67262594]
------
Step:9, Action:South
State  196
Old Q Values:  [1606.11926949 3942.39065258 2983.94834873  231.67262594]
New Q values:  [1606.11926949 5897.83353153 2983.94834873  231.67262594]
Reward: -1  Episode Reward:  41
xxxxx
xg .x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799  4682.34182354 14404.92423498]
------
Step:10, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  8018.4466485   2212.01021799]
New Q values:  [-2561.28592178 -5807.06396197  8018.4466485   1524.47788972]
Reward: 9  Episode Reward:  50
xxxxx
x  .x
xg  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-7643.81886164 -5704.51612281  2114.24600842 -5679.36893145]
------
Step:11, Action:East
State  260
Old Q Values:  [-7643.81886164 -5704.51612281  2114.24600842 -5679.36893145]
New Q values:  [-7643.81886164 -5704.51612281  3250.63239792 -5679.36893145]
Reward: -1  Episode Reward:  49
xxxxx
xg .x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  8018.4466485   1524.47788972]
------
Step:12, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  8018.4466485   1524.47788972]
New Q values:  [-2561.28592178 -5807.06396197  5767.02739899  1524.47788972]
Reward: -1  Episode Reward:  48
xxxxx
x g.x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  905.35888155 -4059.26960032   523.5350035   8534.1624653 ]
------
Step:13, Action:West
State  288
Old Q Values:  [  905.35888155 -4059.26960032   523.5350035   8534.1624653 ]
New Q values:  [  905.35888155 -4059.26960032   523.5350035   5143.17320581]
Reward: -1  Episode Reward:  47
xxxxx
x  .x
x g x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  5767.02739899  1524.47788972]
------
Step:14, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  5767.02739899  1524.47788972]
New Q values:  [-2561.28592178 -5807.06396197  3849.16292134  1524.47788972]
Reward: -1  Episode Reward:  46
xxxxx
x  .x
x   x
x gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[  905.35888155 -4059.26960032   523.5350035   5143.17320581]
------
Step:15, Action:North
State  288
Old Q Values:  [  905.35888155 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [10116.98786695 -4059.26960032   523.5350035   5143.17320581]
Reward: -1  Episode Reward:  45
xxxxx
x  .x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[32518.14771443  2958.25717967   790.72804752  5103.37501425]
------
Step:16, Action:North
State  210
Old Q Values:  [32518.14771443  2958.25717967   790.72804752  5103.37501425]
New Q values:  [85383.0060209   2958.25717967   790.72804752  5103.37501425]
Reward: 100009  Episode Reward:  100054
xxxxx
x  ax
x   x
x g x
xxxxx
xxxxx
x..gx
x...x
x.a.x
xxxxx
Step:1, Action:North
State  273
Old Q Values:  [ 1188.86709371  1201.72649971  3797.74781399 21180.72634373]
New Q values:  [32150.180961    1201.72649971  3797.74781399 21180.72634373]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x.agx
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[105564.11374507  23958.24019337   9388.55586144    767.35890262]
------
Step:2, Action:North
State  192
Old Q Values:  [ 7192.26981144 21656.54366572 14412.78382458  2453.92999194]
New Q values:  [ 3756.98497708 21656.54366572 14412.78382458  2453.92999194]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548   419.00931273  2915.59017502]
------
Step:3, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548   419.00931273  2915.59017502]
New Q values:  [ -281.736      -1150.91067548   419.00931273  2976.39616521]
Reward: 9  Episode Reward:  27
xxxxx
xa  x
x. .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 6015.866984    223.04911789 -252.78192178]
------
Step:4, Action:South
State  110
Old Q Values:  [ -239.29051573 -3801.32039259   472.79132618  -180.6       ]
New Q values:  [-239.29051573  463.57363778  472.79132618 -180.6       ]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  6595.67264939     0.        ]
------
Step:5, Action:East
State  180
Old Q Values:  [ 2652.48164739  3060.24711158  3659.45544435 -4966.32149798]
New Q values:  [ 2652.48164739  3060.24711158  3232.5322372  -4966.32149798]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xga.x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1606.11926949 5897.83353153 2983.94834873  231.67262594]
------
Step:6, Action:South
State  196
Old Q Values:  [1606.11926949 5897.83353153 2983.94834873  231.67262594]
New Q values:  [1606.11926949 3513.28228901 2983.94834873  231.67262594]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x g.x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  3849.16292134  1524.47788972]
------
Step:7, Action:East
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  4682.34182354 14404.92423498]
New Q values:  [ 7699.11541878 -8521.23367799  4913.4330895  14404.92423498]
Reward: 9  Episode Reward:  43
xxxxx
x g x
x  .x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10116.98786695 -4059.26960032   523.5350035   5143.17320581]
------
Step:8, Action:North
State  288
Old Q Values:  [10116.98786695 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [ 9216.24716264 -4059.26960032   523.5350035   5143.17320581]
Reward: 9  Episode Reward:  52
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  346.49174721  7913.69472607   606.149024   17213.50671953]
------
Step:9, Action:West
State  216
Old Q Values:  [  346.49174721  7913.69472607   606.149024   17213.50671953]
New Q values:  [  346.49174721  7913.69472607   606.149024   12233.06127675]
Reward: -1  Episode Reward:  51
xxxxx
x  gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 17827.52862979 -3941.94579649  1311.30124863]
------
Step:10, Action:South
State  201
Old Q Values:  [  613.33320563 17827.52862979 -3941.94579649  1311.30124863]
New Q values:  [  613.33320563 16775.46574022 -3941.94579649  1311.30124863]
Reward: -1  Episode Reward:  50
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[32150.180961    1201.72649971  3797.74781399 21180.72634373]
------
Step:11, Action:North
State  273
Old Q Values:  [32150.180961    1201.72649971  3797.74781399 21180.72634373]
New Q values:  [17892.11210647  1201.72649971  3797.74781399 21180.72634373]
Reward: -1  Episode Reward:  49
xxxxx
x  gx
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 16775.46574022 -3941.94579649  1311.30124863]
------
Step:12, Action:South
State  201
Old Q Values:  [  613.33320563 16775.46574022 -3941.94579649  1311.30124863]
New Q values:  [  613.33320563 13063.80419921 -3941.94579649  1311.30124863]
Reward: -1  Episode Reward:  48
xxxxx
x   x
x  gx
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17892.11210647  1201.72649971  3797.74781399 21180.72634373]
------
Step:13, Action:West
State  273
Old Q Values:  [17892.11210647  1201.72649971  3797.74781399 21180.72634373]
New Q values:  [17892.11210647  1201.72649971  3797.74781399 82516.58075109]
Reward: 100009  Episode Reward:  100057
xxxxx
x   x
x   x
xa gx
xxxxx
xxxxx
x..ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  4.91030241e+03 -3.22965309e-01  1.20323665e+03]
------
Step:1, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  4.91030241e+03 -3.22965309e-01  1.20323665e+03]
New Q values:  [ 1.06807480e+02  2.75844228e+04 -3.22965309e-01  1.20323665e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x .ax
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[85383.0060209   2958.25717967   790.72804752  5103.37501425]
------
Step:2, Action:North
State  210
Old Q Values:  [85383.0060209   2958.25717967   790.72804752  5103.37501425]
New Q values:  [42427.92923984  2958.25717967   790.72804752  5103.37501425]
Reward: -1  Episode Reward:  8
xxxxx
x..ax
x . x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.75844228e+04 -3.22965309e-01  1.20323665e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.75844228e+04 -3.22965309e-01  1.20323665e+03]
New Q values:  [ 1.06807480e+02  2.37615479e+04 -3.22965309e-01  1.20323665e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.. x
x .ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[42427.92923984  2958.25717967   790.72804752  5103.37501425]
------
Step:4, Action:North
State  208
Old Q Values:  [72878.84670997   729.81137799 -4584.50430574   812.90195953]
New Q values:  [36279.40304817   729.81137799 -4584.50430574   812.90195953]
Reward: -1  Episode Reward:  6
xxxxx
x..ax
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.37615479e+04 -3.22965309e-01  1.20323665e+03]
------
Step:5, Action:West
State  136
Old Q Values:  [ 878.22269011 8650.77447064  660.86649319  268.41327439]
New Q values:  [ 878.22269011 8650.77447064  660.86649319  396.64640627]
Reward: 9  Episode Reward:  15
xxxxx
x.agx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8338.81083201   946.27032172]
------
Step:6, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  2086.53477866  2751.13977143]
New Q values:  [  -56.91790269 -1902.20915811  2086.53477866  2024.51848008]
Reward: 9  Episode Reward:  24
xxxxx
xa  x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3062.2085717   476.21588959 -120.29354603]
------
Step:7, Action:South
State  109
Old Q Values:  [ -241.10880094  2860.42298165 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  4165.72517614 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  23
xxxxx
x  gx
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2556.12935293 10073.85327827   154.04646645]
------
Step:8, Action:East
State  189
Old Q Values:  [  533.05203844  2556.12935293 10073.85327827   154.04646645]
New Q values:  [  533.05203844  2556.12935293 11780.76283343   154.04646645]
Reward: 9  Episode Reward:  32
xxxxx
x g x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  25819.40507373  7098.42831386   610.93635926]
------
Step:9, Action:South
State  200
Old Q Values:  [  169.9257398  25819.40507373  7098.42831386   610.93635926]
New Q values:  [  169.9257398  14654.63929998  7098.42831386   610.93635926]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x g x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799  4913.4330895  14404.92423498]
------
Step:10, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  3849.16292134  1524.47788972]
New Q values:  [-2561.28592178 -5807.06396197  3849.16292134  1854.7028197 ]
Reward: 9  Episode Reward:  50
xxxxx
x   x
x   x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1474.39632616 1352.37702619 4131.70554604  -12.17474163]
------
Step:11, Action:North
State  257
Old Q Values:  [46796.30071199 15941.62716192 13169.98702937  1875.31501677]
New Q values:  [43386.52328468 15941.62716192 13169.98702937  1875.31501677]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:SE
[82228.67666629 18995.54020685 50537.28135001     0.        ]
------
Step:12, Action:North
State  191
Old Q Values:  [   3.06655861 2392.28892532 2029.22435297    0.        ]
New Q values:  [ 919.28919496 2392.28892532 2029.22435297    0.        ]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 3062.2085717   476.21588959 -120.29354603]
------
Step:13, Action:South
State  111
Old Q Values:  [-177.44732869 3062.2085717   476.21588959 -120.29354603]
New Q values:  [-177.44732869 4758.51227871  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2556.12935293 11780.76283343   154.04646645]
------
Step:14, Action:South
State  189
Old Q Values:  [  533.05203844  2556.12935293 11780.76283343   154.04646645]
New Q values:  [  533.05203844  2261.36340498 11780.76283343   154.04646645]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1474.39632616 1352.37702619 4131.70554604  -12.17474163]
------
Step:15, Action:East
State  261
Old Q Values:  [1474.39632616 1352.37702619 4131.70554604  -12.17474163]
New Q values:  [1474.39632616 1352.37702619 2806.83109482  -12.17474163]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  3849.16292134  1854.7028197 ]
------
Step:16, Action:East
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  4913.4330895  14404.92423498]
New Q values:  [ 7699.11541878 -8521.23367799 64735.64738459 14404.92423498]
Reward: 100009  Episode Reward:  100054
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
x...x
xa..x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  6595.67264939     0.        ]
------
Step:1, Action:East
State  180
Old Q Values:  [ 2652.48164739  3060.24711158  3232.5322372  -4966.32149798]
New Q values:  [ 2652.48164739  3060.24711158  7795.3759946  -4966.32149798]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xga.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3756.98497708 21656.54366572 14412.78382458  2453.92999194]
------
Step:2, Action:South
State  196
Old Q Values:  [1606.11926949 3513.28228901 2983.94834873  231.67262594]
New Q values:  [1606.11926949 2565.46179201 2983.94834873  231.67262594]
Reward: 9  Episode Reward:  18
xxxxx
xg..x
x  .x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  3849.16292134  1854.7028197 ]
------
Step:3, Action:East
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  3849.16292134  1854.7028197 ]
New Q values:  [-2561.28592178 -5807.06396197  4309.93931733  1854.7028197 ]
Reward: 9  Episode Reward:  27
xxxxx
x.g.x
x  .x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9216.24716264 -4059.26960032   523.5350035   5143.17320581]
------
Step:4, Action:North
State  288
Old Q Values:  [ 9216.24716264 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [14575.71977951 -4059.26960032   523.5350035   5143.17320581]
Reward: 9  Episode Reward:  36
xxxxx
x..gx
x  ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36279.40304817   729.81137799 -4584.50430574   812.90195953]
------
Step:5, Action:West
State  208
Old Q Values:  [36279.40304817   729.81137799 -4584.50430574   812.90195953]
New Q values:  [36279.40304817   729.81137799 -4584.50430574  6821.52388353]
Reward: -1  Episode Reward:  35
xxxxx
x.g.x
x a x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3756.98497708 21656.54366572 14412.78382458  2453.92999194]
------
Step:6, Action:South
State  192
Old Q Values:  [ 3756.98497708 21656.54366572 14412.78382458  2453.92999194]
New Q values:  [ 3756.98497708 28082.71168167 14412.78382458  2453.92999194]
Reward: -1  Episode Reward:  34
xxxxx
xg..x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799 64735.64738459 14404.92423498]
------
Step:7, Action:East
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799 64735.64738459 14404.92423498]
New Q values:  [ 7699.11541878 -8521.23367799 30266.37488769 14404.92423498]
Reward: -1  Episode Reward:  33
xxxxx
x...x
xg  x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14575.71977951 -4059.26960032   523.5350035   5143.17320581]
------
Step:8, Action:North
State  288
Old Q Values:  [14575.71977951 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [16713.50882625 -4059.26960032   523.5350035   5143.17320581]
Reward: -1  Episode Reward:  32
xxxxx
x...x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[36279.40304817   729.81137799 -4584.50430574  6821.52388353]
------
Step:9, Action:North
State  208
Old Q Values:  [36279.40304817   729.81137799 -4584.50430574  6821.52388353]
New Q values:  [26887.5081544    729.81137799 -4584.50430574  6821.52388353]
Reward: 9  Episode Reward:  41
xxxxx
x..ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[41234.48978377  7729.95359527  -180.00807518 39486.77183975]
------
Step:10, Action:North
State  130
Old Q Values:  [41234.48978377  7729.95359527  -180.00807518 39486.77183975]
New Q values:  [28683.54284864  7729.95359527  -180.00807518 39486.77183975]
Reward: -301  Episode Reward:  -260
xxxxx
x..ax
x   x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[28683.54284864  7729.95359527  -180.00807518 39486.77183975]
------
Step:11, Action:West
State  130
Old Q Values:  [28683.54284864  7729.95359527  -180.00807518 39486.77183975]
New Q values:  [28683.54284864  7729.95359527  -180.00807518 36607.86484439]
Reward: 9  Episode Reward:  -251
xxxxx
x.a x
x   x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[ -180.6         3557.6642036  49543.89769946 69359.18702831]
------
Step:12, Action:West
State  114
Old Q Values:  [ -180.6         3557.6642036  49543.89769946 69359.18702831]
New Q values:  [  -180.6          3557.6642036   49543.89769946 105432.62627268]
Reward: 100009  Episode Reward:  99758
xxxxx
xa  x
x g x
x   x
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 10528.70216103  1554.80203889]
------
Step:1, Action:East
State  183
Old Q Values:  [ 2049.1969658   1771.27760536 10528.70216103  1554.80203889]
New Q values:  [ 2049.1969658   1771.27760536 22441.58146108  1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[60749.00198889 16053.62807234  8240.17937465  1169.39963074]
------
Step:2, Action:North
State  198
Old Q Values:  [7400.45312095 -200.61022961 6414.36252342    0.        ]
New Q values:  [3925.89935061 -200.61022961 6414.36252342    0.        ]
Reward: 9  Episode Reward:  18
xxxxx
x.a.x
x  .x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  681.20236034 3201.06034076]
------
Step:3, Action:West
State  126
Old Q Values:  [   0.          331.64678262  681.20236034 3201.06034076]
New Q values:  [   0.          331.64678262  681.20236034 2713.37781992]
Reward: 9  Episode Reward:  27
xxxxx
xa .x
x g.x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4758.51227871  476.21588959 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 4758.51227871  476.21588959 -120.29354603]
New Q values:  [-177.44732869 8635.27934981  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  26
xxxxx
x  .x
xa .x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 22441.58146108  1554.80203889]
------
Step:5, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  6595.67264939     0.        ]
New Q values:  [    0.         -5536.05678243  4561.97781678     0.        ]
Reward: -1  Episode Reward:  25
xxxxx
x  .x
x a.x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[3925.89935061 -200.61022961 6414.36252342    0.        ]
------
Step:6, Action:East
State  196
Old Q Values:  [1606.11926949 2565.46179201 2983.94834873  231.67262594]
New Q values:  [1606.11926949 2565.46179201 4868.89772251  231.67262594]
Reward: 9  Episode Reward:  34
xxxxx
x  .x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  346.49174721  7913.69472607   606.149024   12233.06127675]
------
Step:7, Action:West
State  210
Old Q Values:  [42427.92923984  2958.25717967   790.72804752  5103.37501425]
New Q values:  [42427.92923984  2958.25717967   790.72804752  3965.05876272]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[3925.89935061 -200.61022961 6414.36252342    0.        ]
------
Step:8, Action:East
State  196
Old Q Values:  [1606.11926949 2565.46179201 4868.89772251  231.67262594]
New Q values:  [1606.11926949 2565.46179201 5616.87747203  231.67262594]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
xg ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[  346.49174721  7913.69472607   606.149024   12233.06127675]
------
Step:9, Action:West
State  216
Old Q Values:  [  346.49174721  7913.69472607   606.149024   12233.06127675]
New Q values:  [ 346.49174721 7913.69472607  606.149024   6577.68775231]
Reward: -1  Episode Reward:  31
xxxxx
xg .x
x a x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1606.11926949 2565.46179201 5616.87747203  231.67262594]
------
Step:10, Action:East
State  196
Old Q Values:  [1606.11926949 2565.46179201 5616.87747203  231.67262594]
New Q values:  [1606.11926949 2565.46179201 4620.25940663  231.67262594]
Reward: -1  Episode Reward:  30
xxxxx
x g.x
x  ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 346.49174721 7913.69472607  606.149024   6577.68775231]
------
Step:11, Action:South
State  208
Old Q Values:  [26887.5081544    729.81137799 -4584.50430574  6821.52388353]
New Q values:  [26887.5081544   5311.37719907 -4584.50430574  6821.52388353]
Reward: 9  Episode Reward:  39
xxxxx
xg .x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[16713.50882625 -4059.26960032   523.5350035   5143.17320581]
------
Step:12, Action:North
State  288
Old Q Values:  [16713.50882625 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [14751.05597682 -4059.26960032   523.5350035   5143.17320581]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[26887.5081544   5311.37719907 -4584.50430574  6821.52388353]
------
Step:13, Action:North
State  216
Old Q Values:  [ 346.49174721 7913.69472607  606.149024   6577.68775231]
New Q values:  [2739.22904008 7913.69472607  606.149024   6577.68775231]
Reward: 9  Episode Reward:  47
xxxxx
xg ax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 8650.77447064  660.86649319  396.64640627]
------
Step:14, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.37615479e+04 -3.22965309e-01  1.20323665e+03]
New Q values:  [ 1.06807480e+02  1.18781276e+04 -3.22965309e-01  1.20323665e+03]
Reward: -1  Episode Reward:  46
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2739.22904008 7913.69472607  606.149024   6577.68775231]
------
Step:15, Action:South
State  210
Old Q Values:  [42427.92923984  2958.25717967   790.72804752  3965.05876272]
New Q values:  [42427.92923984  5608.01966491   790.72804752  3965.05876272]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x   x
xg ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[14751.05597682 -4059.26960032   523.5350035   5143.17320581]
------
Step:16, Action:North
State  288
Old Q Values:  [14751.05597682 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [ 8273.93080855 -4059.26960032   523.5350035   5143.17320581]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2739.22904008 7913.69472607  606.149024   6577.68775231]
------
Step:17, Action:South
State  216
Old Q Values:  [2739.22904008 7913.69472607  606.149024   6577.68775231]
New Q values:  [2739.22904008 5647.05713299  606.149024   6577.68775231]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8273.93080855 -4059.26960032   523.5350035   5143.17320581]
------
Step:18, Action:North
State  288
Old Q Values:  [ 8273.93080855 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [ 5282.27864911 -4059.26960032   523.5350035   5143.17320581]
Reward: -1  Episode Reward:  42
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2739.22904008 5647.05713299  606.149024   6577.68775231]
------
Step:19, Action:West
State  216
Old Q Values:  [2739.22904008 5647.05713299  606.149024   6577.68775231]
New Q values:  [2739.22904008 5647.05713299  606.149024   1026.86689092]
Reward: -10001  Episode Reward:  -9959
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1474.39632616 1352.37702619 2806.83109482  -12.17474163]
------
Step:1, Action:East
State  261
Old Q Values:  [1474.39632616 1352.37702619 2806.83109482  -12.17474163]
New Q values:  [ 1474.39632616  1352.37702619 10208.04490423   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799 30266.37488769 14404.92423498]
------
Step:2, Action:East
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799 30266.37488769 14404.92423498]
New Q values:  [ 7699.11541878 -8521.23367799 13696.63354981 14404.92423498]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5282.27864911 -4059.26960032   523.5350035   5143.17320581]
------
Step:3, Action:North
State  288
Old Q Values:  [ 5282.27864911 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [ 4184.56390596 -4059.26960032   523.5350035   5143.17320581]
Reward: -9991  Episode Reward:  -9973
xxxxx
x.. x
x..gx
x   x
xxxxx
xxxxx
x...x
x...x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[ 1474.39632616  1352.37702619 10208.04490423   -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [ 1474.39632616  1352.37702619 10208.04490423   -12.17474163]
New Q values:  [ 7327.63296879  1352.37702619 10208.04490423   -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x...x
xa..x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 22441.58146108  1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [ 2049.1969658   1771.27760536 22441.58146108  1554.80203889]
New Q values:  [ 2049.1969658   1771.27760536 13143.50066026  1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 4083.29075005  7581.58548952 13871.56025276  2179.39995143]
------
Step:3, Action:East
State  195
Old Q Values:  [60749.00198889 16053.62807234  8240.17937465  1169.39963074]
New Q values:  [60749.00198889 16053.62807234 16029.85052182  1169.39963074]
Reward: 9  Episode Reward:  27
xxxxx
x...x
x  ax
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[42427.92923984  5608.01966491   790.72804752  3965.05876272]
------
Step:4, Action:North
State  210
Old Q Values:  [42427.92923984  5608.01966491   790.72804752  3965.05876272]
New Q values:  [27958.93114926  5608.01966491   790.72804752  3965.05876272]
Reward: 9  Episode Reward:  36
xxxxx
x..ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[28683.54284864  7729.95359527  -180.00807518 36607.86484439]
------
Step:5, Action:West
State  130
Old Q Values:  [28683.54284864  7729.95359527  -180.00807518 36607.86484439]
New Q values:  [28683.54284864  7729.95359527  -180.00807518 46278.33381956]
Reward: 9  Episode Reward:  45
xxxxx
x.a x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 105432.62627268]
------
Step:6, Action:West
State  126
Old Q Values:  [   0.          331.64678262  681.20236034 2713.37781992]
New Q values:  [   0.          331.64678262  681.20236034 3681.33493291]
Reward: 9  Episode Reward:  54
xxxxx
xa  x
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 8635.27934981  476.21588959 -120.29354603]
------
Step:7, Action:South
State  110
Old Q Values:  [-239.29051573  463.57363778  472.79132618 -180.6       ]
New Q values:  [-239.29051573 1339.48548179  472.79132618 -180.6       ]
Reward: -1  Episode Reward:  53
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  3.84885342e+03  0.00000000e+00]
------
Step:8, Action:East
State  188
Old Q Values:  [-6523.78898263  1569.58786991  2726.31809084     0.        ]
New Q values:  [-6523.78898263  1569.58786991  2513.34382283     0.        ]
Reward: -1  Episode Reward:  52
xxxxx
x   x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NW
[   0.         4557.66188112 4744.72195498  441.58769553]
------
Step:9, Action:East
State  204
Old Q Values:  [   0.         4557.66188112 4744.72195498  441.58769553]
New Q values:  [   0.         4557.66188112 3591.40592189  441.58769553]
Reward: -1  Episode Reward:  51
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2739.22904008 5647.05713299  606.149024   1026.86689092]
------
Step:10, Action:South
State  208
Old Q Values:  [26887.5081544   5311.37719907 -4584.50430574  6821.52388353]
New Q values:  [26887.5081544  63672.90284137 -4584.50430574  6821.52388353]
Reward: 100009  Episode Reward:  100060
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
xg..x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-7643.81886164 -5704.51612281  3250.63239792 -5679.36893145]
------
Step:1, Action:East
State  261
Old Q Values:  [ 7327.63296879  1352.37702619 10208.04490423   -12.17474163]
New Q values:  [7327.63296879 1352.37702619 5381.59975689  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4309.93931733  1854.7028197 ]
------
Step:2, Action:East
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799 13696.63354981 14404.92423498]
New Q values:  [ 7699.11541878 -8521.23367799  7027.00538167 14404.92423498]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g.x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4184.56390596 -4059.26960032   523.5350035   5143.17320581]
------
Step:3, Action:West
State  288
Old Q Values:  [ 4184.56390596 -4059.26960032   523.5350035   5143.17320581]
New Q values:  [ 4184.56390596 -4059.26960032   523.5350035    378.14655282]
Reward: -10001  Episode Reward:  -9983
xxxxx
x ..x
x...x
x g x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[60749.00198889 16053.62807234 16029.85052182  1169.39963074]
------
Step:1, Action:North
State  193
Old Q Values:  [105564.11374507  23958.24019337   9388.55586144    767.35890262]
New Q values:  [42857.00593163 23958.24019337  9388.55586144   767.35890262]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  2086.53477866  2024.51848008]
------
Step:2, Action:East
State  121
Old Q Values:  [    0.             0.         -8338.81083201   946.27032172]
New Q values:  [    0.             0.         -6734.89199161   946.27032172]
Reward: -9991  Episode Reward:  -9982
xxxxx
x. gx
x. .x
x.. x
xxxxx
xxxxx
x...x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[27958.93114926  5608.01966491   790.72804752  3965.05876272]
------
Step:1, Action:North
State  210
Old Q Values:  [27958.93114926  5608.01966491   790.72804752  3965.05876272]
New Q values:  [14752.41073072  5608.01966491   790.72804752  3965.05876272]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
x.. x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.18781276e+04 -3.22965309e-01  1.20323665e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  1.18781276e+04 -3.22965309e-01  1.20323665e+03]
New Q values:  [ 1.06807480e+02  9.17637425e+03 -3.22965309e-01  1.20323665e+03]
Reward: -1  Episode Reward:  8
xxxxx
x.. x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14752.41073072  5608.01966491   790.72804752  3965.05876272]
------
Step:3, Action:North
State  208
Old Q Values:  [26887.5081544  63672.90284137 -4584.50430574  6821.52388353]
New Q values:  [13507.31553593 63672.90284137 -4584.50430574  6821.52388353]
Reward: -1  Episode Reward:  7
xxxxx
x..ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  9.17637425e+03 -3.22965309e-01  1.20323665e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  9.17637425e+03 -3.22965309e-01  1.20323665e+03]
New Q values:  [ 1.06807480e+02  2.27718206e+04 -3.22965309e-01  1.20323665e+03]
Reward: -1  Episode Reward:  6
xxxxx
x.. x
xg.ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13507.31553593 63672.90284137 -4584.50430574  6821.52388353]
------
Step:5, Action:South
State  208
Old Q Values:  [13507.31553593 63672.90284137 -4584.50430574  6821.52388353]
New Q values:  [13507.31553593 26729.93030834 -4584.50430574  6821.52388353]
Reward: 9  Episode Reward:  15
xxxxx
xg. x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4184.56390596 -4059.26960032   523.5350035    378.14655282]
------
Step:6, Action:North
State  288
Old Q Values:  [ 4184.56390596 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 9692.20465489 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  14
xxxxx
x.g x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13507.31553593 26729.93030834 -4584.50430574  6821.52388353]
------
Step:7, Action:South
State  208
Old Q Values:  [13507.31553593 26729.93030834 -4584.50430574  6821.52388353]
New Q values:  [13507.31553593 13599.0335198  -4584.50430574  6821.52388353]
Reward: -1  Episode Reward:  13
xxxxx
x..gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9692.20465489 -4059.26960032   523.5350035    378.14655282]
------
Step:8, Action:North
State  288
Old Q Values:  [ 9692.20465489 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 7955.9919179  -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  12
xxxxx
x.g x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13507.31553593 13599.0335198  -4584.50430574  6821.52388353]
------
Step:9, Action:South
State  208
Old Q Values:  [13507.31553593 13599.0335198  -4584.50430574  6821.52388353]
New Q values:  [13507.31553593  7825.81098329 -4584.50430574  6821.52388353]
Reward: -1  Episode Reward:  11
xxxxx
x..gx
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7955.9919179  -4059.26960032   523.5350035    378.14655282]
------
Step:10, Action:North
State  288
Old Q Values:  [ 7955.9919179  -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 7233.99142794 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  10
xxxxx
x..gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13507.31553593  7825.81098329 -4584.50430574  6821.52388353]
------
Step:11, Action:South
State  208
Old Q Values:  [13507.31553593  7825.81098329 -4584.50430574  6821.52388353]
New Q values:  [13507.31553593  5299.9218217  -4584.50430574  6821.52388353]
Reward: -1  Episode Reward:  9
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7233.99142794 -4059.26960032   523.5350035    378.14655282]
------
Step:12, Action:North
State  288
Old Q Values:  [ 7233.99142794 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 6945.19123195 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  8
xxxxx
x..gx
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13507.31553593  5299.9218217  -4584.50430574  6821.52388353]
------
Step:13, Action:West
State  208
Old Q Values:  [13507.31553593  5299.9218217  -4584.50430574  6821.52388353]
New Q values:  [13507.31553593  5299.9218217  -4584.50430574 15591.1113329 ]
Reward: 9  Episode Reward:  17
xxxxx
x.. x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[42857.00593163 23958.24019337  9388.55586144   767.35890262]
------
Step:14, Action:North
State  195
Old Q Values:  [60749.00198889 16053.62807234 16029.85052182  1169.39963074]
New Q values:  [24930.96122915 16053.62807234 16029.85052182  1169.39963074]
Reward: 9  Episode Reward:  26
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  2086.53477866  2024.51848008]
------
Step:15, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  2086.53477866  2024.51848008]
New Q values:  [  -56.91790269 -1902.20915811  7665.56007685  2024.51848008]
Reward: -1  Episode Reward:  25
xxxxx
x. ax
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.27718206e+04 -3.22965309e-01  1.20323665e+03]
------
Step:16, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  2.27718206e+04 -3.22965309e-01  1.20323665e+03]
New Q values:  [ 1.06807480e+02  2.27718206e+04 -3.22965309e-01  2.78036268e+03]
Reward: -1  Episode Reward:  24
xxxxx
x.a x
x.  x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  7665.56007685  2024.51848008]
------
Step:17, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548   419.00931273  2976.39616521]
New Q values:  [ -281.736      -1150.91067548  6998.54989048  2976.39616521]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.27718206e+04 -3.22965309e-01  2.78036268e+03]
------
Step:18, Action:South
State  130
Old Q Values:  [28683.54284864  7729.95359527  -180.00807518 46278.33381956]
New Q values:  [28683.54284864  7517.10465732  -180.00807518 46278.33381956]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[14752.41073072  5608.01966491   790.72804752  3965.05876272]
------
Step:19, Action:North
State  210
Old Q Values:  [14752.41073072  5608.01966491   790.72804752  3965.05876272]
New Q values:  [12731.91045768  5608.01966491   790.72804752  3965.05876272]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.27718206e+04 -3.22965309e-01  2.78036268e+03]
------
Step:20, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.27718206e+04 -3.22965309e-01  2.78036268e+03]
New Q values:  [ 1.06807480e+02  1.37854616e+04 -3.22965309e-01  2.78036268e+03]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[13507.31553593  5299.9218217  -4584.50430574 15591.1113329 ]
------
Step:21, Action:North
State  208
Old Q Values:  [13507.31553593  5299.9218217  -4584.50430574 15591.1113329 ]
New Q values:  [ 7997.55855556  5299.9218217  -4584.50430574 15591.1113329 ]
Reward: -1  Episode Reward:  19
xxxxx
x.gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 8650.77447064  660.86649319  396.64640627]
------
Step:22, Action:South
State  136
Old Q Values:  [ 878.22269011 8650.77447064  660.86649319  396.64640627]
New Q values:  [ 878.22269011 8137.04318812  660.86649319  396.64640627]
Reward: -1  Episode Reward:  18
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7997.55855556  5299.9218217  -4584.50430574 15591.1113329 ]
------
Step:23, Action:West
State  208
Old Q Values:  [ 7997.55855556  5299.9218217  -4584.50430574 15591.1113329 ]
New Q values:  [ 7997.55855556  5299.9218217  -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  17
xxxxx
x.g x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[ 3756.98497708 28082.71168167 14412.78382458  2453.92999194]
------
Step:24, Action:South
State  192
Old Q Values:  [ 3756.98497708 28082.71168167 14412.78382458  2453.92999194]
New Q values:  [ 3756.98497708 15553.96194316 14412.78382458  2453.92999194]
Reward: -1  Episode Reward:  16
xxxxx
xg  x
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799  7027.00538167 14404.92423498]
------
Step:25, Action:West
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  7027.00538167 14404.92423498]
New Q values:  [ 7699.11541878 -8521.23367799  7027.00538167 18783.3266794 ]
Reward: 9  Episode Reward:  25
xxxxx
x.g x
x.  x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[43386.52328468 15941.62716192 13169.98702937  1875.31501677]
------
Step:26, Action:North
State  257
Old Q Values:  [43386.52328468 15941.62716192 13169.98702937  1875.31501677]
New Q values:  [52271.30853756 15941.62716192 13169.98702937  1875.31501677]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[ 60476.05138135  21430.9929039  116370.99741228      0.        ]
------
Step:27, Action:North
State  176
Old Q Values:  [95513.49952646  1621.55095326 62458.56066226     0.        ]
New Q values:  [116801.00123142   1621.55095326  62458.56066226      0.        ]
Reward: 100009  Episode Reward:  100043
xxxxx
xa  x
xg  x
x   x
xxxxx
xxxxx
x..gx
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[7327.63296879 1352.37702619 5381.59975689  -12.17474163]
------
Step:1, Action:North
State  261
Old Q Values:  [7327.63296879 1352.37702619 5381.59975689  -12.17474163]
New Q values:  [3900.1708019  1352.37702619 5381.59975689  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[3212.39204794 1236.86188545 1761.98824422    3.33862213]
------
Step:2, Action:North
State  181
Old Q Values:  [3212.39204794 1236.86188545 1761.98824422    3.33862213]
New Q values:  [3.88094062e+03 1.23686189e+03 1.76198824e+03 3.33862213e+00]
Reward: 9  Episode Reward:  18
xxxxx
xa. x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 8635.27934981  476.21588959 -120.29354603]
------
Step:3, Action:South
State  111
Old Q Values:  [-177.44732869 8635.27934981  476.21588959 -120.29354603]
New Q values:  [-177.44732869 4617.79392716  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  17
xxxxx
x . x
xa.gx
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[3.88094062e+03 1.23686189e+03 1.76198824e+03 3.33862213e+00]
------
Step:4, Action:North
State  181
Old Q Values:  [3.88094062e+03 1.23686189e+03 1.76198824e+03 3.33862213e+00]
New Q values:  [2801.49380249 1236.86188545 1761.98824422    3.33862213]
Reward: -1  Episode Reward:  16
xxxxx
xa.gx
x ..x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  4165.72517614 -2165.66138672   232.50800947]
------
Step:5, Action:South
State  109
Old Q Values:  [ -241.10880094  4165.72517614 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  2506.1382112  -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  15
xxxxx
x g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2801.49380249 1236.86188545 1761.98824422    3.33862213]
------
Step:6, Action:North
State  181
Old Q Values:  [2801.49380249 1236.86188545 1761.98824422    3.33862213]
New Q values:  [2505.33569914 1236.86188545 1761.98824422    3.33862213]
Reward: -1  Episode Reward:  14
xxxxx
xa. x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4617.79392716  476.21588959 -120.29354603]
------
Step:7, Action:South
State  109
Old Q Values:  [ -241.10880094  2506.1382112  -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  1753.45599423 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  13
xxxxx
x g x
xa..x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2505.33569914 1236.86188545 1761.98824422    3.33862213]
------
Step:8, Action:North
State  181
Old Q Values:  [2505.33569914 1236.86188545 1761.98824422    3.33862213]
New Q values:  [2386.87245781 1236.86188545 1761.98824422    3.33862213]
Reward: -1  Episode Reward:  12
xxxxx
xa. x
x g.x
x ..x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4617.79392716  476.21588959 -120.29354603]
------
Step:9, Action:South
State  111
Old Q Values:  [-177.44732869 4617.79392716  476.21588959 -120.29354603]
New Q values:  [-177.44732869 5789.56776894  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  11
xxxxx
x . x
xa..x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 13143.50066026  1554.80203889]
------
Step:10, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  4561.97781678     0.        ]
New Q values:  [    0.         -5536.05678243  5991.65920254     0.        ]
Reward: 9  Episode Reward:  20
xxxxx
x . x
x a.x
xg..x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 4083.29075005  7581.58548952 13871.56025276  2179.39995143]
------
Step:11, Action:East
State  192
Old Q Values:  [ 3756.98497708 15553.96194316 14412.78382458  2453.92999194]
New Q values:  [ 3756.98497708 15553.96194316  7464.63066973  2453.92999194]
Reward: 9  Episode Reward:  29
xxxxx
x . x
xg ax
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2739.22904008 5647.05713299  606.149024   1026.86689092]
------
Step:12, Action:South
State  208
Old Q Values:  [ 7997.55855556  5299.9218217  -4584.50430574 14660.65803766]
New Q values:  [ 7997.55855556  4208.92609827 -4584.50430574 14660.65803766]
Reward: 9  Episode Reward:  38
xxxxx
x . x
x g x
x .ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6945.19123195 -4059.26960032   523.5350035    378.14655282]
------
Step:13, Action:North
State  288
Old Q Values:  [ 6945.19123195 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 1175.67390408 -4059.26960032   523.5350035    378.14655282]
Reward: -10001  Episode Reward:  -9963
xxxxx
x . x
x  gx
x . x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 4083.29075005  7581.58548952 13871.56025276  2179.39995143]
------
Step:1, Action:East
State  192
Old Q Values:  [ 3756.98497708 15553.96194316  7464.63066973  2453.92999194]
New Q values:  [ 3756.98497708 15553.96194316  7389.44967919  2453.92999194]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x.gax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 7997.55855556  4208.92609827 -4584.50430574 14660.65803766]
------
Step:2, Action:North
State  208
Old Q Values:  [ 7997.55855556  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [17087.92356809  4208.92609827 -4584.50430574 14660.65803766]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[28683.54284864  7517.10465732  -180.00807518 46278.33381956]
------
Step:3, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  1.37854616e+04 -3.22965309e-01  2.78036268e+03]
New Q values:  [ 1.06807480e+02  1.37854616e+04 -3.22965309e-01  3.27473330e+04]
Reward: 9  Episode Reward:  27
xxxxx
x.a x
x.  x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 105432.62627268]
------
Step:4, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  6998.54989048  2976.39616521]
New Q values:  [ -281.736      -1150.91067548  6998.54989048  3000.71856128]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 6015.866984    223.04911789 -252.78192178]
------
Step:5, Action:South
State  110
Old Q Values:  [-239.29051573 1339.48548179  472.79132618 -180.6       ]
New Q values:  [-239.29051573 1695.85021939  472.79132618 -180.6       ]
Reward: 9  Episode Reward:  45
xxxxx
x   x
xa  x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:SW
[ 1.04129094e+00 -7.77507115e+03  3.84885342e+03  0.00000000e+00]
------
Step:6, Action:East
State  184
Old Q Values:  [  577.4048542      0.         10675.04795287     0.        ]
New Q values:  [ 577.4048542     0.         8665.81097114    0.        ]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xga x
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  14654.63929998  7098.42831386   610.93635926]
------
Step:7, Action:South
State  206
Old Q Values:  [   0.         2245.51354002 1644.59524509    0.        ]
New Q values:  [   0.         2190.58721121 1644.59524509    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x   x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4309.93931733  1854.7028197 ]
------
Step:8, Action:East
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  7027.00538167 18783.3266794 ]
New Q values:  [ 7699.11541878 -8521.23367799  3168.90432389 18783.3266794 ]
Reward: 9  Episode Reward:  52
xxxxx
x   x
x   x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1175.67390408 -4059.26960032   523.5350035    378.14655282]
------
Step:9, Action:North
State  288
Old Q Values:  [ 1175.67390408 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 4289.24269894 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  51
xxxxx
x   x
x  ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12731.91045768  5608.01966491   790.72804752  3965.05876272]
------
Step:10, Action:North
State  216
Old Q Values:  [2739.22904008 5647.05713299  606.149024   1026.86689092]
New Q values:  [10919.29150237  5647.05713299   606.149024    1026.86689092]
Reward: -1  Episode Reward:  50
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.37854616e+04 -3.22965309e-01  3.27473330e+04]
------
Step:11, Action:West
State  136
Old Q Values:  [ 878.22269011 8137.04318812  660.86649319  396.64640627]
New Q values:  [ 878.22269011 8137.04318812  660.86649319 1461.59471572]
Reward: -1  Episode Reward:  49
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4345.12051072 -4318.40133077]
------
Step:12, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4345.12051072 -4318.40133077]
New Q values:  [-9594.56523706 -8069.05606225  4178.56116072 -4318.40133077]
Reward: -1  Episode Reward:  48
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 8137.04318812  660.86649319 1461.59471572]
------
Step:13, Action:South
State  136
Old Q Values:  [ 878.22269011 8137.04318812  660.86649319 1461.59471572]
New Q values:  [ 878.22269011 6530.00472596  660.86649319 1461.59471572]
Reward: -1  Episode Reward:  47
xxxxx
x  gx
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[10919.29150237  5647.05713299   606.149024    1026.86689092]
------
Step:14, Action:South
State  216
Old Q Values:  [10919.29150237  5647.05713299   606.149024    1026.86689092]
New Q values:  [10919.29150237  3544.99566288   606.149024    1026.86689092]
Reward: -1  Episode Reward:  46
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4289.24269894 -4059.26960032   523.5350035    378.14655282]
------
Step:15, Action:North
State  288
Old Q Values:  [ 4289.24269894 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 4990.88453029 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  45
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[10919.29150237  3544.99566288   606.149024    1026.86689092]
------
Step:16, Action:North
State  216
Old Q Values:  [10919.29150237  3544.99566288   606.149024    1026.86689092]
New Q values:  [6326.11801874 3544.99566288  606.149024   1026.86689092]
Reward: -1  Episode Reward:  44
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 6530.00472596  660.86649319 1461.59471572]
------
Step:17, Action:South
State  136
Old Q Values:  [ 878.22269011 6530.00472596  660.86649319 1461.59471572]
New Q values:  [ 878.22269011 4509.237296    660.86649319 1461.59471572]
Reward: -1  Episode Reward:  43
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[6326.11801874 3544.99566288  606.149024   1026.86689092]
------
Step:18, Action:North
State  216
Old Q Values:  [6326.11801874 3544.99566288  606.149024   1026.86689092]
New Q values:  [12354.04709383  3544.99566288   606.149024    1026.86689092]
Reward: -1  Episode Reward:  42
xxxxx
x  ax
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.37854616e+04 -3.22965309e-01  3.27473330e+04]
------
Step:19, Action:West
State  136
Old Q Values:  [ 878.22269011 4509.237296    660.86649319 1461.59471572]
New Q values:  [ 878.22269011 4509.237296    660.86649319 1837.60623451]
Reward: -1  Episode Reward:  41
xxxxx
xga x
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225  4178.56116072 -4318.40133077]
------
Step:20, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  4178.56116072 -4318.40133077]
New Q values:  [-9594.56523706 -8069.05606225  3023.59565309 -4318.40133077]
Reward: -1  Episode Reward:  40
xxxxx
x gax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 4509.237296    660.86649319 1837.60623451]
------
Step:21, Action:South
State  136
Old Q Values:  [ 878.22269011 4509.237296    660.86649319 1837.60623451]
New Q values:  [ 878.22269011 5509.30904655  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  39
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[12354.04709383  3544.99566288   606.149024    1026.86689092]
------
Step:22, Action:North
State  216
Old Q Values:  [12354.04709383  3544.99566288   606.149024    1026.86689092]
New Q values:  [6593.8115515  3544.99566288  606.149024   1026.86689092]
Reward: -1  Episode Reward:  38
xxxxx
xg ax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 5509.30904655  660.86649319 1837.60623451]
------
Step:23, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  1.37854616e+04 -3.22965309e-01  3.27473330e+04]
New Q values:  [ 1.06807480e+02  7.49172811e+03 -3.22965309e-01  3.27473330e+04]
Reward: -1  Episode Reward:  37
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[6593.8115515  3544.99566288  606.149024   1026.86689092]
------
Step:24, Action:North
State  216
Old Q Values:  [6593.8115515  3544.99566288  606.149024   1026.86689092]
New Q values:  [12461.12450694  3544.99566288   606.149024    1026.86689092]
Reward: -1  Episode Reward:  36
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  7.49172811e+03 -3.22965309e-01  3.27473330e+04]
------
Step:25, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  7.49172811e+03 -3.22965309e-01  3.27473330e+04]
New Q values:  [ 1.06807480e+02  7.49172811e+03 -3.22965309e-01  1.51978981e+04]
Reward: -1  Episode Reward:  35
xxxxx
x a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6998.54989048  3000.71856128]
------
Step:26, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  3023.59565309 -4318.40133077]
New Q values:  [-9594.56523706 -8069.05606225  2861.6309752  -4318.40133077]
Reward: -1  Episode Reward:  34
xxxxx
xg ax
x   x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 5509.30904655  660.86649319 1837.60623451]
------
Step:27, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  7.49172811e+03 -3.22965309e-01  1.51978981e+04]
New Q values:  [ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  1.51978981e+04]
Reward: -1  Episode Reward:  33
xxxxx
x   x
xg ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[12461.12450694  3544.99566288   606.149024    1026.86689092]
------
Step:28, Action:North
State  216
Old Q Values:  [12461.12450694  3544.99566288   606.149024    1026.86689092]
New Q values:  [9543.21924745 3544.99566288  606.149024   1026.86689092]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  1.51978981e+04]
------
Step:29, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  1.51978981e+04]
New Q values:  [ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  8.37822728e+03]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  7665.56007685  2024.51848008]
------
Step:30, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  7665.56007685  2024.51848008]
New Q values:  [  -56.91790269 -1902.20915811  5579.09221553  2024.51848008]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  8.37822728e+03]
------
Step:31, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  8.37822728e+03]
New Q values:  [ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  5.02441858e+03]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x  gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  5579.09221553  2024.51848008]
------
Step:32, Action:East
State  121
Old Q Values:  [    0.             0.         -6734.89199161   946.27032172]
New Q values:  [    0.             0.         -7041.76408268   946.27032172]
Reward: -10001  Episode Reward:  -9972
xxxxx
x  gx
x   x
x.  x
xxxxx
xxxxx
x...x
x.a.x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[24930.96122915 16053.62807234 16029.85052182  1169.39963074]
------
Step:1, Action:North
State  194
Old Q Values:  [ 4083.29075005  7581.58548952 13871.56025276  2179.39995143]
New Q values:  [ 3738.28126717  7581.58548952 13871.56025276  2179.39995143]
Reward: 9  Episode Reward:  9
xxxxx
x.a.x
x. .x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  6998.54989048  3000.71856128]
------
Step:2, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  6998.54989048  3000.71856128]
New Q values:  [ -281.736      -1150.91067548  4825.14853545  3000.71856128]
Reward: 9  Episode Reward:  18
xxxxx
x. ax
x. .x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  5.02441858e+03]
------
Step:3, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.73442860e+03 -3.22965309e-01  5.02441858e+03]
New Q values:  [ 1.06807480e+02  5.56213721e+03 -3.22965309e-01  5.02441858e+03]
Reward: 9  Episode Reward:  27
xxxxx
x.  x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[9543.21924745 3544.99566288  606.149024   1026.86689092]
------
Step:4, Action:North
State  208
Old Q Values:  [17087.92356809  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [ 8503.21059121  4208.92609827 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  26
xxxxx
x. ax
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  5.56213721e+03 -3.22965309e-01  5.02441858e+03]
------
Step:5, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  5.56213721e+03 -3.22965309e-01  5.02441858e+03]
New Q values:  [ 1.06807480e+02  5.08722066e+03 -3.22965309e-01  5.02441858e+03]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
xg ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[9543.21924745 3544.99566288  606.149024   1026.86689092]
------
Step:6, Action:North
State  216
Old Q Values:  [9543.21924745 3544.99566288  606.149024   1026.86689092]
New Q values:  [5469.48041295 3544.99566288  606.149024   1026.86689092]
Reward: -1  Episode Reward:  24
xxxxx
xg ax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 5509.30904655  660.86649319 1837.60623451]
------
Step:7, Action:South
State  136
Old Q Values:  [ 878.22269011 5509.30904655  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 3843.9677425   660.86649319 1837.60623451]
Reward: -1  Episode Reward:  23
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[5469.48041295 3544.99566288  606.149024   1026.86689092]
------
Step:8, Action:North
State  208
Old Q Values:  [ 8503.21059121  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [ 4553.87455924  4208.92609827 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  22
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 3843.9677425   660.86649319 1837.60623451]
------
Step:9, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  5.08722066e+03 -3.22965309e-01  5.02441858e+03]
New Q values:  [ 1.06807480e+02  6.43248568e+03 -3.22965309e-01  5.02441858e+03]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4553.87455924  4208.92609827 -4584.50430574 14660.65803766]
------
Step:10, Action:North
State  208
Old Q Values:  [ 4553.87455924  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [ 3750.69552623  4208.92609827 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  20
xxxxx
x. ax
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.43248568e+03 -3.22965309e-01  5.02441858e+03]
------
Step:11, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  6.43248568e+03 -3.22965309e-01  5.02441858e+03]
New Q values:  [ 1.06807480e+02  6.43248568e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  19
xxxxx
x.a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  5579.09221553  2024.51848008]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4825.14853545  3000.71856128]
New Q values:  [ -281.736      -1150.91067548  3859.20511671  3000.71856128]
Reward: -1  Episode Reward:  18
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.43248568e+03 -3.22965309e-01  3.68289510e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.43248568e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  6.39196741e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  17
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12731.91045768  5608.01966491   790.72804752  3965.05876272]
------
Step:14, Action:North
State  216
Old Q Values:  [5469.48041295 3544.99566288  606.149024   1026.86689092]
New Q values:  [4104.78238738 3544.99566288  606.149024   1026.86689092]
Reward: -1  Episode Reward:  16
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.39196741e+03 -3.22965309e-01  3.68289510e+03]
------
Step:15, Action:South
State  136
Old Q Values:  [ 878.22269011 3843.9677425   660.86649319 1837.60623451]
New Q values:  [ 878.22269011 2768.42181322  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  15
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[4104.78238738 3544.99566288  606.149024   1026.86689092]
------
Step:16, Action:North
State  216
Old Q Values:  [4104.78238738 3544.99566288  606.149024   1026.86689092]
New Q values:  [3558.90317716 3544.99566288  606.149024   1026.86689092]
Reward: -1  Episode Reward:  14
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.39196741e+03 -3.22965309e-01  3.68289510e+03]
------
Step:17, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.39196741e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  6.95438437e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  13
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 3750.69552623  4208.92609827 -4584.50430574 14660.65803766]
------
Step:18, Action:South
State  216
Old Q Values:  [3558.90317716 3544.99566288  606.149024   1026.86689092]
New Q values:  [3558.90317716 2914.66362424  606.149024   1026.86689092]
Reward: -1  Episode Reward:  12
xxxxx
x.  x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4990.88453029 -4059.26960032   523.5350035    378.14655282]
------
Step:19, Action:North
State  288
Old Q Values:  [ 4990.88453029 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 3063.42476526 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  11
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3558.90317716 2914.66362424  606.149024   1026.86689092]
------
Step:20, Action:North
State  208
Old Q Values:  [ 3750.69552623  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [ 2330.20475446  4208.92609827 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  10
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 2768.42181322  660.86649319 1837.60623451]
------
Step:21, Action:South
State  136
Old Q Values:  [ 878.22269011 2768.42181322  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 2174.43967843  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  9
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3558.90317716 2914.66362424  606.149024   1026.86689092]
------
Step:22, Action:North
State  216
Old Q Values:  [3558.90317716 2914.66362424  606.149024   1026.86689092]
New Q values:  [3509.27658313 2914.66362424  606.149024   1026.86689092]
Reward: -1  Episode Reward:  8
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.95438437e+03 -3.22965309e-01  3.68289510e+03]
------
Step:23, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.95438437e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  6.60072689e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12731.91045768  5608.01966491   790.72804752  3965.05876272]
------
Step:24, Action:North
State  216
Old Q Values:  [3509.27658313 2914.66362424  606.149024   1026.86689092]
New Q values:  [3383.32869935 2914.66362424  606.149024   1026.86689092]
Reward: -1  Episode Reward:  6
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.60072689e+03 -3.22965309e-01  3.68289510e+03]
------
Step:25, Action:South
State  136
Old Q Values:  [ 878.22269011 2174.43967843  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 1884.17448118  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  5
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3383.32869935 2914.66362424  606.149024   1026.86689092]
------
Step:26, Action:North
State  208
Old Q Values:  [ 2330.20475446  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [ 1496.73424614  4208.92609827 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  4
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 1884.17448118  660.86649319 1837.60623451]
------
Step:27, Action:South
State  136
Old Q Values:  [ 878.22269011 1884.17448118  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 1768.06840228  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  3
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3383.32869935 2914.66362424  606.149024   1026.86689092]
------
Step:28, Action:North
State  208
Old Q Values:  [ 1496.73424614  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [ 1149.37556881  4208.92609827 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  2
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 1768.06840228  660.86649319 1837.60623451]
------
Step:29, Action:South
State  136
Old Q Values:  [ 878.22269011 1768.06840228  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 1721.62597072  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  1
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3383.32869935 2914.66362424  606.149024   1026.86689092]
------
Step:30, Action:North
State  208
Old Q Values:  [ 1149.37556881  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [ 1010.43209787  4208.92609827 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  0
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 1721.62597072  660.86649319 1837.60623451]
------
Step:31, Action:South
State  136
Old Q Values:  [ 878.22269011 1721.62597072  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 1703.04899809  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  -1
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3383.32869935 2914.66362424  606.149024   1026.86689092]
------
Step:32, Action:North
State  216
Old Q Values:  [3383.32869935 2914.66362424  606.149024   1026.86689092]
New Q values:  [3332.94954584 2914.66362424  606.149024   1026.86689092]
Reward: -1  Episode Reward:  -2
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.60072689e+03 -3.22965309e-01  3.68289510e+03]
------
Step:33, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.60072689e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  6.45926389e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  -3
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12731.91045768  5608.01966491   790.72804752  3965.05876272]
------
Step:34, Action:North
State  216
Old Q Values:  [3332.94954584 2914.66362424  606.149024   1026.86689092]
New Q values:  [3270.35898597 2914.66362424  606.149024   1026.86689092]
Reward: -1  Episode Reward:  -4
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.45926389e+03 -3.22965309e-01  3.68289510e+03]
------
Step:35, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.45926389e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  6.98130297e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  -5
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1010.43209787  4208.92609827 -4584.50430574 14660.65803766]
------
Step:36, Action:South
State  208
Old Q Values:  [ 1010.43209787  4208.92609827 -4584.50430574 14660.65803766]
New Q values:  [ 1010.43209787  2601.99786888 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  -6
xxxxx
x.g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3063.42476526 -4059.26960032   523.5350035    378.14655282]
------
Step:37, Action:North
State  288
Old Q Values:  [ 3063.42476526 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 2205.87760189 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  -7
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3270.35898597 2914.66362424  606.149024   1026.86689092]
------
Step:38, Action:North
State  208
Old Q Values:  [ 1010.43209787  2601.99786888 -4584.50430574 14660.65803766]
New Q values:  [  954.8547095   2601.99786888 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  -8
xxxxx
x.gax
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 1703.04899809  660.86649319 1837.60623451]
------
Step:39, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.98130297e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  7.19011860e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  -9
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  954.8547095   2601.99786888 -4584.50430574 14660.65803766]
------
Step:40, Action:South
State  210
Old Q Values:  [12731.91045768  5608.01966491   790.72804752  3965.05876272]
New Q values:  [12731.91045768  2904.37114653   790.72804752  3965.05876272]
Reward: -1  Episode Reward:  -10
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2205.87760189 -4059.26960032   523.5350035    378.14655282]
------
Step:41, Action:North
State  288
Old Q Values:  [ 2205.87760189 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 4701.32417806 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  -11
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[12731.91045768  2904.37114653   790.72804752  3965.05876272]
------
Step:42, Action:North
State  210
Old Q Values:  [12731.91045768  2904.37114653   790.72804752  3965.05876272]
New Q values:  [7249.19976264 2904.37114653  790.72804752 3965.05876272]
Reward: -1  Episode Reward:  -12
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  7.19011860e+03 -3.22965309e-01  3.68289510e+03]
------
Step:43, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  7.19011860e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  5.05020737e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  -13
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7249.19976264 2904.37114653  790.72804752 3965.05876272]
------
Step:44, Action:North
State  216
Old Q Values:  [3270.35898597 2914.66362424  606.149024   1026.86689092]
New Q values:  [2822.60580485 2914.66362424  606.149024   1026.86689092]
Reward: -1  Episode Reward:  -14
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  5.05020737e+03 -3.22965309e-01  3.68289510e+03]
------
Step:45, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  5.05020737e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  6.41768036e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  -15
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[  954.8547095   2601.99786888 -4584.50430574 14660.65803766]
------
Step:46, Action:South
State  210
Old Q Values:  [7249.19976264 2904.37114653  790.72804752 3965.05876272]
New Q values:  [7249.19976264 2571.54571203  790.72804752 3965.05876272]
Reward: -1  Episode Reward:  -16
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4701.32417806 -4059.26960032   523.5350035    378.14655282]
------
Step:47, Action:North
State  288
Old Q Values:  [ 4701.32417806 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 4054.68960002 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  -17
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7249.19976264 2571.54571203  790.72804752 3965.05876272]
------
Step:48, Action:North
State  210
Old Q Values:  [7249.19976264 2571.54571203  790.72804752 3965.05876272]
New Q values:  [4824.38401263 2571.54571203  790.72804752 3965.05876272]
Reward: -1  Episode Reward:  -18
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.41768036e+03 -3.22965309e-01  3.68289510e+03]
------
Step:49, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.41768036e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  4.01378735e+03 -3.22965309e-01  3.68289510e+03]
Reward: -1  Episode Reward:  -19
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4824.38401263 2571.54571203  790.72804752 3965.05876272]
------
Step:50, Action:North
State  208
Old Q Values:  [  954.8547095   2601.99786888 -4584.50430574 14660.65803766]
New Q values:  [ 1585.47808797  2601.99786888 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  -20
xxxxx
x. ax
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  4.01378735e+03 -3.22965309e-01  3.68289510e+03]
------
Step:51, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  4.01378735e+03 -3.22965309e-01  3.68289510e+03]
New Q values:  [ 1.06807480e+02  4.01378735e+03 -3.22965309e-01  2.63031957e+03]
Reward: -1  Episode Reward:  -21
xxxxx
x.a x
x.g x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  3859.20511671  3000.71856128]
------
Step:52, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  3859.20511671  3000.71856128]
New Q values:  [ -281.736      -1150.91067548  2747.21825085  3000.71856128]
Reward: -1  Episode Reward:  -22
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  4.01378735e+03 -3.22965309e-01  2.63031957e+03]
------
Step:53, Action:South
State  136
Old Q Values:  [ 878.22269011 1703.04899809  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 1555.01868651  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  -23
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2822.60580485 2914.66362424  606.149024   1026.86689092]
------
Step:54, Action:South
State  208
Old Q Values:  [ 1585.47808797  2601.99786888 -4584.50430574 14660.65803766]
New Q values:  [ 1585.47808797  2256.60602756 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  -24
xxxxx
x.g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4054.68960002 -4059.26960032   523.5350035    378.14655282]
------
Step:55, Action:North
State  288
Old Q Values:  [ 4054.68960002 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 2495.67492728 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  -25
xxxxx
xg  x
x. ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2822.60580485 2914.66362424  606.149024   1026.86689092]
------
Step:56, Action:South
State  208
Old Q Values:  [ 1585.47808797  2256.60602756 -4584.50430574 14660.65803766]
New Q values:  [ 1585.47808797  1650.74488921 -4584.50430574 14660.65803766]
Reward: -1  Episode Reward:  -26
xxxxx
x.g x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2495.67492728 -4059.26960032   523.5350035    378.14655282]
------
Step:57, Action:North
State  288
Old Q Values:  [ 2495.67492728 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 5395.86738221 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  -27
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1585.47808797  1650.74488921 -4584.50430574 14660.65803766]
------
Step:58, Action:South
State  210
Old Q Values:  [4824.38401263 2571.54571203  790.72804752 3965.05876272]
New Q values:  [4824.38401263 2646.77849948  790.72804752 3965.05876272]
Reward: -1  Episode Reward:  -28
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5395.86738221 -4059.26960032   523.5350035    378.14655282]
------
Step:59, Action:North
State  288
Old Q Values:  [ 5395.86738221 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 6555.94436418 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  -29
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1585.47808797  1650.74488921 -4584.50430574 14660.65803766]
------
Step:60, Action:South
State  210
Old Q Values:  [4824.38401263 2646.77849948  790.72804752 3965.05876272]
New Q values:  [4824.38401263 3024.89470904  790.72804752 3965.05876272]
Reward: -1  Episode Reward:  -30
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6555.94436418 -4059.26960032   523.5350035    378.14655282]
------
Step:61, Action:North
State  288
Old Q Values:  [ 6555.94436418 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 4069.09294946 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  -31
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4824.38401263 3024.89470904  790.72804752 3965.05876272]
------
Step:62, Action:North
State  210
Old Q Values:  [4824.38401263 3024.89470904  790.72804752 3965.05876272]
New Q values:  [3133.28980922 3024.89470904  790.72804752 3965.05876272]
Reward: -1  Episode Reward:  -32
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  4.01378735e+03 -3.22965309e-01  2.63031957e+03]
------
Step:63, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  4.01378735e+03 -3.22965309e-01  2.63031957e+03]
New Q values:  [ 1.06807480e+02  2.79443257e+03 -3.22965309e-01  2.63031957e+03]
Reward: -1  Episode Reward:  -33
xxxxx
x.  x
x. ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3133.28980922 3024.89470904  790.72804752 3965.05876272]
------
Step:64, Action:West
State  208
Old Q Values:  [ 1585.47808797  1650.74488921 -4584.50430574 14660.65803766]
New Q values:  [ 1585.47808797  1650.74488921 -4584.50430574 18720.76499455]
Reward: -1  Episode Reward:  -34
xxxxx
x.  x
x.agx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[42857.00593163 23958.24019337  9388.55586144   767.35890262]
------
Step:65, Action:North
State  195
Old Q Values:  [24930.96122915 16053.62807234 16029.85052182  1169.39963074]
New Q values:  [11645.51215632 16053.62807234 16029.85052182  1169.39963074]
Reward: -1  Episode Reward:  -35
xxxxx
x.a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  5579.09221553  2024.51848008]
------
Step:66, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2747.21825085  3000.71856128]
New Q values:  [ -281.736      -1150.91067548  1936.61707065  3000.71856128]
Reward: -1  Episode Reward:  -36
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.79443257e+03 -3.22965309e-01  2.63031957e+03]
------
Step:67, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.79443257e+03 -3.22965309e-01  2.63031957e+03]
New Q values:  [ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  2.63031957e+03]
Reward: -1  Episode Reward:  -37
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1585.47808797  1650.74488921 -4584.50430574 18720.76499455]
------
Step:68, Action:South
State  210
Old Q Values:  [3133.28980922 3024.89470904  790.72804752 3965.05876272]
New Q values:  [3133.28980922 2430.08576846  790.72804752 3965.05876272]
Reward: -1  Episode Reward:  -38
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4069.09294946 -4059.26960032   523.5350035    378.14655282]
------
Step:69, Action:North
State  288
Old Q Values:  [ 4069.09294946 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 7243.26667815 -4059.26960032   523.5350035    378.14655282]
Reward: -1  Episode Reward:  -39
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1585.47808797  1650.74488921 -4584.50430574 18720.76499455]
------
Step:70, Action:South
State  208
Old Q Values:  [ 1585.47808797  1650.74488921 -4584.50430574 18720.76499455]
New Q values:  [ 1585.47808797  2832.67795913 -4584.50430574 18720.76499455]
Reward: -1  Episode Reward:  -40
xxxxx
x.  x
x. gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7243.26667815 -4059.26960032   523.5350035    378.14655282]
------
Step:71, Action:East
State  288
Old Q Values:  [ 7243.26667815 -4059.26960032   523.5350035    378.14655282]
New Q values:  [ 7243.26667815 -4059.26960032  2201.79400485   378.14655282]
Reward: -301  Episode Reward:  -341
xxxxx
x. gx
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7243.26667815 -4059.26960032  2201.79400485   378.14655282]
------
Step:72, Action:North
State  288
Old Q Values:  [ 7243.26667815 -4059.26960032  2201.79400485   378.14655282]
New Q values:  [ 2512.93616963 -4059.26960032  2201.79400485   378.14655282]
Reward: -10001  Episode Reward:  -10342
xxxxx
x.  x
x. gx
x.. x
xxxxx
xxxxx
x...x
xa.gx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2386.87245781 1236.86188545 1761.98824422    3.33862213]
------
Step:1, Action:North
State  181
Old Q Values:  [2386.87245781 1236.86188545 1761.98824422    3.33862213]
New Q values:  [1486.18578139 1236.86188545 1761.98824422    3.33862213]
Reward: 9  Episode Reward:  9
xxxxx
xa.gx
x . x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1753.45599423 -2165.66138672   232.50800947]
------
Step:2, Action:South
State  109
Old Q Values:  [ -241.10880094  1753.45599423 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  1229.37887096 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  8
xxxxx
x g.x
xa. x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1486.18578139 1236.86188545 1761.98824422    3.33862213]
------
Step:3, Action:East
State  180
Old Q Values:  [ 2652.48164739  3060.24711158  7795.3759946  -4966.32149798]
New Q values:  [ 2652.48164739  3060.24711158  7519.94218783 -4966.32149798]
Reward: 9  Episode Reward:  17
xxxxx
xg..x
x a x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  14654.63929998  7098.42831386   610.93635926]
------
Step:4, Action:South
State  192
Old Q Values:  [ 3756.98497708 15553.96194316  7389.44967919  2453.92999194]
New Q values:  [ 3756.98497708 11861.98278108  7389.44967919  2453.92999194]
Reward: 9  Episode Reward:  26
xxxxx
x ..x
xg  x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799  3168.90432389 18783.3266794 ]
------
Step:5, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4309.93931733  1854.7028197 ]
New Q values:  [-2561.28592178 -5807.06396197  4309.93931733  2361.76105495]
Reward: 9  Episode Reward:  35
xxxxx
x ..x
x g x
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3900.1708019  1352.37702619 5381.59975689  -12.17474163]
------
Step:6, Action:East
State  260
Old Q Values:  [-7643.81886164 -5704.51612281  3250.63239792 -5679.36893145]
New Q values:  [-7643.81886164 -5704.51612281  2592.63475436 -5679.36893145]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
xg  x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4309.93931733  2361.76105495]
------
Step:7, Action:East
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  3168.90432389 18783.3266794 ]
New Q values:  [ 7699.11541878 -8521.23367799  2026.84258044 18783.3266794 ]
Reward: 9  Episode Reward:  43
xxxxx
x ..x
x   x
xg ax
xxxxx
Step:8, Action:West
State  288
Old Q Values:  [ 2512.93616963 -4059.26960032  2201.79400485   378.14655282]
New Q values:  [ 2512.93616963 -4059.26960032  2201.79400485  5785.65662495]
Reward: -1  Episode Reward:  42
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799  2026.84258044 18783.3266794 ]
------
Step:9, Action:West
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  2026.84258044 18783.3266794 ]
New Q values:  [ 7699.11541878 -8521.23367799  2026.84258044  9127.21059883]
Reward: -1  Episode Reward:  41
xxxxx
x ..x
x g x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3900.1708019  1352.37702619 5381.59975689  -12.17474163]
------
Step:10, Action:East
State  261
Old Q Values:  [3900.1708019  1352.37702619 5381.59975689  -12.17474163]
New Q values:  [3900.1708019  1352.37702619 3445.02169795  -12.17474163]
Reward: -1  Episode Reward:  40
xxxxx
x g.x
x   x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4309.93931733  2361.76105495]
------
Step:11, Action:East
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  2026.84258044  9127.21059883]
New Q values:  [ 7699.11541878 -8521.23367799  2545.83401966  9127.21059883]
Reward: -1  Episode Reward:  39
xxxxx
xg..x
x   x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2512.93616963 -4059.26960032  2201.79400485  5785.65662495]
------
Step:12, Action:West
State  288
Old Q Values:  [ 2512.93616963 -4059.26960032  2201.79400485  5785.65662495]
New Q values:  [ 2512.93616963 -4059.26960032  2201.79400485  5051.82582963]
Reward: -1  Episode Reward:  38
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799  2545.83401966  9127.21059883]
------
Step:13, Action:West
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  2545.83401966  9127.21059883]
New Q values:  [ 7699.11541878 -8521.23367799  2545.83401966  4428.07466584]
Reward: -1  Episode Reward:  37
xxxxx
xg..x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-7643.81886164 -5704.51612281  2592.63475436 -5679.36893145]
------
Step:14, Action:East
State  260
Old Q Values:  [-7643.81886164 -5704.51612281  2592.63475436 -5679.36893145]
New Q values:  [-7643.81886164 -5704.51612281  3346.18852738 -5679.36893145]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7699.11541878 -8521.23367799  2545.83401966  4428.07466584]
------
Step:15, Action:North
State  272
Old Q Values:  [ 7699.11541878 -8521.23367799  2545.83401966  4428.07466584]
New Q values:  [ 7240.51424334 -8521.23367799  2545.83401966  4428.07466584]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 3738.28126717  7581.58548952 13871.56025276  2179.39995143]
------
Step:16, Action:East
State  192
Old Q Values:  [ 3756.98497708 11861.98278108  7389.44967919  2453.92999194]
New Q values:  [ 3756.98497708 11861.98278108  8571.40937004  2453.92999194]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
xg ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 1585.47808797  2832.67795913 -4584.50430574 18720.76499455]
------
Step:17, Action:West
State  210
Old Q Values:  [3133.28980922 2430.08576846  790.72804752 3965.05876272]
New Q values:  [3133.28980922 2430.08576846  790.72804752 5746.89158092]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 3738.28126717  7581.58548952 13871.56025276  2179.39995143]
------
Step:18, Action:East
State  194
Old Q Values:  [ 3738.28126717  7581.58548952 13871.56025276  2179.39995143]
New Q values:  [3738.28126717 7581.58548952 7272.09157538 2179.39995143]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x  ax
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3133.28980922 2430.08576846  790.72804752 5746.89158092]
------
Step:19, Action:West
State  210
Old Q Values:  [3133.28980922 2430.08576846  790.72804752 5746.89158092]
New Q values:  [3133.28980922 2430.08576846  790.72804752 4572.63227922]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[3738.28126717 7581.58548952 7272.09157538 2179.39995143]
------
Step:20, Action:South
State  192
Old Q Values:  [ 3756.98497708 11861.98278108  8571.40937004  2453.92999194]
New Q values:  [3756.98497708 6916.34738543 8571.40937004 2453.92999194]
Reward: -1  Episode Reward:  30
xxxxx
x ..x
xg  x
x a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 7240.51424334 -8521.23367799  2545.83401966  4428.07466584]
------
Step:21, Action:North
State  272
Old Q Values:  [ 7240.51424334 -8521.23367799  2545.83401966  4428.07466584]
New Q values:  [ 5170.08134419 -8521.23367799  2545.83401966  4428.07466584]
Reward: -1  Episode Reward:  29
xxxxx
x ..x
x a x
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[3738.28126717 7581.58548952 7272.09157538 2179.39995143]
------
Step:22, Action:South
State  194
Old Q Values:  [3738.28126717 7581.58548952 7272.09157538 2179.39995143]
New Q values:  [ 3738.28126717 -1416.94140093  7272.09157538  2179.39995143]
Reward: -10001  Episode Reward:  -9972
xxxxx
x ..x
x   x
x g x
xxxxx
xxxxx
x..ax
x.g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  2.63031957e+03]
------
Step:1, Action:South
State  136
Old Q Values:  [ 878.22269011 1555.01868651  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 1501.80656187  660.86649319 1837.60623451]
Reward: 9  Episode Reward:  9
xxxxx
x.g x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2822.60580485 2914.66362424  606.149024   1026.86689092]
------
Step:2, Action:South
State  208
Old Q Values:  [ 1585.47808797  2832.67795913 -4584.50430574 18720.76499455]
New Q values:  [ 1585.47808797  2654.01893254 -4584.50430574 18720.76499455]
Reward: 9  Episode Reward:  18
xxxxx
xg. x
x.  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2512.93616963 -4059.26960032  2201.79400485  5051.82582963]
------
Step:3, Action:West
State  288
Old Q Values:  [ 2512.93616963 -4059.26960032  2201.79400485  5051.82582963]
New Q values:  [ 2512.93616963 -4059.26960032  2201.79400485  3577.15473511]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5170.08134419 -8521.23367799  2545.83401966  4428.07466584]
------
Step:4, Action:North
State  272
Old Q Values:  [ 5170.08134419 -8521.23367799  2545.83401966  4428.07466584]
New Q values:  [-1361.14465131 -8521.23367799  2545.83401966  4428.07466584]
Reward: -10001  Episode Reward:  -9974
xxxxx
x.. x
x.g x
x.  x
xxxxx
xxxxx
xg..x
x.a.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3756.98497708 6916.34738543 8571.40937004 2453.92999194]
------
Step:1, Action:East
State  200
Old Q Values:  [  169.9257398  14654.63929998  7098.42831386   610.93635926]
New Q values:  [  169.9257398  14654.63929998  3719.17041281   610.93635926]
Reward: 9  Episode Reward:  9
xxxxx
x g.x
x. ax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2822.60580485 2914.66362424  606.149024   1026.86689092]
------
Step:2, Action:South
State  208
Old Q Values:  [ 1585.47808797  2654.01893254 -4584.50430574 18720.76499455]
New Q values:  [ 1585.47808797  2140.15399355 -4584.50430574 18720.76499455]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x.g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2512.93616963 -4059.26960032  2201.79400485  3577.15473511]
------
Step:3, Action:West
State  288
Old Q Values:  [ 2512.93616963 -4059.26960032  2201.79400485  3577.15473511]
New Q values:  [ 2512.93616963 -4059.26960032  2201.79400485 -3235.3157062 ]
Reward: -9991  Episode Reward:  -9973
xxxxx
x ..x
x.  x
x.g x
xxxxx
xxxxx
xa..x
x...x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5789.56776894  476.21588959 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 4471.08789309 1868.2303995     0.        ]
New Q values:  [ 221.30610858 2322.4316305  1868.2303995     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa.gx
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1486.18578139 1236.86188545 1761.98824422    3.33862213]
------
Step:2, Action:East
State  181
Old Q Values:  [1486.18578139 1236.86188545 1761.98824422    3.33862213]
New Q values:  [ 1.48618578e+03  1.23686189e+03 -3.90372688e+03  3.33862213e+00]
Reward: -9991  Episode Reward:  -9982
xxxxx
x ..x
x g.x
x.. x
xxxxx
xxxxx
x...x
x.a.x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 3738.28126717 -1416.94140093  7272.09157538  2179.39995143]
------
Step:1, Action:East
State  194
Old Q Values:  [ 3738.28126717 -1416.94140093  7272.09157538  2179.39995143]
New Q values:  [ 3738.28126717 -1416.94140093  4286.02631392  2179.39995143]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3133.28980922 2430.08576846  790.72804752 4572.63227922]
------
Step:2, Action:West
State  210
Old Q Values:  [3133.28980922 2430.08576846  790.72804752 4572.63227922]
New Q values:  [3133.28980922 2430.08576846  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x.a x
x.g.x
xxxxx
Step:3, Action:East
State  194
Old Q Values:  [ 3738.28126717 -1416.94140093  4286.02631392  2179.39995143]
New Q values:  [ 3738.28126717 -1416.94140093  2653.79746833  2179.39995143]
Reward: -1  Episode Reward:  7
xxxxx
x...x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3133.28980922 2430.08576846  790.72804752 3114.26080587]
------
Step:4, Action:North
State  210
Old Q Values:  [3133.28980922 2430.08576846  790.72804752 3114.26080587]
New Q values:  [3278.73668132 2430.08576846  790.72804752 3114.26080587]
Reward: 9  Episode Reward:  16
xxxxx
x..ax
x.  x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  2.63031957e+03]
------
Step:5, Action:South
State  130
Old Q Values:  [28683.54284864  7517.10465732  -180.00807518 46278.33381956]
New Q values:  [28683.54284864  3989.86286733  -180.00807518 46278.33381956]
Reward: -1  Episode Reward:  15
xxxxx
x.. x
x. ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3278.73668132 2430.08576846  790.72804752 3114.26080587]
------
Step:6, Action:North
State  208
Old Q Values:  [ 1585.47808797  2140.15399355 -4584.50430574 18720.76499455]
New Q values:  [14517.09138106  2140.15399355 -4584.50430574 18720.76499455]
Reward: -1  Episode Reward:  14
xxxxx
x..ax
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[28683.54284864  3989.86286733  -180.00807518 46278.33381956]
------
Step:7, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  2.63031957e+03]
New Q values:  [ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  1.95774340e+03]
Reward: 9  Episode Reward:  23
xxxxx
x.a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1936.61707065  3000.71856128]
------
Step:8, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1936.61707065  3000.71856128]
New Q values:  [ -281.736      -1150.91067548  1936.61707065  1394.91007955]
Reward: 9  Episode Reward:  32
xxxxx
xa  x
xg  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   630.74218347  -180.6       ]
------
Step:9, Action:East
State  107
Old Q Values:  [-252.35169558 6015.866984    223.04911789 -252.78192178]
New Q values:  [-252.35169558 6015.866984    669.60476835 -252.78192178]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1936.61707065  1394.91007955]
------
Step:10, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  2861.6309752  -4318.40133077]
New Q values:  [-9594.56523706 -8069.05606225  1695.33426043 -4318.40133077]
Reward: -1  Episode Reward:  30
xxxxx
x gax
x.  x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 1501.80656187  660.86649319 1837.60623451]
------
Step:11, Action:South
State  136
Old Q Values:  [ 878.22269011 1501.80656187  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 1474.52171202  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  29
xxxxx
x  gx
x. ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2822.60580485 2914.66362424  606.149024   1026.86689092]
------
Step:12, Action:South
State  208
Old Q Values:  [14517.09138106  2140.15399355 -4584.50430574 18720.76499455]
New Q values:  [14517.09138106  1615.34244831 -4584.50430574 18720.76499455]
Reward: 9  Episode Reward:  38
xxxxx
x   x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2512.93616963 -4059.26960032  2201.79400485 -3235.3157062 ]
------
Step:13, Action:East
State  288
Old Q Values:  [ 2512.93616963 -4059.26960032  2201.79400485 -3235.3157062 ]
New Q values:  [ 2512.93616963 -4059.26960032  1453.99845283 -3235.3157062 ]
Reward: -301  Episode Reward:  -263
xxxxx
x  gx
x.  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2512.93616963 -4059.26960032  1453.99845283 -3235.3157062 ]
------
Step:14, Action:North
State  288
Old Q Values:  [ 2512.93616963 -4059.26960032  1453.99845283 -3235.3157062 ]
New Q values:  [ 6620.80396622 -4059.26960032  1453.99845283 -3235.3157062 ]
Reward: -1  Episode Reward:  -264
xxxxx
x g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14517.09138106  1615.34244831 -4584.50430574 18720.76499455]
------
Step:15, Action:West
State  208
Old Q Values:  [14517.09138106  1615.34244831 -4584.50430574 18720.76499455]
New Q values:  [14517.09138106  1615.34244831 -4584.50430574 20344.80777731]
Reward: -1  Episode Reward:  -265
xxxxx
x  gx
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[42857.00593163 23958.24019337  9388.55586144   767.35890262]
------
Step:16, Action:North
State  193
Old Q Values:  [42857.00593163 23958.24019337  9388.55586144   767.35890262]
New Q values:  [18815.93003731 23958.24019337  9388.55586144   767.35890262]
Reward: -1  Episode Reward:  -266
xxxxx
x a x
x. gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  5579.09221553  2024.51848008]
------
Step:17, Action:East
State  121
Old Q Values:  [    0.             0.         -7041.76408268   946.27032172]
New Q values:  [    0.             0.         -8266.02376272   946.27032172]
Reward: -10001  Episode Reward:  -10267
xxxxx
x  gx
x.  x
x.  x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 1.48618578e+03  1.23686189e+03 -3.90372688e+03  3.33862213e+00]
------
Step:1, Action:North
State  180
Old Q Values:  [ 2652.48164739  3060.24711158  7519.94218783 -4966.32149798]
New Q values:  [-4092.10583895  3060.24711158  7519.94218783 -4966.32149798]
Reward: -9991  Episode Reward:  -9991
xxxxx
xg .x
x ..x
x...x
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5789.56776894  476.21588959 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 2322.4316305  1868.2303995     0.        ]
New Q values:  [ 221.30610858 4877.42285028 1868.2303995     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 13143.50066026  1554.80203889]
------
Step:2, Action:East
State  181
Old Q Values:  [ 1.48618578e+03  1.23686189e+03 -3.90372688e+03  3.33862213e+00]
New Q values:  [1486.18578139 1236.86188545 2363.05050763    3.33862213]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NE
[  613.33320563 13063.80419921 -3941.94579649  1311.30124863]
------
Step:3, Action:South
State  193
Old Q Values:  [18815.93003731 23958.24019337  9388.55586144   767.35890262]
New Q values:  [18815.93003731 34343.67030268  9388.55586144   767.35890262]
Reward: 9  Episode Reward:  27
xxxxx
x .gx
x   x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17892.11210647  1201.72649971  3797.74781399 82516.58075109]
------
Step:4, Action:West
State  277
Old Q Values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.71829628e+03]
New Q values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.26276975e+03]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3900.1708019  1352.37702619 3445.02169795  -12.17474163]
------
Step:5, Action:North
State  261
Old Q Values:  [3900.1708019  1352.37702619 3445.02169795  -12.17474163]
New Q values:  [5502.51851884 1352.37702619 3445.02169795  -12.17474163]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xa  x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 13143.50066026  1554.80203889]
------
Step:6, Action:East
State  181
Old Q Values:  [1486.18578139 1236.86188545 2363.05050763    3.33862213]
New Q values:  [1.48618578e+03 1.23686189e+03 1.12477213e+04 3.33862213e+00]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 34343.67030268  9388.55586144   767.35890262]
------
Step:7, Action:South
State  195
Old Q Values:  [11645.51215632 16053.62807234 16029.85052182  1169.39963074]
New Q values:  [11645.51215632 31175.82545426 16029.85052182  1169.39963074]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17892.11210647  1201.72649971  3797.74781399 82516.58075109]
------
Step:8, Action:West
State  277
Old Q Values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.26276975e+03]
New Q values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.55526346e+03]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5502.51851884 1352.37702619 3445.02169795  -12.17474163]
------
Step:9, Action:North
State  261
Old Q Values:  [5502.51851884 1352.37702619 3445.02169795  -12.17474163]
New Q values:  [5574.72379569 1352.37702619 3445.02169795  -12.17474163]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.48618578e+03 1.23686189e+03 1.12477213e+04 3.33862213e+00]
------
Step:10, Action:North
State  180
Old Q Values:  [-4092.10583895  3060.24711158  7519.94218783 -4966.32149798]
New Q values:  [  544.37199765  3060.24711158  7519.94218783 -4966.32149798]
Reward: -1  Episode Reward:  30
xxxxx
xa..x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SW
[-180.6        7272.71444411 2583.17308616 -180.6       ]
------
Step:11, Action:East
State  102
Old Q Values:  [-180.6        7272.71444411 2583.17308616 -180.6       ]
New Q values:  [-180.6        7272.71444411 3164.98651093 -180.6       ]
Reward: 9  Episode Reward:  39
xxxxx
x a.x
x   x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 7087.72425489    0.         2070.94656993]
------
Step:12, Action:South
State  118
Old Q Values:  [1761.89752936 7087.72425489    0.         2070.94656993]
New Q values:  [1761.89752936 4220.56752395    0.         2070.94656993]
Reward: -1  Episode Reward:  38
xxxxx
x  .x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1606.11926949 2565.46179201 4620.25940663  231.67262594]
------
Step:13, Action:East
State  196
Old Q Values:  [1606.11926949 2565.46179201 4620.25940663  231.67262594]
New Q values:  [1606.11926949 2565.46179201 7950.94609585  231.67262594]
Reward: -1  Episode Reward:  37
xxxxx
xg .x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14517.09138106  1615.34244831 -4584.50430574 20344.80777731]
------
Step:14, Action:West
State  208
Old Q Values:  [14517.09138106  1615.34244831 -4584.50430574 20344.80777731]
New Q values:  [14517.09138106  1615.34244831 -4584.50430574 10522.60693968]
Reward: -1  Episode Reward:  36
xxxxx
x  .x
xga x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NW
[1606.11926949 2565.46179201 7950.94609585  231.67262594]
------
Step:15, Action:East
State  196
Old Q Values:  [1606.11926949 2565.46179201 7950.94609585  231.67262594]
New Q values:  [1606.11926949 2565.46179201 7534.90585265  231.67262594]
Reward: -1  Episode Reward:  35
xxxxx
x  .x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14517.09138106  1615.34244831 -4584.50430574 10522.60693968]
------
Step:16, Action:North
State  216
Old Q Values:  [2822.60580485 2914.66362424  606.149024   1026.86689092]
New Q values:  [3154.46307957 2914.66362424  606.149024   1026.86689092]
Reward: 9  Episode Reward:  44
xxxxx
x  ax
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  1.95774340e+03]
------
Step:17, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  1.95774340e+03]
New Q values:  [ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  1.88689784e+03]
Reward: -1  Episode Reward:  43
xxxxx
x a x
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  681.20236034 3681.33493291]
------
Step:18, Action:West
State  127
Old Q Values:  [   0.            1.67014986  895.48700012 1363.21412963]
New Q values:  [0.00000000e+00 1.67014986e+00 8.95487000e+02 2.28155598e+03]
Reward: -1  Episode Reward:  42
xxxxx
xa  x
x  gx
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5789.56776894  476.21588959 -120.29354603]
------
Step:19, Action:South
State  111
Old Q Values:  [-177.44732869 5789.56776894  476.21588959 -120.29354603]
New Q values:  [-177.44732869 5849.45595761  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  41
xxxxx
x   x
xag x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2261.36340498 11780.76283343   154.04646645]
------
Step:20, Action:South
State  189
Old Q Values:  [  533.05203844  2261.36340498 11780.76283343   154.04646645]
New Q values:  [  533.05203844  2576.3625007  11780.76283343   154.04646645]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5574.72379569 1352.37702619 3445.02169795  -12.17474163]
------
Step:21, Action:North
State  261
Old Q Values:  [5574.72379569 1352.37702619 3445.02169795  -12.17474163]
New Q values:  [5763.51836831 1352.37702619 3445.02169795  -12.17474163]
Reward: -1  Episode Reward:  39
xxxxx
x  gx
xa  x
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2576.3625007  11780.76283343   154.04646645]
------
Step:22, Action:East
State  189
Old Q Values:  [  533.05203844  2576.3625007  11780.76283343   154.04646645]
New Q values:  [ 533.05203844 2576.3625007  6343.12806582  154.04646645]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SE ghost_dir:NE
[   0.         1285.92441063 5438.0764415   198.38683706]
------
Step:23, Action:South
State  204
Old Q Values:  [   0.         4557.66188112 3591.40592189  441.58769553]
New Q values:  [   0.         3115.44654765 3591.40592189  441.58769553]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x g x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4309.93931733  2361.76105495]
------
Step:24, Action:East
State  272
Old Q Values:  [-1361.14465131 -8521.23367799  2545.83401966  4428.07466584]
New Q values:  [-1361.14465131 -8521.23367799 63009.97479773  4428.07466584]
Reward: 100009  Episode Reward:  100046
xxxxx
x g x
x   x
x  ax
xxxxx
xxxxx
xa..x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5849.45595761  476.21588959 -120.29354603]
------
Step:1, Action:South
State  103
Old Q Values:  [ 221.30610858 4877.42285028 1868.2303995     0.        ]
New Q values:  [ 221.30610858 5899.41933819 1868.2303995     0.        ]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xa. x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[ 2049.1969658   1771.27760536 13143.50066026  1554.80203889]
------
Step:2, Action:East
State  183
Old Q Values:  [ 2049.1969658   1771.27760536 13143.50066026  1554.80203889]
New Q values:  [2049.1969658  1771.27760536 9108.85615511 1554.80203889]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x a x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 1.28201863e+04 2.89128867e+03 4.59156348e+03]
------
Step:3, Action:South
State  193
Old Q Values:  [18815.93003731 34343.67030268  9388.55586144   767.35890262]
New Q values:  [18815.93003731 38497.8423464   9388.55586144   767.35890262]
Reward: 9  Episode Reward:  27
xxxxx
x ..x
x  gx
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17892.11210647  1201.72649971  3797.74781399 82516.58075109]
------
Step:4, Action:West
State  273
Old Q Values:  [17892.11210647  1201.72649971  3797.74781399 82516.58075109]
New Q values:  [17892.11210647  1201.72649971  3797.74781399 34741.08781093]
Reward: 9  Episode Reward:  36
xxxxx
x ..x
x   x
xa gx
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5763.51836831 1352.37702619 3445.02169795  -12.17474163]
------
Step:5, Action:North
State  261
Old Q Values:  [5763.51836831 1352.37702619 3445.02169795  -12.17474163]
New Q values:  [5037.46419386 1352.37702619 3445.02169795  -12.17474163]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
xa  x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2049.1969658  1771.27760536 9108.85615511 1554.80203889]
------
Step:6, Action:East
State  182
Old Q Values:  [    0.         -5536.05678243  5991.65920254     0.        ]
New Q values:  [    0.         -5536.05678243  3517.54806117     0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 3738.28126717 -1416.94140093  2653.79746833  2179.39995143]
------
Step:7, Action:North
State  196
Old Q Values:  [1606.11926949 2565.46179201 7534.90585265  231.67262594]
New Q values:  [1914.01796498 2565.46179201 7534.90585265  231.67262594]
Reward: 9  Episode Reward:  43
xxxxx
x a.x
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NE ghost_dir:SW
[1761.89752936 4220.56752395    0.         2070.94656993]
------
Step:8, Action:South
State  118
Old Q Values:  [1761.89752936 4220.56752395    0.         2070.94656993]
New Q values:  [1761.89752936 3611.9357666     0.         2070.94656993]
Reward: -1  Episode Reward:  42
xxxxx
x  .x
x a x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[3925.89935061 -200.61022961 6414.36252342    0.        ]
------
Step:9, Action:East
State  196
Old Q Values:  [1914.01796498 2565.46179201 7534.90585265  231.67262594]
New Q values:  [1914.01796498 2565.46179201 7368.48975538  231.67262594]
Reward: -1  Episode Reward:  41
xxxxx
x  .x
xg ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[14517.09138106  1615.34244831 -4584.50430574 10522.60693968]
------
Step:10, Action:North
State  216
Old Q Values:  [3154.46307957 2914.66362424  606.149024   1026.86689092]
New Q values:  [3287.20598946 2914.66362424  606.149024   1026.86689092]
Reward: 9  Episode Reward:  50
xxxxx
x  ax
x g x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  1.88689784e+03]
------
Step:11, Action:South
State  136
Old Q Values:  [ 878.22269011 1474.52171202  660.86649319 1837.60623451]
New Q values:  [ 878.22269011 1575.37048165  660.86649319 1837.60623451]
Reward: -1  Episode Reward:  49
xxxxx
x g x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[3287.20598946 2914.66362424  606.149024   1026.86689092]
------
Step:12, Action:North
State  216
Old Q Values:  [3287.20598946 2914.66362424  606.149024   1026.86689092]
New Q values:  [1865.56426614 2914.66362424  606.149024   1026.86689092]
Reward: -1  Episode Reward:  48
xxxxx
xg ax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[ 878.22269011 1575.37048165  660.86649319 1837.60623451]
------
Step:13, Action:West
State  136
Old Q Values:  [ 878.22269011 1575.37048165  660.86649319 1837.60623451]
New Q values:  [  878.22269011  1575.37048165   660.86649319 -4614.29946136]
Reward: -10001  Episode Reward:  -9953
xxxxx
x g x
x   x
x  .x
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.48618578e+03 1.23686189e+03 1.12477213e+04 3.33862213e+00]
------
Step:1, Action:East
State  189
Old Q Values:  [ 533.05203844 2576.3625007  6343.12806582  154.04646645]
New Q values:  [  533.05203844  2576.3625007  14092.00393025   154.04646645]
Reward: 9  Episode Reward:  9
xxxxx
x.. x
x agx
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 38497.8423464   9388.55586144   767.35890262]
------
Step:2, Action:South
State  195
Old Q Values:  [11645.51215632 31175.82545426 16029.85052182  1169.39963074]
New Q values:  [11645.51215632 22898.05652498 16029.85052182  1169.39963074]
Reward: 9  Episode Reward:  18
xxxxx
x.. x
x  .x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17892.11210647  1201.72649971  3797.74781399 34741.08781093]
------
Step:3, Action:West
State  273
Old Q Values:  [17892.11210647  1201.72649971  3797.74781399 34741.08781093]
New Q values:  [17892.11210647  1201.72649971  3797.74781399 15413.07438253]
Reward: 9  Episode Reward:  27
xxxxx
x.. x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[5037.46419386 1352.37702619 3445.02169795  -12.17474163]
------
Step:4, Action:North
State  261
Old Q Values:  [5037.46419386 1352.37702619 3445.02169795  -12.17474163]
New Q values:  [4747.04252408 1352.37702619 3445.02169795  -12.17474163]
Reward: -1  Episode Reward:  26
xxxxx
x.. x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2049.1969658  1771.27760536 9108.85615511 1554.80203889]
------
Step:5, Action:East
State  183
Old Q Values:  [2049.1969658  1771.27760536 9108.85615511 1554.80203889]
New Q values:  [2049.1969658  1771.27760536 4764.4268422  1554.80203889]
Reward: -1  Episode Reward:  25
xxxxx
x.. x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 3738.28126717 -1416.94140093  2653.79746833  2179.39995143]
------
Step:6, Action:North
State  194
Old Q Values:  [ 3738.28126717 -1416.94140093  2653.79746833  2179.39995143]
New Q values:  [ 2605.11298674 -1416.94140093  2653.79746833  2179.39995143]
Reward: 9  Episode Reward:  34
xxxxx
x.a x
x  .x
xg .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:SW
[   0.          331.64678262  681.20236034 3681.33493291]
------
Step:7, Action:West
State  126
Old Q Values:  [   0.          331.64678262  681.20236034 3681.33493291]
New Q values:  [   0.          331.64678262  681.20236034 1986.68903898]
Reward: 9  Episode Reward:  43
xxxxx
xa  x
xg .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1695.85021939  472.79132618 -180.6       ]
------
Step:8, Action:East
State  108
Old Q Values:  [-8463.16477134  2805.00500697  1979.29875994     0.        ]
New Q values:  [-8463.16477134  2805.00500697  1442.37754881     0.        ]
Reward: -1  Episode Reward:  42
xxxxx
xga x
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[    0.          1166.51141701  2170.86014946 -4254.91390044]
------
Step:9, Action:East
State  124
Old Q Values:  [    0.          1166.51141701  2170.86014946 -4254.91390044]
New Q values:  [    0.          1166.51141701  1340.35520428 -4254.91390044]
Reward: -1  Episode Reward:  41
xxxxx
x gax
x  .x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  1575.37048165   660.86649319 -4614.29946136]
------
Step:10, Action:South
State  136
Old Q Values:  [  878.22269011  1575.37048165   660.86649319 -4614.29946136]
New Q values:  [  878.22269011  1509.94727993   660.86649319 -4614.29946136]
Reward: 9  Episode Reward:  50
xxxxx
xg  x
x  ax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[1865.56426614 2914.66362424  606.149024   1026.86689092]
------
Step:11, Action:South
State  208
Old Q Values:  [14517.09138106  1615.34244831 -4584.50430574 10522.60693968]
New Q values:  [14517.09138106 62637.77816919 -4584.50430574 10522.60693968]
Reward: 100009  Episode Reward:  100059
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x.a.x
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1936.61707065  1394.91007955]
------
Step:1, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  5579.09221553  2024.51848008]
New Q values:  [  -56.91790269 -1902.20915811  4257.05764385  2024.51848008]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x...x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  1.88689784e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.73340253e+03 -3.22965309e-01  1.88689784e+03]
New Q values:  [ 1.06807480e+02  3.68238201e+03 -3.22965309e-01  1.88689784e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.  x
x..ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3278.73668132 2430.08576846  790.72804752 3114.26080587]
------
Step:3, Action:North
State  208
Old Q Values:  [14517.09138106 62637.77816919 -4584.50430574 10522.60693968]
New Q values:  [ 6910.95115679 62637.77816919 -4584.50430574 10522.60693968]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
x.g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.68238201e+03 -3.22965309e-01  1.88689784e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.68238201e+03 -3.22965309e-01  1.88689784e+03]
New Q values:  [ 1.06807480e+02  2.02636863e+04 -3.22965309e-01  1.88689784e+03]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
xg.ax
x. .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6910.95115679 62637.77816919 -4584.50430574 10522.60693968]
------
Step:5, Action:South
State  208
Old Q Values:  [ 6910.95115679 62637.77816919 -4584.50430574 10522.60693968]
New Q values:  [ 6910.95115679 27046.75245754 -4584.50430574 10522.60693968]
Reward: 9  Episode Reward:  25
xxxxx
xg  x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6620.80396622 -4059.26960032  1453.99845283 -3235.3157062 ]
------
Step:6, Action:North
State  288
Old Q Values:  [ 6620.80396622 -4059.26960032  1453.99845283 -3235.3157062 ]
New Q values:  [10761.74732375 -4059.26960032  1453.99845283 -3235.3157062 ]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6910.95115679 27046.75245754 -4584.50430574 10522.60693968]
------
Step:7, Action:South
State  208
Old Q Values:  [ 6910.95115679 27046.75245754 -4584.50430574 10522.60693968]
New Q values:  [ 6910.95115679 14046.62518014 -4584.50430574 10522.60693968]
Reward: -1  Episode Reward:  23
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10761.74732375 -4059.26960032  1453.99845283 -3235.3157062 ]
------
Step:8, Action:North
State  288
Old Q Values:  [10761.74732375 -4059.26960032  1453.99845283 -3235.3157062 ]
New Q values:  [ 8518.08648354 -4059.26960032  1453.99845283 -3235.3157062 ]
Reward: -1  Episode Reward:  22
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6910.95115679 14046.62518014 -4584.50430574 10522.60693968]
------
Step:9, Action:South
State  208
Old Q Values:  [ 6910.95115679 14046.62518014 -4584.50430574 10522.60693968]
New Q values:  [ 6910.95115679  8173.47601712 -4584.50430574 10522.60693968]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 8518.08648354 -4059.26960032  1453.99845283 -3235.3157062 ]
------
Step:10, Action:North
State  288
Old Q Values:  [ 8518.08648354 -4059.26960032  1453.99845283 -3235.3157062 ]
New Q values:  [ 6563.41667532 -4059.26960032  1453.99845283 -3235.3157062 ]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6910.95115679  8173.47601712 -4584.50430574 10522.60693968]
------
Step:11, Action:South
State  208
Old Q Values:  [ 6910.95115679  8173.47601712 -4584.50430574 10522.60693968]
New Q values:  [ 6910.95115679  5237.81540944 -4584.50430574 10522.60693968]
Reward: -1  Episode Reward:  19
xxxxx
x.g x
x.. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6563.41667532 -4059.26960032  1453.99845283 -3235.3157062 ]
------
Step:12, Action:North
State  288
Old Q Values:  [ 6563.41667532 -4059.26960032  1453.99845283 -3235.3157062 ]
New Q values:  [ 5781.54875203 -4059.26960032  1453.99845283 -3235.3157062 ]
Reward: -1  Episode Reward:  18
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6910.95115679  5237.81540944 -4584.50430574 10522.60693968]
------
Step:13, Action:West
State  208
Old Q Values:  [ 6910.95115679  5237.81540944 -4584.50430574 10522.60693968]
New Q values:  [ 6910.95115679  5237.81540944 -4584.50430574  6785.86558688]
Reward: 9  Episode Reward:  27
xxxxx
x.g x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3756.98497708 6916.34738543 8571.40937004 2453.92999194]
------
Step:14, Action:East
State  193
Old Q Values:  [18815.93003731 38497.8423464   9388.55586144   767.35890262]
New Q values:  [18815.93003731 38497.8423464   5828.10769162   767.35890262]
Reward: -1  Episode Reward:  26
xxxxx
x. gx
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6910.95115679  5237.81540944 -4584.50430574  6785.86558688]
------
Step:15, Action:West
State  208
Old Q Values:  [ 6910.95115679  5237.81540944 -4584.50430574  6785.86558688]
New Q values:  [ 6910.95115679  5237.81540944 -4584.50430574 14263.09893867]
Reward: -1  Episode Reward:  25
xxxxx
x.  x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 38497.8423464   5828.10769162   767.35890262]
------
Step:16, Action:South
State  195
Old Q Values:  [11645.51215632 22898.05652498 16029.85052182  1169.39963074]
New Q values:  [11645.51215632 14526.25624193 16029.85052182  1169.39963074]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17892.11210647  1201.72649971  3797.74781399 15413.07438253]
------
Step:17, Action:North
State  272
Old Q Values:  [-1361.14465131 -8521.23367799 63009.97479773  4428.07466584]
New Q values:  [  251.08137998 -8521.23367799 63009.97479773  4428.07466584]
Reward: -1  Episode Reward:  23
xxxxx
x.  x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[ 2605.11298674 -1416.94140093  2653.79746833  2179.39995143]
------
Step:18, Action:East
State  194
Old Q Values:  [ 2605.11298674 -1416.94140093  2653.79746833  2179.39995143]
New Q values:  [ 2605.11298674 -1416.94140093  2044.53999173  2179.39995143]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3278.73668132 2430.08576846  790.72804752 3114.26080587]
------
Step:19, Action:North
State  210
Old Q Values:  [3278.73668132 2430.08576846  790.72804752 3114.26080587]
New Q values:  [15194.3948184   2430.08576846   790.72804752  3114.26080587]
Reward: -1  Episode Reward:  21
xxxxx
x. ax
x.  x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[28683.54284864  3989.86286733  -180.00807518 46278.33381956]
------
Step:20, Action:West
State  130
Old Q Values:  [28683.54284864  3989.86286733  -180.00807518 46278.33381956]
New Q values:  [28683.54284864  3989.86286733  -180.00807518 50140.52140963]
Reward: -1  Episode Reward:  20
xxxxx
x.a x
xg  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 105432.62627268]
------
Step:21, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1936.61707065  1394.91007955]
New Q values:  [ -281.736      -1150.91067548  1936.61707065  2368.12412702]
Reward: 9  Episode Reward:  29
xxxxx
xa  x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 6015.866984    669.60476835 -252.78192178]
------
Step:22, Action:South
State  105
Old Q Values:  [-180.6        2237.91555819 -764.93196255    0.        ]
New Q values:  [-180.6        3004.31457295 -764.93196255    0.        ]
Reward: 9  Episode Reward:  38
xxxxx
x g x
xa  x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:NE
[ 357.2991616     0.         7012.49449892 -178.98      ]
------
Step:23, Action:East
State  185
Old Q Values:  [ 357.2991616     0.         7012.49449892 -178.98      ]
New Q values:  [ 357.2991616     0.         1200.78958956 -178.98      ]
Reward: -10001  Episode Reward:  -9963
xxxxx
x   x
x g x
x.  x
xxxxx
xxxxx
x.g.x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1.48618578e+03 1.23686189e+03 1.12477213e+04 3.33862213e+00]
------
Step:1, Action:East
State  181
Old Q Values:  [1.48618578e+03 1.23686189e+03 1.12477213e+04 3.33862213e+00]
New Q values:  [1486.18578139 1236.86188545  715.03544416    3.33862213]
Reward: -9991  Episode Reward:  -9991
xxxxx
x. .x
x g.x
x...x
xxxxx
xxxxx
x.a.x
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  4257.05764385  2024.51848008]
------
Step:1, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  4257.05764385  2024.51848008]
New Q values:  [  -56.91790269 -1902.20915811  7787.32893451  2024.51848008]
Reward: 9  Episode Reward:  9
xxxxx
x. ax
x.. x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.02636863e+04 -3.22965309e-01  1.88689784e+03]
------
Step:2, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.02636863e+04 -3.22965309e-01  1.88689784e+03]
New Q values:  [ 1.06807480e+02  6.38380418e+03 -3.22965309e-01  1.88689784e+03]
Reward: -10001  Episode Reward:  -9992
xxxxx
x.  x
x..gx
x...x
xxxxx
xxxxx
x...x
xga.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3756.98497708 6916.34738543 8571.40937004 2453.92999194]
------
Step:1, Action:East
State  192
Old Q Values:  [3756.98497708 6916.34738543 8571.40937004 2453.92999194]
New Q values:  [3756.98497708 6916.34738543 7712.89342962 2453.92999194]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6910.95115679  5237.81540944 -4584.50430574 14263.09893867]
------
Step:2, Action:North
State  216
Old Q Values:  [1865.56426614 2914.66362424  606.149024   1026.86689092]
New Q values:  [2666.76696173 2914.66362424  606.149024   1026.86689092]
Reward: 9  Episode Reward:  18
xxxxx
x..ax
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.38380418e+03 -3.22965309e-01  1.88689784e+03]
------
Step:3, Action:West
State  136
Old Q Values:  [  878.22269011  1509.94727993   660.86649319 -4614.29946136]
New Q values:  [  878.22269011  1509.94727993   660.86649319 -1556.43868803]
Reward: 9  Episode Reward:  27
xxxxx
x.agx
x   x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8266.02376272   946.27032172]
------
Step:4, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  7787.32893451  2024.51848008]
New Q values:  [  -56.91790269 -1902.20915811  7787.32893451  2570.04417931]
Reward: 9  Episode Reward:  36
xxxxx
xa  x
x  gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5849.45595761  476.21588959 -120.29354603]
------
Step:5, Action:South
State  107
Old Q Values:  [-252.35169558 6015.866984    669.60476835 -252.78192178]
New Q values:  [-252.35169558 2685.09244792  669.60476835 -252.78192178]
Reward: -1  Episode Reward:  35
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[836.45017667   0.         931.15218105   0.        ]
------
Step:6, Action:East
State  187
Old Q Values:  [836.45017667   0.         931.15218105   0.        ]
New Q values:  [836.45017667   0.         920.10649505   0.        ]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:7, Action:East
State  203
Old Q Values:  [3.60604218e+00 1.28201863e+04 2.89128867e+03 4.59156348e+03]
New Q values:  [3.60604218e+00 1.28201863e+04 3.05485090e+03 4.59156348e+03]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x  ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 677.77179188 6329.78476486    0.          919.64842823]
------
Step:8, Action:West
State  218
Old Q Values:  [ 677.77179188 6329.78476486    0.          919.64842823]
New Q values:  [ 677.77179188 6329.78476486    0.          915.50499392]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x a x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -8753.98842238  1827.48540877  1278.19575341]
------
Step:9, Action:East
State  200
Old Q Values:  [  169.9257398  14654.63929998  3719.17041281   610.93635926]
New Q values:  [  169.9257398  14654.63929998  2361.4672524    610.93635926]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2666.76696173 2914.66362424  606.149024   1026.86689092]
------
Step:10, Action:South
State  216
Old Q Values:  [2666.76696173 2914.66362424  606.149024   1026.86689092]
New Q values:  [2666.76696173 2905.7300753   606.149024   1026.86689092]
Reward: 9  Episode Reward:  40
xxxxx
x   x
x  gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5781.54875203 -4059.26960032  1453.99845283 -3235.3157062 ]
------
Step:11, Action:East
State  288
Old Q Values:  [ 5781.54875203 -4059.26960032  1453.99845283 -3235.3157062 ]
New Q values:  [ 5781.54875203 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: -10301  Episode Reward:  -10261
xxxxx
x   x
x   x
x..gx
xxxxx
xxxxx
x..gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1486.18578139 1236.86188545  715.03544416    3.33862213]
------
Step:1, Action:North
State  181
Old Q Values:  [1486.18578139 1236.86188545  715.03544416    3.33862213]
New Q values:  [2354.71109984 1236.86188545  715.03544416    3.33862213]
Reward: 9  Episode Reward:  9
xxxxx
xa. x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5849.45595761  476.21588959 -120.29354603]
------
Step:2, Action:South
State  109
Old Q Values:  [ -241.10880094  1229.37887096 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  1197.56487833 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2354.71109984 1236.86188545  715.03544416    3.33862213]
------
Step:3, Action:North
State  181
Old Q Values:  [2354.71109984 1236.86188545  715.03544416    3.33862213]
New Q values:  [2696.12122722 1236.86188545  715.03544416    3.33862213]
Reward: -1  Episode Reward:  7
xxxxx
xa. x
x .gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5849.45595761  476.21588959 -120.29354603]
------
Step:4, Action:South
State  111
Old Q Values:  [-177.44732869 5849.45595761  476.21588959 -120.29354603]
New Q values:  [-177.44732869 3148.01875121  476.21588959 -120.29354603]
Reward: -1  Episode Reward:  6
xxxxx
x . x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2696.12122722 1236.86188545  715.03544416    3.33862213]
------
Step:5, Action:North
State  180
Old Q Values:  [  544.37199765  3060.24711158  7519.94218783 -4966.32149798]
New Q values:  [  725.90386488  3060.24711158  7519.94218783 -4966.32149798]
Reward: -1  Episode Reward:  5
xxxxx
xa. x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1695.85021939  472.79132618 -180.6       ]
------
Step:6, Action:East
State  111
Old Q Values:  [-177.44732869 3148.01875121  476.21588959 -120.29354603]
New Q values:  [-177.44732869 3148.01875121  906.32359394 -120.29354603]
Reward: 9  Episode Reward:  14
xxxxx
x a x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1936.61707065  2368.12412702]
------
Step:7, Action:West
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1695.33426043 -4318.40133077]
New Q values:  [-9594.56523706 -8069.05606225  1695.33426043 -1368.69106881]
Reward: -1  Episode Reward:  13
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1197.56487833 -2165.66138672   232.50800947]
------
Step:8, Action:South
State  109
Old Q Values:  [ -241.10880094  1197.56487833 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  1287.2623195  -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  12
xxxxx
x  gx
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2696.12122722 1236.86188545  715.03544416    3.33862213]
------
Step:9, Action:North
State  181
Old Q Values:  [2696.12122722 1236.86188545  715.03544416    3.33862213]
New Q values:  [1464.02718674 1236.86188545  715.03544416    3.33862213]
Reward: -1  Episode Reward:  11
xxxxx
xag x
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1287.2623195  -2165.66138672   232.50800947]
------
Step:10, Action:South
State  111
Old Q Values:  [-177.44732869 3148.01875121  906.32359394 -120.29354603]
New Q values:  [-177.44732869 5486.20867956  906.32359394 -120.29354603]
Reward: -1  Episode Reward:  10
xxxxx
x   x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2576.3625007  14092.00393025   154.04646645]
------
Step:11, Action:South
State  189
Old Q Values:  [  533.05203844  2576.3625007  14092.00393025   154.04646645]
New Q values:  [  533.05203844  2460.0577575  14092.00393025   154.04646645]
Reward: 9  Episode Reward:  19
xxxxx
x   x
x .gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[4747.04252408 1352.37702619 3445.02169795  -12.17474163]
------
Step:12, Action:North
State  261
Old Q Values:  [4747.04252408 1352.37702619 3445.02169795  -12.17474163]
New Q values:  [6125.81818871 1352.37702619 3445.02169795  -12.17474163]
Reward: -1  Episode Reward:  18
xxxxx
x   x
xag.x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SE ghost_dir:NE
[  533.05203844  2460.0577575  14092.00393025   154.04646645]
------
Step:13, Action:South
State  180
Old Q Values:  [  725.90386488  3060.24711158  7519.94218783 -4966.32149798]
New Q values:  [  725.90386488  2227.35540285  7519.94218783 -4966.32149798]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xg..x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NW
[-7643.81886164 -5704.51612281  3346.18852738 -5679.36893145]
------
Step:14, Action:East
State  260
Old Q Values:  [-7643.81886164 -5704.51612281  3346.18852738 -5679.36893145]
New Q values:  [-7643.81886164 -5704.51612281  2636.85720615 -5679.36893145]
Reward: 9  Episode Reward:  26
xxxxx
x   x
x ..x
xga.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NW
[-2561.28592178 -5807.06396197  4309.93931733  2361.76105495]
------
Step:15, Action:East
State  272
Old Q Values:  [  251.08137998 -8521.23367799 63009.97479773  4428.07466584]
New Q values:  [  251.08137998 -8521.23367799 26943.8545447   4428.07466584]
Reward: 9  Episode Reward:  35
xxxxx
x   x
xg..x
x  ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5781.54875203 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:16, Action:North
State  288
Old Q Values:  [ 5781.54875203 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [ 6876.33794633 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: 9  Episode Reward:  44
xxxxx
x   x
x .ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[15194.3948184   2430.08576846   790.72804752  3114.26080587]
------
Step:17, Action:North
State  208
Old Q Values:  [ 6910.95115679  5237.81540944 -4584.50430574 14263.09893867]
New Q values:  [ 4678.92171799  5237.81540944 -4584.50430574 14263.09893867]
Reward: -1  Episode Reward:  43
xxxxx
x  ax
xg. x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.38380418e+03 -3.22965309e-01  1.88689784e+03]
------
Step:18, Action:South
State  136
Old Q Values:  [  878.22269011  1509.94727993   660.86649319 -1556.43868803]
New Q values:  [  878.22269011  4882.30859357   660.86649319 -1556.43868803]
Reward: -1  Episode Reward:  42
xxxxx
xg  x
x .ax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4678.92171799  5237.81540944 -4584.50430574 14263.09893867]
------
Step:19, Action:West
State  208
Old Q Values:  [ 4678.92171799  5237.81540944 -4584.50430574 14263.09893867]
New Q values:  [ 4678.92171799  5237.81540944 -4584.50430574 68024.50760435]
Reward: 100009  Episode Reward:  100051
xxxxx
x   x
xga x
x   x
xxxxx
xxxxx
xg..x
x...x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  251.08137998 -8521.23367799 26943.8545447   4428.07466584]
------
Step:1, Action:East
State  272
Old Q Values:  [  251.08137998 -8521.23367799 26943.8545447   4428.07466584]
New Q values:  [  251.08137998 -8521.23367799 12845.84320178  4428.07466584]
Reward: 9  Episode Reward:  9
xxxxx
x ..x
xg..x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6876.33794633 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:2, Action:North
State  288
Old Q Values:  [ 6876.33794633 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [ 7314.25362405 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: 9  Episode Reward:  18
xxxxx
x ..x
x..ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[15194.3948184   2430.08576846   790.72804752  3114.26080587]
------
Step:3, Action:North
State  210
Old Q Values:  [15194.3948184   2430.08576846   790.72804752  3114.26080587]
New Q values:  [7998.29918263 2430.08576846  790.72804752 3114.26080587]
Reward: 9  Episode Reward:  27
xxxxx
x .ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.38380418e+03 -3.22965309e-01  1.88689784e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.38380418e+03 -3.22965309e-01  1.88689784e+03]
New Q values:  [ 1.06807480e+02  2.29602740e+04 -3.22965309e-01  1.88689784e+03]
Reward: -1  Episode Reward:  26
xxxxx
x . x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4678.92171799  5237.81540944 -4584.50430574 68024.50760435]
------
Step:5, Action:South
State  208
Old Q Values:  [ 4678.92171799  5237.81540944 -4584.50430574 68024.50760435]
New Q values:  [ 4678.92171799  4288.80225099 -4584.50430574 68024.50760435]
Reward: -1  Episode Reward:  25
xxxxx
x . x
xg. x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7314.25362405 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:6, Action:North
State  288
Old Q Values:  [ 7314.25362405 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [23332.45373093 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: -1  Episode Reward:  24
xxxxx
x . x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 4678.92171799  4288.80225099 -4584.50430574 68024.50760435]
------
Step:7, Action:North
State  208
Old Q Values:  [ 4678.92171799  4288.80225099 -4584.50430574 68024.50760435]
New Q values:  [ 8759.0508737   4288.80225099 -4584.50430574 68024.50760435]
Reward: -1  Episode Reward:  23
xxxxx
x .ax
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.29602740e+04 -3.22965309e-01  1.88689784e+03]
------
Step:8, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.29602740e+04 -3.22965309e-01  1.88689784e+03]
New Q values:  [ 1.06807480e+02  2.95908619e+04 -3.22965309e-01  1.88689784e+03]
Reward: -1  Episode Reward:  22
xxxxx
x . x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 8759.0508737   4288.80225099 -4584.50430574 68024.50760435]
------
Step:9, Action:North
State  208
Old Q Values:  [ 8759.0508737   4288.80225099 -4584.50430574 68024.50760435]
New Q values:  [12380.27890847  4288.80225099 -4584.50430574 68024.50760435]
Reward: -1  Episode Reward:  21
xxxxx
x .ax
x..gx
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.95908619e+04 -3.22965309e-01  1.88689784e+03]
------
Step:10, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  2.95908619e+04 -3.22965309e-01  1.88689784e+03]
New Q values:  [ 1.06807480e+02  2.95908619e+04 -3.22965309e-01  1.47059637e+03]
Reward: 9  Episode Reward:  30
xxxxx
x a x
x.g x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1936.61707065  2368.12412702]
------
Step:11, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  1936.61707065  2368.12412702]
New Q values:  [ -281.736      -1150.91067548  1936.61707065  1135.87230585]
Reward: -1  Episode Reward:  29
xxxxx
xa  x
xg. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SW
[ -180.6        -6764.65639938   630.74218347  -180.6       ]
------
Step:12, Action:East
State  106
Old Q Values:  [ -180.6        -6764.65639938   630.74218347  -180.6       ]
New Q values:  [ -180.6        -6764.65639938   832.68199458  -180.6       ]
Reward: -1  Episode Reward:  28
xxxxx
x a x
x.. x
xg  x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  1936.61707065  1135.87230585]
------
Step:13, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  1936.61707065  1135.87230585]
New Q values:  [ -281.736      -1150.91067548  9651.30538725  1135.87230585]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x.. x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.95908619e+04 -3.22965309e-01  1.47059637e+03]
------
Step:14, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.95908619e+04 -3.22965309e-01  1.47059637e+03]
New Q values:  [ 1.06807480e+02  3.22430970e+04 -3.22965309e-01  1.47059637e+03]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[12380.27890847  4288.80225099 -4584.50430574 68024.50760435]
------
Step:15, Action:North
State  208
Old Q Values:  [12380.27890847  4288.80225099 -4584.50430574 68024.50760435]
New Q values:  [ 6416.20414146  4288.80225099 -4584.50430574 68024.50760435]
Reward: -1  Episode Reward:  25
xxxxx
x gax
x.. x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011  4882.30859357   660.86649319 -1556.43868803]
------
Step:16, Action:South
State  136
Old Q Values:  [  878.22269011  4882.30859357   660.86649319 -1556.43868803]
New Q values:  [  878.22269011 22359.67571874   660.86649319 -1556.43868803]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x..ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6416.20414146  4288.80225099 -4584.50430574 68024.50760435]
------
Step:17, Action:West
State  216
Old Q Values:  [2666.76696173 2905.7300753   606.149024   1026.86689092]
New Q values:  [2666.76696173 2905.7300753   606.149024   4812.53854636]
Reward: 9  Episode Reward:  33
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  14654.63929998  2361.4672524    610.93635926]
------
Step:18, Action:South
State  194
Old Q Values:  [ 2605.11298674 -1416.94140093  2044.53999173  2179.39995143]
New Q values:  [2605.11298674 3286.37640016 2044.53999173 2179.39995143]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  251.08137998 -8521.23367799 12845.84320178  4428.07466584]
------
Step:19, Action:East
State  272
Old Q Values:  [  251.08137998 -8521.23367799 12845.84320178  4428.07466584]
New Q values:  [  251.08137998 -8521.23367799 12137.47339999  4428.07466584]
Reward: -1  Episode Reward:  31
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[23332.45373093 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:20, Action:North
State  288
Old Q Values:  [23332.45373093 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [29739.73377368 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: -1  Episode Reward:  30
xxxxx
xg  x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6416.20414146  4288.80225099 -4584.50430574 68024.50760435]
------
Step:21, Action:West
State  216
Old Q Values:  [2666.76696173 2905.7300753   606.149024   4812.53854636]
New Q values:  [2666.76696173 2905.7300753   606.149024   6320.80720854]
Reward: -1  Episode Reward:  29
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  14654.63929998  2361.4672524    610.93635926]
------
Step:22, Action:South
State  194
Old Q Values:  [2605.11298674 3286.37640016 2044.53999173 2179.39995143]
New Q values:  [2605.11298674 4955.19258006 2044.53999173 2179.39995143]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x.  x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[  251.08137998 -8521.23367799 12137.47339999  4428.07466584]
------
Step:23, Action:East
State  272
Old Q Values:  [  251.08137998 -8521.23367799 12137.47339999  4428.07466584]
New Q values:  [  251.08137998 -8521.23367799 13776.3094921   4428.07466584]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[29739.73377368 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:24, Action:North
State  288
Old Q Values:  [29739.73377368 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [32302.64579078 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 6416.20414146  4288.80225099 -4584.50430574 68024.50760435]
------
Step:25, Action:North
State  208
Old Q Values:  [ 6416.20414146  4288.80225099 -4584.50430574 68024.50760435]
New Q values:  [ 9273.7843722   4288.80225099 -4584.50430574 68024.50760435]
Reward: -1  Episode Reward:  25
xxxxx
x gax
x.  x
x.  x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011 22359.67571874   660.86649319 -1556.43868803]
------
Step:26, Action:South
State  136
Old Q Values:  [  878.22269011 22359.67571874   660.86649319 -1556.43868803]
New Q values:  [  878.22269011 29350.6225688    660.86649319 -1556.43868803]
Reward: -1  Episode Reward:  24
xxxxx
xg  x
x. ax
x.  x
xxxxx
Step:27, Action:East
State  208
Old Q Values:  [ 9273.7843722   4288.80225099 -4584.50430574 68024.50760435]
New Q values:  [ 9273.7843722   4288.80225099 18392.95055901 68024.50760435]
Reward: -301  Episode Reward:  -277
xxxxx
x g x
x. ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[ 9273.7843722   4288.80225099 18392.95055901 68024.50760435]
------
Step:28, Action:West
State  208
Old Q Values:  [ 9273.7843722   4288.80225099 18392.95055901 68024.50760435]
New Q values:  [ 9273.7843722   4288.80225099 18392.95055901 23523.07107063]
Reward: -10001  Episode Reward:  -10278
xxxxx
x   x
x.g x
x.  x
xxxxx
xxxxx
x...x
xag.x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1464.02718674 1236.86188545  715.03544416    3.33862213]
------
Step:1, Action:North
State  181
Old Q Values:  [1464.02718674 1236.86188545  715.03544416    3.33862213]
New Q values:  [ 977.18957054 1236.86188545  715.03544416    3.33862213]
Reward: 9  Episode Reward:  9
xxxxx
xag.x
x  .x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094  1287.2623195  -2165.66138672   232.50800947]
------
Step:2, Action:South
State  109
Old Q Values:  [ -241.10880094  1287.2623195  -2165.66138672   232.50800947]
New Q values:  [ -241.10880094   885.36349343 -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  8
xxxxx
x .gx
xa .x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 977.18957054 1236.86188545  715.03544416    3.33862213]
------
Step:3, Action:South
State  181
Old Q Values:  [ 977.18957054 1236.86188545  715.03544416    3.33862213]
New Q values:  [ 977.18957054 2337.89021079  715.03544416    3.33862213]
Reward: 9  Episode Reward:  17
xxxxx
x g.x
x  .x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[6125.81818871 1352.37702619 3445.02169795  -12.17474163]
------
Step:4, Action:North
State  261
Old Q Values:  [6125.81818871 1352.37702619 3445.02169795  -12.17474163]
New Q values:  [3151.09433872 1352.37702619 3445.02169795  -12.17474163]
Reward: -1  Episode Reward:  16
xxxxx
x .gx
xa .x
x ..x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[ 977.18957054 2337.89021079  715.03544416    3.33862213]
------
Step:5, Action:South
State  181
Old Q Values:  [ 977.18957054 2337.89021079  715.03544416    3.33862213]
New Q values:  [ 977.18957054 1968.0625937   715.03544416    3.33862213]
Reward: -1  Episode Reward:  15
xxxxx
x ..x
x  gx
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3151.09433872 1352.37702619 3445.02169795  -12.17474163]
------
Step:6, Action:East
State  261
Old Q Values:  [3151.09433872 1352.37702619 3445.02169795  -12.17474163]
New Q values:  [3151.09433872 1352.37702619 2149.98771624  -12.17474163]
Reward: 9  Episode Reward:  24
xxxxx
x ..x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 5.04537534e+02 2.28379178e+03 2.55526346e+03]
------
Step:7, Action:West
State  277
Old Q Values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 2.55526346e+03]
New Q values:  [1.64433000e+00 5.04537534e+02 2.28379178e+03 1.96683368e+03]
Reward: -1  Episode Reward:  23
xxxxx
x ..x
x  gx
xa .x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[3151.09433872 1352.37702619 2149.98771624  -12.17474163]
------
Step:8, Action:North
State  261
Old Q Values:  [3151.09433872 1352.37702619 2149.98771624  -12.17474163]
New Q values:  [2689.16578815 1352.37702619 2149.98771624  -12.17474163]
Reward: -1  Episode Reward:  22
xxxxx
x ..x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2049.1969658  1771.27760536 4764.4268422  1554.80203889]
------
Step:9, Action:East
State  183
Old Q Values:  [2049.1969658  1771.27760536 4764.4268422  1554.80203889]
New Q values:  [2049.1969658  1771.27760536 3829.4794939  1554.80203889]
Reward: -1  Episode Reward:  21
xxxxx
x ..x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[3925.89935061 -200.61022961 6414.36252342    0.        ]
------
Step:10, Action:East
State  194
Old Q Values:  [2605.11298674 4955.19258006 2044.53999173 2179.39995143]
New Q values:  [2605.11298674 4955.19258006 3222.70575148 2179.39995143]
Reward: 9  Episode Reward:  30
xxxxx
x ..x
x  ax
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7998.29918263 2430.08576846  790.72804752 3114.26080587]
------
Step:11, Action:North
State  208
Old Q Values:  [ 9273.7843722   4288.80225099 18392.95055901 23523.07107063]
New Q values:  [18757.07017177  4288.80225099 18392.95055901 23523.07107063]
Reward: 9  Episode Reward:  39
xxxxx
x .ax
xg  x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[28683.54284864  3989.86286733  -180.00807518 50140.52140963]
------
Step:12, Action:West
State  136
Old Q Values:  [  878.22269011 29350.6225688    660.86649319 -1556.43868803]
New Q values:  [  878.22269011 29350.6225688    660.86649319  -215.06891393]
Reward: 9  Episode Reward:  48
xxxxx
xga x
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SE ghost_dir:NW
[    0.          1166.51141701  1340.35520428 -4254.91390044]
------
Step:13, Action:East
State  124
Old Q Values:  [    0.          1166.51141701  1340.35520428 -4254.91390044]
New Q values:  [    0.          1166.51141701  9340.72885235 -4254.91390044]
Reward: -1  Episode Reward:  47
xxxxx
x gax
x   x
x  .x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011 29350.6225688    660.86649319  -215.06891393]
------
Step:14, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.22430970e+04 -3.22965309e-01  1.47059637e+03]
New Q values:  [ 1.06807480e+02  1.47928810e+04 -3.22965309e-01  1.47059637e+03]
Reward: -1  Episode Reward:  46
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2666.76696173 2905.7300753   606.149024   6320.80720854]
------
Step:15, Action:South
State  208
Old Q Values:  [18757.07017177  4288.80225099 18392.95055901 23523.07107063]
New Q values:  [18757.07017177 71411.71463763 18392.95055901 23523.07107063]
Reward: 100009  Episode Reward:  100055
xxxxx
x   x
xg  x
x  ax
xxxxx
xxxxx
x...x
xa..x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2049.1969658  1771.27760536 3829.4794939  1554.80203889]
------
Step:1, Action:East
State  183
Old Q Values:  [2049.1969658  1771.27760536 3829.4794939  1554.80203889]
New Q values:  [2049.1969658  1771.27760536 6346.14695411 1554.80203889]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x a.x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SE
[11645.51215632 14526.25624193 16029.85052182  1169.39963074]
------
Step:2, Action:East
State  194
Old Q Values:  [2605.11298674 4955.19258006 3222.70575148 2179.39995143]
New Q values:  [2605.11298674 4955.19258006 3693.97205538 2179.39995143]
Reward: 9  Episode Reward:  18
xxxxx
x...x
x  ax
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[7998.29918263 2430.08576846  790.72804752 3114.26080587]
------
Step:3, Action:North
State  210
Old Q Values:  [7998.29918263 2430.08576846  790.72804752 3114.26080587]
New Q values:  [18246.87609594  2430.08576846   790.72804752  3114.26080587]
Reward: 9  Episode Reward:  27
xxxxx
x..ax
x   x
x. gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[28683.54284864  3989.86286733  -180.00807518 50140.52140963]
------
Step:4, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  1.47928810e+04 -3.22965309e-01  1.47059637e+03]
New Q values:  [ 1.06807480e+02  1.47928810e+04 -3.22965309e-01  3.22234264e+04]
Reward: 9  Episode Reward:  36
xxxxx
x.a x
x   x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SW
[  -180.6          3557.6642036   49543.89769946 105432.62627268]
------
Step:5, Action:West
State  122
Old Q Values:  [ -281.736      -1150.91067548  9651.30538725  1135.87230585]
New Q values:  [ -281.736      -1150.91067548  9651.30538725  1265.27665671]
Reward: 9  Episode Reward:  45
xxxxx
xa  x
x g x
x. .x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 2685.09244792  669.60476835 -252.78192178]
------
Step:6, Action:South
State  107
Old Q Values:  [-252.35169558 2685.09244792  669.60476835 -252.78192178]
New Q values:  [-252.35169558 1349.46892768  669.60476835 -252.78192178]
Reward: -1  Episode Reward:  44
xxxxx
x   x
xa  x
x.g.x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[836.45017667   0.         920.10649505   0.        ]
------
Step:7, Action:East
State  187
Old Q Values:  [836.45017667   0.         920.10649505   0.        ]
New Q values:  [ 836.45017667    0.         4213.49848903    0.        ]
Reward: -1  Episode Reward:  43
xxxxx
x   x
x a x
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SE
[3.60604218e+00 1.28201863e+04 3.05485090e+03 4.59156348e+03]
------
Step:8, Action:South
State  202
Old Q Values:  [    0.         -8753.98842238  1827.48540877  1278.19575341]
New Q values:  [    0.         -5369.30252132  1827.48540877  1278.19575341]
Reward: -10001  Episode Reward:  -9958
xxxxx
x   x
x   x
x.g.x
xxxxx
xxxxx
xa..x
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SW
[-239.29051573 1695.85021939  472.79132618 -180.6       ]
------
Step:1, Action:East
State  111
Old Q Values:  [-177.44732869 5486.20867956  906.32359394 -120.29354603]
New Q values:  [-177.44732869 5486.20867956 3263.32105375 -120.29354603]
Reward: 9  Episode Reward:  9
xxxxx
x a.x
x g.x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  9651.30538725  1265.27665671]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  1695.33426043 -1368.69106881]
New Q values:  [-9594.56523706 -8069.05606225  9488.72047481 -1368.69106881]
Reward: 9  Episode Reward:  18
xxxxx
x gax
x ..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011 29350.6225688    660.86649319  -215.06891393]
------
Step:3, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  1.47928810e+04 -3.22965309e-01  3.22234264e+04]
New Q values:  [ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  3.22234264e+04]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2666.76696173 2905.7300753   606.149024   6320.80720854]
------
Step:4, Action:South
State  216
Old Q Values:  [2666.76696173 2905.7300753   606.149024   6320.80720854]
New Q values:  [ 2666.76696173 10858.48576735   606.149024    6320.80720854]
Reward: 9  Episode Reward:  36
xxxxx
x   x
xg. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[32302.64579078 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:5, Action:North
State  288
Old Q Values:  [32302.64579078 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [18394.52114509 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x .ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[18246.87609594  2430.08576846   790.72804752  3114.26080587]
------
Step:6, Action:North
State  210
Old Q Values:  [18246.87609594  2430.08576846   790.72804752  3114.26080587]
New Q values:  [16965.17836776  2430.08576846   790.72804752  3114.26080587]
Reward: -1  Episode Reward:  34
xxxxx
x  ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  3.22234264e+04]
------
Step:7, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  3.22234264e+04]
New Q values:  [ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  1.52249693e+04]
Reward: -1  Episode Reward:  33
xxxxx
x a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  7787.32893451  2570.04417931]
------
Step:8, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  9651.30538725  1265.27665671]
New Q values:  [ -281.736      -1150.91067548  8427.41293076  1265.27665671]
Reward: -1  Episode Reward:  32
xxxxx
x  ax
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  1.52249693e+04]
------
Step:9, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  1.52249693e+04]
New Q values:  [ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  8.42558638e+03]
Reward: -1  Episode Reward:  31
xxxxx
x a x
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  7787.32893451  2570.04417931]
------
Step:10, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  7787.32893451  2570.04417931]
New Q values:  [  -56.91790269 -1902.20915811  5642.00748826  2570.04417931]
Reward: -1  Episode Reward:  30
xxxxx
x  ax
x . x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  8.42558638e+03]
------
Step:11, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  8.42558638e+03]
New Q values:  [ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  5.89785843e+03]
Reward: -1  Episode Reward:  29
xxxxx
x a x
x . x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  8427.41293076  1265.27665671]
------
Step:12, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  8427.41293076  1265.27665671]
New Q values:  [ -281.736      -1150.91067548  5716.00353786  1265.27665671]
Reward: -1  Episode Reward:  28
xxxxx
x  ax
x . x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  5.89785843e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  7.81879455e+03 -3.22965309e-01  5.89785843e+03]
New Q values:  [ 1.06807480e+02  6.38446355e+03 -3.22965309e-01  5.89785843e+03]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2666.76696173 10858.48576735   606.149024    6320.80720854]
------
Step:14, Action:South
State  210
Old Q Values:  [16965.17836776  2430.08576846   790.72804752  3114.26080587]
New Q values:  [16965.17836776  6489.79065091   790.72804752  3114.26080587]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x . x
xg.ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[18394.52114509 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:15, Action:North
State  288
Old Q Values:  [18394.52114509 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [12446.76196837 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x .ax
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[16965.17836776  6489.79065091   790.72804752  3114.26080587]
------
Step:16, Action:North
State  210
Old Q Values:  [16965.17836776  6489.79065091   790.72804752  3114.26080587]
New Q values:  [8700.81041239 6489.79065091  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  24
xxxxx
x  ax
x . x
xg. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  6.38446355e+03 -3.22965309e-01  5.89785843e+03]
------
Step:17, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  6.38446355e+03 -3.22965309e-01  5.89785843e+03]
New Q values:  [ 1.06807480e+02  5.81073115e+03 -3.22965309e-01  5.89785843e+03]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xg.ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[ 2666.76696173 10858.48576735   606.149024    6320.80720854]
------
Step:18, Action:South
State  216
Old Q Values:  [ 2666.76696173 10858.48576735   606.149024    6320.80720854]
New Q values:  [2666.76696173 8076.82289745  606.149024   6320.80720854]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x g x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[12446.76196837 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:19, Action:North
State  288
Old Q Values:  [12446.76196837 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [ 1401.15165658 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: -10001  Episode Reward:  -9979
xxxxx
x   x
x .gx
x.. x
xxxxx
xxxxx
x.g.x
x...x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 1401.15165658 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:1, Action:North
State  288
Old Q Values:  [ 1401.15165658 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [21989.37505392 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: 9  Episode Reward:  9
xxxxx
xg .x
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18757.07017177 71411.71463763 18392.95055901 23523.07107063]
------
Step:2, Action:South
State  208
Old Q Values:  [18757.07017177 71411.71463763 18392.95055901 23523.07107063]
New Q values:  [18757.07017177 35160.89837123 18392.95055901 23523.07107063]
Reward: -1  Episode Reward:  8
xxxxx
x.g.x
x.. x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[21989.37505392 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:3, Action:North
State  288
Old Q Values:  [21989.37505392 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [19343.41953294 -4059.26960032 -3864.53599326 -3235.3157062 ]
Reward: -1  Episode Reward:  7
xxxxx
x. gx
x..ax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18757.07017177 35160.89837123 18392.95055901 23523.07107063]
------
Step:4, Action:South
State  208
Old Q Values:  [18757.07017177 35160.89837123 18392.95055901 23523.07107063]
New Q values:  [18757.07017177 19866.78520837 18392.95055901 23523.07107063]
Reward: -1  Episode Reward:  6
xxxxx
x. .x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19343.41953294 -4059.26960032 -3864.53599326 -3235.3157062 ]
------
Step:5, Action:West
State  288
Old Q Values:  [19343.41953294 -4059.26960032 -3864.53599326 -3235.3157062 ]
New Q values:  [19343.41953294 -4059.26960032 -3864.53599326  4078.90734946]
Reward: 9  Episode Reward:  15
xxxxx
x. gx
x.. x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17892.11210647  1201.72649971  3797.74781399 15413.07438253]
------
Step:6, Action:North
State  272
Old Q Values:  [  251.08137998 -8521.23367799 13776.3094921   4428.07466584]
New Q values:  [ 2419.70058088 -8521.23367799 13776.3094921   4428.07466584]
Reward: 9  Episode Reward:  24
xxxxx
x.g.x
x.a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NW
[3756.98497708 6916.34738543 7712.89342962 2453.92999194]
------
Step:7, Action:East
State  192
Old Q Values:  [3756.98497708 6916.34738543 7712.89342962 2453.92999194]
New Q values:  [ 3756.98497708  6916.34738543 10141.47869304  2453.92999194]
Reward: -1  Episode Reward:  23
xxxxx
x. .x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18757.07017177 19866.78520837 18392.95055901 23523.07107063]
------
Step:8, Action:South
State  208
Old Q Values:  [18757.07017177 19866.78520837 18392.95055901 23523.07107063]
New Q values:  [18757.07017177 13749.13994323 18392.95055901 23523.07107063]
Reward: -1  Episode Reward:  22
xxxxx
x. .x
x. gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19343.41953294 -4059.26960032 -3864.53599326  4078.90734946]
------
Step:9, Action:West
State  288
Old Q Values:  [19343.41953294 -4059.26960032 -3864.53599326  4078.90734946]
New Q values:  [19343.41953294 -4059.26960032 -3864.53599326  6998.59657172]
Reward: -1  Episode Reward:  21
xxxxx
x. gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[17892.11210647  1201.72649971  3797.74781399 15413.07438253]
------
Step:10, Action:North
State  273
Old Q Values:  [17892.11210647  1201.72649971  3797.74781399 15413.07438253]
New Q values:  [18705.59754651  1201.72649971  3797.74781399 15413.07438253]
Reward: -1  Episode Reward:  20
xxxxx
x. .x
x.agx
x.  x
xxxxx
Step:11, Action:East
State  192
Old Q Values:  [ 3756.98497708  6916.34738543 10141.47869304  2453.92999194]
New Q values:  [3756.98497708 6916.34738543 1098.43508512 2453.92999194]
Reward: -10001  Episode Reward:  -9981
xxxxx
x. .x
x.g x
x.  x
xxxxx
xxxxx
x...x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18757.07017177 13749.13994323 18392.95055901 23523.07107063]
------
Step:1, Action:North
State  216
Old Q Values:  [2666.76696173 8076.82289745  606.149024   6320.80720854]
New Q values:  [2841.46431424 8076.82289745  606.149024   6320.80720854]
Reward: 9  Episode Reward:  9
xxxxx
x..ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  5.81073115e+03 -3.22965309e-01  5.89785843e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  5.81073115e+03 -3.22965309e-01  5.89785843e+03]
New Q values:  [ 1.06807480e+02  5.81073115e+03 -3.22965309e-01  4.07934443e+03]
Reward: 9  Episode Reward:  18
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  5716.00353786  1265.27665671]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  5716.00353786  1265.27665671]
New Q values:  [ -281.736      -1150.91067548  4029.02076032  1265.27665671]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg  x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  5.81073115e+03 -3.22965309e-01  4.07934443e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  5.81073115e+03 -3.22965309e-01  4.07934443e+03]
New Q values:  [ 1.06807480e+02  4.74673933e+03 -3.22965309e-01  4.07934443e+03]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2841.46431424 8076.82289745  606.149024   6320.80720854]
------
Step:5, Action:South
State  210
Old Q Values:  [8700.81041239 6489.79065091  790.72804752 3114.26080587]
New Q values:  [8700.81041239 8404.34212025  790.72804752 3114.26080587]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[19343.41953294 -4059.26960032 -3864.53599326  6998.59657172]
------
Step:6, Action:North
State  288
Old Q Values:  [19343.41953294 -4059.26960032 -3864.53599326  6998.59657172]
New Q values:  [10347.01093689 -4059.26960032 -3864.53599326  6998.59657172]
Reward: -1  Episode Reward:  24
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[8700.81041239 8404.34212025  790.72804752 3114.26080587]
------
Step:7, Action:North
State  210
Old Q Values:  [8700.81041239 8404.34212025  790.72804752 3114.26080587]
New Q values:  [4903.7459638  8404.34212025  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  23
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  4.74673933e+03 -3.22965309e-01  4.07934443e+03]
------
Step:8, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  4.74673933e+03 -3.22965309e-01  4.07934443e+03]
New Q values:  [ 1.06807480e+02  4.41939837e+03 -3.22965309e-01  4.07934443e+03]
Reward: -1  Episode Reward:  22
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4903.7459638  8404.34212025  790.72804752 3114.26080587]
------
Step:9, Action:South
State  210
Old Q Values:  [4903.7459638  8404.34212025  790.72804752 3114.26080587]
New Q values:  [4903.7459638  6465.24012917  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  21
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[10347.01093689 -4059.26960032 -3864.53599326  6998.59657172]
------
Step:10, Action:North
State  288
Old Q Values:  [10347.01093689 -4059.26960032 -3864.53599326  6998.59657172]
New Q values:  [ 6077.77641351 -4059.26960032 -3864.53599326  6998.59657172]
Reward: -1  Episode Reward:  20
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4903.7459638  6465.24012917  790.72804752 3114.26080587]
------
Step:11, Action:South
State  210
Old Q Values:  [4903.7459638  6465.24012917  790.72804752 3114.26080587]
New Q values:  [4903.7459638  4685.07502318  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  19
xxxxx
x.  x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 6077.77641351 -4059.26960032 -3864.53599326  6998.59657172]
------
Step:12, Action:North
State  288
Old Q Values:  [ 6077.77641351 -4059.26960032 -3864.53599326  6998.59657172]
New Q values:  [ 9487.43188659 -4059.26960032 -3864.53599326  6998.59657172]
Reward: -1  Episode Reward:  18
xxxxx
x.  x
x.gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18757.07017177 13749.13994323 18392.95055901 23523.07107063]
------
Step:13, Action:North
State  216
Old Q Values:  [2841.46431424 8076.82289745  606.149024   6320.80720854]
New Q values:  [2461.80523605 8076.82289745  606.149024   6320.80720854]
Reward: -1  Episode Reward:  17
xxxxx
x. ax
xg  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  4.41939837e+03 -3.22965309e-01  4.07934443e+03]
------
Step:14, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  4.41939837e+03 -3.22965309e-01  4.07934443e+03]
New Q values:  [ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  4.07934443e+03]
Reward: -1  Episode Reward:  16
xxxxx
x.  x
x. ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[4903.7459638  4685.07502318  790.72804752 3114.26080587]
------
Step:15, Action:North
State  210
Old Q Values:  [4903.7459638  4685.07502318  790.72804752 3114.26080587]
New Q values:  [3184.70171575 4685.07502318  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  15
xxxxx
x. ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  4.07934443e+03]
------
Step:16, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  4.07934443e+03]
New Q values:  [ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  3.32374002e+03]
Reward: -1  Episode Reward:  14
xxxxx
x.a x
x.  x
x..gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SE
[  -56.91790269 -1902.20915811  5642.00748826  2570.04417931]
------
Step:17, Action:East
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  5642.00748826  2570.04417931]
New Q values:  [  -56.91790269 -1902.20915811  3253.32500134  2570.04417931]
Reward: -1  Episode Reward:  13
xxxxx
x. ax
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  3.32374002e+03]
------
Step:18, Action:West
State  136
Old Q Values:  [  878.22269011 29350.6225688    660.86649319  -215.06891393]
New Q values:  [  878.22269011 29350.6225688    660.86649319   197.25353094]
Reward: -1  Episode Reward:  12
xxxxx
x.agx
x.  x
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NE
[    0.             0.         -8266.02376272   946.27032172]
------
Step:19, Action:West
State  123
Old Q Values:  [  -56.91790269 -1902.20915811  3253.32500134  2570.04417931]
New Q values:  [  -56.91790269 -1902.20915811  3253.32500134  1438.25835003]
Reward: 9  Episode Reward:  21
xxxxx
xa  x
x. gx
x.. x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SW ghost_dir:SE
[-252.35169558 1349.46892768  669.60476835 -252.78192178]
------
Step:20, Action:South
State  107
Old Q Values:  [-252.35169558 1349.46892768  669.60476835 -252.78192178]
New Q values:  [-252.35169558 1809.23711778  669.60476835 -252.78192178]
Reward: 9  Episode Reward:  30
xxxxx
x   x
xa  x
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:SW ghost_dir:SE
[ 836.45017667    0.         4213.49848903    0.        ]
------
Step:21, Action:East
State  187
Old Q Values:  [ 836.45017667    0.         4213.49848903    0.        ]
New Q values:  [ 836.45017667    0.         2233.04501824    0.        ]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5369.30252132  1827.48540877  1278.19575341]
------
Step:22, Action:East
State  200
Old Q Values:  [  169.9257398  14654.63929998  2361.4672524    610.93635926]
New Q values:  [  169.9257398  14654.63929998  3367.03377019   610.93635926]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x gax
x.. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2461.80523605 8076.82289745  606.149024   6320.80720854]
------
Step:23, Action:South
State  216
Old Q Values:  [2461.80523605 8076.82289745  606.149024   6320.80720854]
New Q values:  [2461.80523605 6076.35872496  606.149024   6320.80720854]
Reward: -1  Episode Reward:  27
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 9487.43188659 -4059.26960032 -3864.53599326  6998.59657172]
------
Step:24, Action:North
State  288
Old Q Values:  [ 9487.43188659 -4059.26960032 -3864.53599326  6998.59657172]
New Q values:  [ 5693.30818409 -4059.26960032 -3864.53599326  6998.59657172]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x  ax
xg. x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:SW
[ 677.77179188 6329.78476486    0.          915.50499392]
------
Step:25, Action:South
State  216
Old Q Values:  [2461.80523605 6076.35872496  606.149024   6320.80720854]
New Q values:  [2461.80523605 4529.5224615   606.149024   6320.80720854]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xg  x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5693.30818409 -4059.26960032 -3864.53599326  6998.59657172]
------
Step:26, Action:West
State  288
Old Q Values:  [ 5693.30818409 -4059.26960032 -3864.53599326  6998.59657172]
New Q values:  [ 5693.30818409 -4059.26960032 -3864.53599326  6937.73147632]
Reward: 9  Episode Reward:  34
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2419.70058088 -8521.23367799 13776.3094921   4428.07466584]
------
Step:27, Action:East
State  272
Old Q Values:  [ 2419.70058088 -8521.23367799 13776.3094921   4428.07466584]
New Q values:  [ 2419.70058088 -8521.23367799  7591.24323974  4428.07466584]
Reward: -1  Episode Reward:  33
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5693.30818409 -4059.26960032 -3864.53599326  6937.73147632]
------
Step:28, Action:West
State  288
Old Q Values:  [ 5693.30818409 -4059.26960032 -3864.53599326  6937.73147632]
New Q values:  [ 5693.30818409 -4059.26960032 -3864.53599326  5051.86556245]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 2419.70058088 -8521.23367799  7591.24323974  4428.07466584]
------
Step:29, Action:East
State  273
Old Q Values:  [18705.59754651  1201.72649971  3797.74781399 15413.07438253]
New Q values:  [18705.59754651  1201.72649971  3226.49158082 15413.07438253]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x  gx
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5693.30818409 -4059.26960032 -3864.53599326  5051.86556245]
------
Step:30, Action:West
State  288
Old Q Values:  [ 5693.30818409 -4059.26960032 -3864.53599326  5051.86556245]
New Q values:  [ 5693.30818409 -4059.26960032 -3864.53599326  7631.82548893]
Reward: -1  Episode Reward:  30
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[18705.59754651  1201.72649971  3226.49158082 15413.07438253]
------
Step:31, Action:North
State  272
Old Q Values:  [ 2419.70058088 -8521.23367799  7591.24323974  4428.07466584]
New Q values:  [ 5363.67202235 -8521.23367799  7591.24323974  4428.07466584]
Reward: -1  Episode Reward:  29
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  14654.63929998  3367.03377019   610.93635926]
------
Step:32, Action:South
State  201
Old Q Values:  [  613.33320563 13063.80419921 -3941.94579649  1311.30124863]
New Q values:  [  613.33320563 10836.60094363 -3941.94579649  1311.30124863]
Reward: -1  Episode Reward:  28
xxxxx
x  gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[18705.59754651  1201.72649971  3226.49158082 15413.07438253]
------
Step:33, Action:North
State  272
Old Q Values:  [ 5363.67202235 -8521.23367799  7591.24323974  4428.07466584]
New Q values:  [ 6541.26059893 -8521.23367799  7591.24323974  4428.07466584]
Reward: -1  Episode Reward:  27
xxxxx
x g x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[  169.9257398  14654.63929998  3367.03377019   610.93635926]
------
Step:34, Action:South
State  200
Old Q Values:  [  169.9257398  14654.63929998  3367.03377019   610.93635926]
New Q values:  [ 169.9257398  8138.62869191 3367.03377019  610.93635926]
Reward: -1  Episode Reward:  26
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6541.26059893 -8521.23367799  7591.24323974  4428.07466584]
------
Step:35, Action:East
State  272
Old Q Values:  [ 6541.26059893 -8521.23367799  7591.24323974  4428.07466584]
New Q values:  [ 6541.26059893 -8521.23367799  5325.44494257  4428.07466584]
Reward: -1  Episode Reward:  25
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5693.30818409 -4059.26960032 -3864.53599326  7631.82548893]
------
Step:36, Action:West
State  288
Old Q Values:  [ 5693.30818409 -4059.26960032 -3864.53599326  7631.82548893]
New Q values:  [ 5693.30818409 -4059.26960032 -3864.53599326  5014.50837525]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 6541.26059893 -8521.23367799  5325.44494257  4428.07466584]
------
Step:37, Action:North
State  272
Old Q Values:  [ 6541.26059893 -8521.23367799  5325.44494257  4428.07466584]
New Q values:  [ 5057.49284715 -8521.23367799  5325.44494257  4428.07466584]
Reward: -1  Episode Reward:  23
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  8138.62869191 3367.03377019  610.93635926]
------
Step:38, Action:South
State  200
Old Q Values:  [ 169.9257398  8138.62869191 3367.03377019  610.93635926]
New Q values:  [ 169.9257398  4852.48495954 3367.03377019  610.93635926]
Reward: -1  Episode Reward:  22
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5057.49284715 -8521.23367799  5325.44494257  4428.07466584]
------
Step:39, Action:East
State  272
Old Q Values:  [ 5057.49284715 -8521.23367799  5325.44494257  4428.07466584]
New Q values:  [ 5057.49284715 -8521.23367799  3837.57043226  4428.07466584]
Reward: -1  Episode Reward:  21
xxxxx
x g x
x   x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 5693.30818409 -4059.26960032 -3864.53599326  5014.50837525]
------
Step:40, Action:North
State  288
Old Q Values:  [ 5693.30818409 -4059.26960032 -3864.53599326  5014.50837525]
New Q values:  [ 4172.9654362  -4059.26960032 -3864.53599326  5014.50837525]
Reward: -1  Episode Reward:  20
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2461.80523605 4529.5224615   606.149024   6320.80720854]
------
Step:41, Action:West
State  216
Old Q Values:  [2461.80523605 4529.5224615   606.149024   6320.80720854]
New Q values:  [2461.80523605 4529.5224615   606.149024   3983.46837128]
Reward: -1  Episode Reward:  19
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  4852.48495954 3367.03377019  610.93635926]
------
Step:42, Action:South
State  200
Old Q Values:  [ 169.9257398  4852.48495954 3367.03377019  610.93635926]
New Q values:  [ 169.9257398  3457.64183796 3367.03377019  610.93635926]
Reward: -1  Episode Reward:  18
xxxxx
xg  x
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 5057.49284715 -8521.23367799  3837.57043226  4428.07466584]
------
Step:43, Action:North
State  272
Old Q Values:  [ 5057.49284715 -8521.23367799  3837.57043226  4428.07466584]
New Q values:  [ 3059.68969025 -8521.23367799  3837.57043226  4428.07466584]
Reward: -1  Episode Reward:  17
xxxxx
x   x
xga x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3457.64183796 3367.03377019  610.93635926]
------
Step:44, Action:South
State  194
Old Q Values:  [2605.11298674 4955.19258006 3693.97205538 2179.39995143]
New Q values:  [2605.11298674 3309.89943178 3693.97205538 2179.39995143]
Reward: -1  Episode Reward:  16
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3059.68969025 -8521.23367799  3837.57043226  4428.07466584]
------
Step:45, Action:East
State  272
Old Q Values:  [ 3059.68969025 -8521.23367799  3837.57043226  4428.07466584]
New Q values:  [ 3059.68969025 -8521.23367799  3038.78068548  4428.07466584]
Reward: -1  Episode Reward:  15
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4172.9654362  -4059.26960032 -3864.53599326  5014.50837525]
------
Step:46, Action:West
State  288
Old Q Values:  [ 4172.9654362  -4059.26960032 -3864.53599326  5014.50837525]
New Q values:  [ 4172.9654362  -4059.26960032 -3864.53599326  3333.62574985]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x   x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 3059.68969025 -8521.23367799  3038.78068548  4428.07466584]
------
Step:47, Action:North
State  272
Old Q Values:  [ 3059.68969025 -8521.23367799  3038.78068548  4428.07466584]
New Q values:  [ 1771.52149873 -8521.23367799  3038.78068548  4428.07466584]
Reward: -1  Episode Reward:  13
xxxxx
x   x
x a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:SW
[    0.         -5369.30252132  1827.48540877  1278.19575341]
------
Step:48, Action:East
State  200
Old Q Values:  [ 169.9257398  3457.64183796 3367.03377019  610.93635926]
New Q values:  [ 169.9257398  3457.64183796 2705.07024653  610.93635926]
Reward: -1  Episode Reward:  12
xxxxx
x   x
x gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2461.80523605 4529.5224615   606.149024   3983.46837128]
------
Step:49, Action:South
State  216
Old Q Values:  [2461.80523605 4529.5224615   606.149024   3983.46837128]
New Q values:  [2461.80523605 3063.09861546  606.149024   3983.46837128]
Reward: -1  Episode Reward:  11
xxxxx
x   x
xg  x
x. ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 4172.9654362  -4059.26960032 -3864.53599326  3333.62574985]
------
Step:50, Action:North
State  288
Old Q Values:  [ 4172.9654362  -4059.26960032 -3864.53599326  3333.62574985]
New Q values:  [ 2863.62668586 -4059.26960032 -3864.53599326  3333.62574985]
Reward: -1  Episode Reward:  10
xxxxx
xg  x
x  ax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2461.80523605 3063.09861546  606.149024   3983.46837128]
------
Step:51, Action:West
State  216
Old Q Values:  [2461.80523605 3063.09861546  606.149024   3983.46837128]
New Q values:  [2461.80523605 3063.09861546  606.149024   2630.0798999 ]
Reward: -1  Episode Reward:  9
xxxxx
xg  x
x a x
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:SW ghost_dir:NW
[ 169.9257398  3457.64183796 2705.07024653  610.93635926]
------
Step:52, Action:South
State  200
Old Q Values:  [ 169.9257398  3457.64183796 2705.07024653  610.93635926]
New Q values:  [ 169.9257398  2710.87913494 2705.07024653  610.93635926]
Reward: -1  Episode Reward:  8
xxxxx
x   x
xg  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1771.52149873 -8521.23367799  3038.78068548  4428.07466584]
------
Step:53, Action:West
State  272
Old Q Values:  [ 1771.52149873 -8521.23367799  3038.78068548  4428.07466584]
New Q values:  [ 1771.52149873 -8521.23367799  3038.78068548 77095.01268255]
Reward: 90009  Episode Reward:  90017
xxxxx
x   x
x   x
xg  x
xxxxx
xxxxx
x..ax
x..gx
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  3.32374002e+03]
------
Step:1, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  3.32374002e+03]
New Q values:  [ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  2.54360224e+03]
Reward: 9  Episode Reward:  9
xxxxx
x.a x
x.g x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  4029.02076032  1265.27665671]
------
Step:2, Action:East
State  120
Old Q Values:  [-9594.56523706 -8069.05606225  9488.72047481 -1368.69106881]
New Q values:  [-9594.56523706 -8069.05606225 12600.07496057 -1368.69106881]
Reward: -1  Episode Reward:  8
xxxxx
x.gax
x.. x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:NW
[  878.22269011 29350.6225688    660.86649319   197.25353094]
------
Step:3, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  3.23828314e+03 -3.22965309e-01  2.54360224e+03]
New Q values:  [ 1.06807480e+02  2.21364284e+03 -3.22965309e-01  2.54360224e+03]
Reward: -1  Episode Reward:  7
xxxxx
x.  x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2461.80523605 3063.09861546  606.149024   2630.0798999 ]
------
Step:4, Action:South
State  208
Old Q Values:  [18757.07017177 13749.13994323 18392.95055901 23523.07107063]
New Q values:  [18757.07017177  6505.14370225 18392.95055901 23523.07107063]
Reward: 9  Episode Reward:  16
xxxxx
x.  x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2863.62668586 -4059.26960032 -3864.53599326  3333.62574985]
------
Step:5, Action:West
State  288
Old Q Values:  [ 2863.62668586 -4059.26960032 -3864.53599326  3333.62574985]
New Q values:  [ 2863.62668586 -4059.26960032 -3864.53599326 24467.35410471]
Reward: 9  Episode Reward:  25
xxxxx
x.  x
x.g x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1771.52149873 -8521.23367799  3038.78068548 77095.01268255]
------
Step:6, Action:West
State  272
Old Q Values:  [ 1771.52149873 -8521.23367799  3038.78068548 77095.01268255]
New Q values:  [ 1771.52149873 -8521.23367799  3038.78068548 46524.79763429]
Reward: 9  Episode Reward:  34
xxxxx
x.  x
x.. x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[52271.30853756 15941.62716192 13169.98702937  1875.31501677]
------
Step:7, Action:North
State  257
Old Q Values:  [52271.30853756 15941.62716192 13169.98702937  1875.31501677]
New Q values:  [55825.22263871 15941.62716192 13169.98702937  1875.31501677]
Reward: 9  Episode Reward:  43
xxxxx
x.  x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[ 60476.05138135  21430.9929039  116370.99741228      0.        ]
------
Step:8, Action:North
State  181
Old Q Values:  [ 977.18957054 1968.0625937   715.03544416    3.33862213]
New Q values:  [2042.13843208 1968.0625937   715.03544416    3.33862213]
Reward: 9  Episode Reward:  52
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5486.20867956 3263.32105375 -120.29354603]
------
Step:9, Action:South
State  109
Old Q Values:  [ -241.10880094   885.36349343 -2165.66138672   232.50800947]
New Q values:  [ -241.10880094   966.186927   -2165.66138672   232.50800947]
Reward: -1  Episode Reward:  51
xxxxx
x  gx
xa. x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2042.13843208 1968.0625937   715.03544416    3.33862213]
------
Step:10, Action:North
State  181
Old Q Values:  [2042.13843208 1968.0625937   715.03544416    3.33862213]
New Q values:  [2462.1179767  1968.0625937   715.03544416    3.33862213]
Reward: -1  Episode Reward:  50
xxxxx
xa  x
x .gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5486.20867956 3263.32105375 -120.29354603]
------
Step:11, Action:South
State  99
Old Q Values:  [    0.         30578.35123687 58945.17153785     0.        ]
New Q values:  [    0.         47142.03971843 58945.17153785     0.        ]
Reward: -1  Episode Reward:  49
xxxxx
x   x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NW ghost_dir:NE
[ 60476.05138135  21430.9929039  116370.99741228      0.        ]
------
Step:12, Action:North
State  183
Old Q Values:  [2049.1969658  1771.27760536 6346.14695411 1554.80203889]
New Q values:  [2464.94139019 1771.27760536 6346.14695411 1554.80203889]
Reward: -1  Episode Reward:  48
xxxxx
xa  x
x . x
x g x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 5486.20867956 3263.32105375 -120.29354603]
------
Step:13, Action:South
State  111
Old Q Values:  [-177.44732869 5486.20867956 3263.32105375 -120.29354603]
New Q values:  [-177.44732869 4097.72755805 3263.32105375 -120.29354603]
Reward: -1  Episode Reward:  47
xxxxx
x   x
xa. x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2464.94139019 1771.27760536 6346.14695411 1554.80203889]
------
Step:14, Action:East
State  179
Old Q Values:  [82228.67666629 18995.54020685 50537.28135001     0.        ]
New Q values:  [82228.67666629 18995.54020685 81328.50415662     0.        ]
Reward: 100009  Episode Reward:  100056
xxxxx
x   x
x a x
x g x
xxxxx
xxxxx
xa.gx
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:NE
[ -241.10880094   966.186927   -2165.66138672   232.50800947]
------
Step:1, Action:South
State  109
Old Q Values:  [ -241.10880094   966.186927   -2165.66138672   232.50800947]
New Q values:  [ -241.10880094  1130.51016381 -2165.66138672   232.50800947]
Reward: 9  Episode Reward:  9
xxxxx
x g x
xa..x
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2462.1179767  1968.0625937   715.03544416    3.33862213]
------
Step:2, Action:North
State  180
Old Q Values:  [  725.90386488  2227.35540285  7519.94218783 -4966.32149798]
New Q values:  [-4868.73695196  2227.35540285  7519.94218783 -4966.32149798]
Reward: -10001  Episode Reward:  -9992
xxxxx
xg. x
x ..x
x...x
xxxxx
xxxxx
xga.x
x...x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:NW
[-9594.56523706 -8069.05606225 12600.07496057 -1368.69106881]
------
Step:1, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  4029.02076032  1265.27665671]
New Q values:  [ -281.736      -1150.91067548  2380.08897497  1265.27665671]
Reward: 9  Episode Reward:  9
xxxxx
x  ax
xg..x
x...x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.21364284e+03 -3.22965309e-01  2.54360224e+03]
------
Step:2, Action:West
State  138
Old Q Values:  [ 1.06807480e+02  2.21364284e+03 -3.22965309e-01  2.54360224e+03]
New Q values:  [ 1.06807480e+02  2.21364284e+03 -3.22965309e-01  1.73086759e+03]
Reward: -1  Episode Reward:  8
xxxxx
x a x
x...x
xg..x
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:SW ghost_dir:SW
[ -281.736      -1150.91067548  2380.08897497  1265.27665671]
------
Step:3, Action:East
State  122
Old Q Values:  [ -281.736      -1150.91067548  2380.08897497  1265.27665671]
New Q values:  [ -281.736      -1150.91067548  1615.52844173  1265.27665671]
Reward: -1  Episode Reward:  7
xxxxx
x  ax
x...x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.21364284e+03 -3.22965309e-01  1.73086759e+03]
------
Step:4, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.21364284e+03 -3.22965309e-01  1.73086759e+03]
New Q values:  [ 1.06807480e+02  2.29637964e+03 -3.22965309e-01  1.73086759e+03]
Reward: 9  Episode Reward:  16
xxxxx
x   x
x..ax
x..gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[3184.70171575 4685.07502318  790.72804752 3114.26080587]
------
Step:5, Action:North
State  210
Old Q Values:  [3184.70171575 4685.07502318  790.72804752 3114.26080587]
New Q values:  [1962.19457908 4685.07502318  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  15
xxxxx
x  ax
x.. x
x.g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.29637964e+03 -3.22965309e-01  1.73086759e+03]
------
Step:6, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.29637964e+03 -3.22965309e-01  1.73086759e+03]
New Q values:  [ 1.06807480e+02  1.83688144e+03 -3.22965309e-01  1.73086759e+03]
Reward: -1  Episode Reward:  14
xxxxx
x   x
x.gax
x...x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2461.80523605 3063.09861546  606.149024   2630.0798999 ]
------
Step:7, Action:South
State  208
Old Q Values:  [18757.07017177  6505.14370225 18392.95055901 23523.07107063]
New Q values:  [18757.07017177  9947.66371231 18392.95055901 23523.07107063]
Reward: 9  Episode Reward:  23
xxxxx
x   x
x..gx
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2863.62668586 -4059.26960032 -3864.53599326 24467.35410471]
------
Step:8, Action:West
State  288
Old Q Values:  [ 2863.62668586 -4059.26960032 -3864.53599326 24467.35410471]
New Q values:  [ 2863.62668586 -4059.26960032 -3864.53599326 15404.02090583]
Reward: 9  Episode Reward:  32
xxxxx
x   x
x.. x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[18705.59754651  1201.72649971  3226.49158082 15413.07438253]
------
Step:9, Action:North
State  272
Old Q Values:  [ 1771.52149873 -8521.23367799  3038.78068548 46524.79763429]
New Q values:  [ 1822.20021611 -8521.23367799  3038.78068548 46524.79763429]
Reward: 9  Episode Reward:  41
xxxxx
x   x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[2605.11298674 3309.89943178 3693.97205538 2179.39995143]
------
Step:10, Action:East
State  194
Old Q Values:  [2605.11298674 3309.89943178 3693.97205538 2179.39995143]
New Q values:  [2605.11298674 3309.89943178 2882.51132911 2179.39995143]
Reward: -1  Episode Reward:  40
xxxxx
x   x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1962.19457908 4685.07502318  790.72804752 3114.26080587]
------
Step:11, Action:South
State  210
Old Q Values:  [1962.19457908 4685.07502318  790.72804752 3114.26080587]
New Q values:  [1962.19457908 6494.63628102  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  39
xxxxx
x   x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 2863.62668586 -4059.26960032 -3864.53599326 15404.02090583]
------
Step:12, Action:North
State  288
Old Q Values:  [ 2863.62668586 -4059.26960032 -3864.53599326 15404.02090583]
New Q values:  [ 3093.24155865 -4059.26960032 -3864.53599326 15404.02090583]
Reward: -1  Episode Reward:  38
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1962.19457908 6494.63628102  790.72804752 3114.26080587]
------
Step:13, Action:West
State  208
Old Q Values:  [18757.07017177  9947.66371231 18392.95055901 23523.07107063]
New Q values:  [18757.07017177  9947.66371231 18392.95055901 20957.98113217]
Reward: -1  Episode Reward:  37
xxxxx
x   x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 38497.8423464   5828.10769162   767.35890262]
------
Step:14, Action:South
State  195
Old Q Values:  [11645.51215632 14526.25624193 16029.85052182  1169.39963074]
New Q values:  [11645.51215632 11421.58176073 16029.85052182  1169.39963074]
Reward: -1  Episode Reward:  36
xxxxx
x   x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[18705.59754651  1201.72649971  3226.49158082 15413.07438253]
------
Step:15, Action:North
State  272
Old Q Values:  [ 1822.20021611 -8521.23367799  3038.78068548 46524.79763429]
New Q values:  [ 1721.24991598 -8521.23367799  3038.78068548 46524.79763429]
Reward: -1  Episode Reward:  35
xxxxx
x   x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[2605.11298674 3309.89943178 2882.51132911 2179.39995143]
------
Step:16, Action:East
State  194
Old Q Values:  [2605.11298674 3309.89943178 2882.51132911 2179.39995143]
New Q values:  [2605.11298674 3309.89943178 3100.79541595 2179.39995143]
Reward: -1  Episode Reward:  34
xxxxx
x   x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1962.19457908 6494.63628102  790.72804752 3114.26080587]
------
Step:17, Action:South
State  210
Old Q Values:  [1962.19457908 6494.63628102  790.72804752 3114.26080587]
New Q values:  [1962.19457908 7218.46078416  790.72804752 3114.26080587]
Reward: -1  Episode Reward:  33
xxxxx
x   x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3093.24155865 -4059.26960032 -3864.53599326 15404.02090583]
------
Step:18, Action:North
State  288
Old Q Values:  [ 3093.24155865 -4059.26960032 -3864.53599326 15404.02090583]
New Q values:  [ 3402.23485871 -4059.26960032 -3864.53599326 15404.02090583]
Reward: -1  Episode Reward:  32
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1962.19457908 7218.46078416  790.72804752 3114.26080587]
------
Step:19, Action:West
State  210
Old Q Values:  [1962.19457908 7218.46078416  790.72804752 3114.26080587]
New Q values:  [1962.19457908 7218.46078416  790.72804752 2238.07415188]
Reward: -1  Episode Reward:  31
xxxxx
x   x
x.a x
x.g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[2605.11298674 3309.89943178 3100.79541595 2179.39995143]
------
Step:20, Action:East
State  194
Old Q Values:  [2605.11298674 3309.89943178 3100.79541595 2179.39995143]
New Q values:  [2605.11298674 3309.89943178 3405.25640163 2179.39995143]
Reward: -1  Episode Reward:  30
xxxxx
x   x
x. ax
xg  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1962.19457908 7218.46078416  790.72804752 2238.07415188]
------
Step:21, Action:South
State  210
Old Q Values:  [1962.19457908 7218.46078416  790.72804752 2238.07415188]
New Q values:  [1962.19457908 7507.99058541  790.72804752 2238.07415188]
Reward: -1  Episode Reward:  29
xxxxx
x   x
x.  x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 3402.23485871 -4059.26960032 -3864.53599326 15404.02090583]
------
Step:22, Action:North
State  288
Old Q Values:  [ 3402.23485871 -4059.26960032 -3864.53599326 15404.02090583]
New Q values:  [ 7647.68828313 -4059.26960032 -3864.53599326 15404.02090583]
Reward: -1  Episode Reward:  28
xxxxx
x   x
x.gax
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18757.07017177  9947.66371231 18392.95055901 20957.98113217]
------
Step:23, Action:North
State  210
Old Q Values:  [1962.19457908 7507.99058541  790.72804752 2238.07415188]
New Q values:  [1335.34226414 7507.99058541  790.72804752 2238.07415188]
Reward: -1  Episode Reward:  27
xxxxx
x  ax
x.  x
x.g x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  1.83688144e+03 -3.22965309e-01  1.73086759e+03]
------
Step:24, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  1.83688144e+03 -3.22965309e-01  1.73086759e+03]
New Q values:  [ 1.06807480e+02  2.98654975e+03 -3.22965309e-01  1.73086759e+03]
Reward: -1  Episode Reward:  26
xxxxx
x   x
x. ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1335.34226414 7507.99058541  790.72804752 2238.07415188]
------
Step:25, Action:West
State  208
Old Q Values:  [18757.07017177  9947.66371231 18392.95055901 20957.98113217]
New Q values:  [18757.07017177  9947.66371231 18392.95055901 19931.94515679]
Reward: -1  Episode Reward:  25
xxxxx
x   x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 38497.8423464   5828.10769162   767.35890262]
------
Step:26, Action:South
State  195
Old Q Values:  [11645.51215632 11421.58176073 16029.85052182  1169.39963074]
New Q values:  [11645.51215632 10179.71196824 16029.85052182  1169.39963074]
Reward: -1  Episode Reward:  24
xxxxx
x   x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[18705.59754651  1201.72649971  3226.49158082 15413.07438253]
------
Step:27, Action:North
State  273
Old Q Values:  [18705.59754651  1201.72649971  3226.49158082 15413.07438253]
New Q values:  [19030.99172252  1201.72649971  3226.49158082 15413.07438253]
Reward: -1  Episode Reward:  23
xxxxx
x   x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 38497.8423464   5828.10769162   767.35890262]
------
Step:28, Action:South
State  195
Old Q Values:  [11645.51215632 10179.71196824 16029.85052182  1169.39963074]
New Q values:  [11645.51215632  9780.58230405 16029.85052182  1169.39963074]
Reward: -1  Episode Reward:  22
xxxxx
x   x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[19030.99172252  1201.72649971  3226.49158082 15413.07438253]
------
Step:29, Action:North
State  273
Old Q Values:  [19030.99172252  1201.72649971  3226.49158082 15413.07438253]
New Q values:  [19161.14939293  1201.72649971  3226.49158082 15413.07438253]
Reward: -1  Episode Reward:  21
xxxxx
x   x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 38497.8423464   5828.10769162   767.35890262]
------
Step:30, Action:South
State  193
Old Q Values:  [18815.93003731 38497.8423464   5828.10769162   767.35890262]
New Q values:  [18815.93003731 21146.88175644  5828.10769162   767.35890262]
Reward: -1  Episode Reward:  20
xxxxx
x  gx
x.  x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[19161.14939293  1201.72649971  3226.49158082 15413.07438253]
------
Step:31, Action:North
State  273
Old Q Values:  [19161.14939293  1201.72649971  3226.49158082 15413.07438253]
New Q values:  [14007.9242841   1201.72649971  3226.49158082 15413.07438253]
Reward: -1  Episode Reward:  19
xxxxx
x   x
x.agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 21146.88175644  5828.10769162   767.35890262]
------
Step:32, Action:South
State  195
Old Q Values:  [11645.51215632  9780.58230405 16029.85052182  1169.39963074]
New Q values:  [11645.51215632  8535.55523638 16029.85052182  1169.39963074]
Reward: -1  Episode Reward:  18
xxxxx
x   x
x.  x
x.agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[14007.9242841   1201.72649971  3226.49158082 15413.07438253]
------
Step:33, Action:West
State  272
Old Q Values:  [ 1721.24991598 -8521.23367799  3038.78068548 46524.79763429]
New Q values:  [ 1721.24991598 -8521.23367799  3038.78068548 35362.88584533]
Reward: 9  Episode Reward:  27
xxxxx
x   x
x.  x
xag x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[55825.22263871 15941.62716192 13169.98702937  1875.31501677]
------
Step:34, Action:North
State  257
Old Q Values:  [55825.22263871 15941.62716192 13169.98702937  1875.31501677]
New Q values:  [107004.09205537  15941.62716192  13169.98702937   1875.31501677]
Reward: 100009  Episode Reward:  100036
xxxxx
x   x
xa  x
x  gx
xxxxx
xxxxx
x...x
xg..x
x..ax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7647.68828313 -4059.26960032 -3864.53599326 15404.02090583]
------
Step:1, Action:West
State  288
Old Q Values:  [ 7647.68828313 -4059.26960032 -3864.53599326 15404.02090583]
New Q values:  [ 7647.68828313 -4059.26960032 -3864.53599326 16775.87411593]
Reward: 9  Episode Reward:  9
xxxxx
x...x
x ..x
xga x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1721.24991598 -8521.23367799  3038.78068548 35362.88584533]
------
Step:2, Action:East
State  272
Old Q Values:  [ 1721.24991598 -8521.23367799  3038.78068548 35362.88584533]
New Q values:  [ 1721.24991598 -8521.23367799  6247.67450897 35362.88584533]
Reward: -1  Episode Reward:  8
xxxxx
x...x
x ..x
x.gax
xxxxx
--------Current State:
State ID:3 row:3, col:3 food_dir:NW ghost_dir:NW
[ 7647.68828313 -4059.26960032 -3864.53599326 16775.87411593]
------
Step:3, Action:North
State  288
Old Q Values:  [ 7647.68828313 -4059.26960032 -3864.53599326 16775.87411593]
New Q values:  [ 5316.87248888 -4059.26960032 -3864.53599326 16775.87411593]
Reward: 9  Episode Reward:  17
xxxxx
x...x
x .ax
x. gx
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:SW
[1335.34226414 7507.99058541  790.72804752 2238.07415188]
------
Step:4, Action:West
State  208
Old Q Values:  [18757.07017177  9947.66371231 18392.95055901 19931.94515679]
New Q values:  [18757.07017177  9947.66371231 18392.95055901 14322.24258965]
Reward: 9  Episode Reward:  26
xxxxx
x...x
x agx
x.  x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 21146.88175644  5828.10769162   767.35890262]
------
Step:5, Action:South
State  193
Old Q Values:  [18815.93003731 21146.88175644  5828.10769162   767.35890262]
New Q values:  [18815.93003731 13082.07501733  5828.10769162   767.35890262]
Reward: -1  Episode Reward:  25
xxxxx
x..gx
x   x
x.a x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[14007.9242841   1201.72649971  3226.49158082 15413.07438253]
------
Step:6, Action:West
State  272
Old Q Values:  [ 1721.24991598 -8521.23367799  6247.67450897 35362.88584533]
New Q values:  [ 1721.24991598 -8521.23367799  6247.67450897 46251.78195474]
Reward: 9  Episode Reward:  34
xxxxx
x.g.x
x   x
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NW ghost_dir:NE
[107004.09205537  15941.62716192  13169.98702937   1875.31501677]
------
Step:7, Action:North
State  261
Old Q Values:  [2689.16578815 1352.37702619 2149.98771624  -12.17474163]
New Q values:  [1813.70170827 1352.37702619 2149.98771624  -12.17474163]
Reward: -1  Episode Reward:  33
xxxxx
x...x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2462.1179767  1968.0625937   715.03544416    3.33862213]
------
Step:8, Action:North
State  181
Old Q Values:  [2462.1179767  1968.0625937   715.03544416    3.33862213]
New Q values:  [2422.78563026 1968.0625937   715.03544416    3.33862213]
Reward: 9  Episode Reward:  42
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         4775.12813192    0.            0.        ]
------
Step:9, Action:South
State  101
Old Q Values:  [   0.         4775.12813192    0.            0.        ]
New Q values:  [   0.         2636.28694185    0.            0.        ]
Reward: -1  Episode Reward:  41
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2422.78563026 1968.0625937   715.03544416    3.33862213]
------
Step:10, Action:North
State  181
Old Q Values:  [2422.78563026 1968.0625937   715.03544416    3.33862213]
New Q values:  [2738.34005356 1968.0625937   715.03544416    3.33862213]
Reward: -1  Episode Reward:  40
xxxxx
xa..x
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:SE
[ 221.30610858 5899.41933819 1868.2303995     0.        ]
------
Step:11, Action:South
State  103
Old Q Values:  [ 221.30610858 5899.41933819 1868.2303995     0.        ]
New Q values:  [ 221.30610858 3180.66975134 1868.2303995     0.        ]
Reward: -1  Episode Reward:  39
xxxxx
x ..x
xag x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[2738.34005356 1968.0625937   715.03544416    3.33862213]
------
Step:12, Action:North
State  181
Old Q Values:  [2738.34005356 1968.0625937   715.03544416    3.33862213]
New Q values:  [1885.62210398 1968.0625937   715.03544416    3.33862213]
Reward: -1  Episode Reward:  38
xxxxx
xag.x
x   x
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:NE ghost_dir:NE
[   0.         2636.28694185    0.            0.        ]
------
Step:13, Action:South
State  101
Old Q Values:  [   0.         2636.28694185    0.            0.        ]
New Q values:  [   0.         1644.33355485    0.            0.        ]
Reward: -1  Episode Reward:  37
xxxxx
x .gx
xa  x
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:NE
[1885.62210398 1968.0625937   715.03544416    3.33862213]
------
Step:14, Action:South
State  181
Old Q Values:  [1885.62210398 1968.0625937   715.03544416    3.33862213]
New Q values:  [1885.62210398 1431.62135235  715.03544416    3.33862213]
Reward: -1  Episode Reward:  36
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1813.70170827 1352.37702619 2149.98771624  -12.17474163]
------
Step:15, Action:East
State  261
Old Q Values:  [1813.70170827 1352.37702619 2149.98771624  -12.17474163]
New Q values:  [1813.70170827 1352.37702619 5483.31740125  -12.17474163]
Reward: -1  Episode Reward:  35
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[14007.9242841   1201.72649971  3226.49158082 15413.07438253]
------
Step:16, Action:West
State  273
Old Q Values:  [14007.9242841   1201.72649971  3226.49158082 15413.07438253]
New Q values:  [14007.9242841   1201.72649971  3226.49158082  7809.62497339]
Reward: -1  Episode Reward:  34
xxxxx
x ..x
x  gx
xa  x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1813.70170827 1352.37702619 5483.31740125  -12.17474163]
------
Step:17, Action:East
State  261
Old Q Values:  [1813.70170827 1352.37702619 5483.31740125  -12.17474163]
New Q values:  [1813.70170827 1352.37702619 6395.10424573  -12.17474163]
Reward: -1  Episode Reward:  33
xxxxx
x ..x
x   x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[14007.9242841   1201.72649971  3226.49158082  7809.62497339]
------
Step:18, Action:North
State  272
Old Q Values:  [ 1721.24991598 -8521.23367799  6247.67450897 46251.78195474]
New Q values:  [ 1709.47688688 -8521.23367799  6247.67450897 46251.78195474]
Reward: -1  Episode Reward:  32
xxxxx
x ..x
x a x
x g x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:SW
[2605.11298674 3309.89943178 3405.25640163 2179.39995143]
------
Step:19, Action:East
State  192
Old Q Values:  [3756.98497708 6916.34738543 1098.43508512 2453.92999194]
New Q values:  [3756.98497708 6916.34738543 6065.89508558 2453.92999194]
Reward: -1  Episode Reward:  31
xxxxx
x ..x
x gax
x   x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[18757.07017177  9947.66371231 18392.95055901 14322.24258965]
------
Step:20, Action:North
State  208
Old Q Values:  [18757.07017177  9947.66371231 18392.95055901 14322.24258965]
New Q values:  [22550.3844916   9947.66371231 18392.95055901 14322.24258965]
Reward: 9  Episode Reward:  40
xxxxx
x .ax
x  gx
x   x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:NW ghost_dir:SW
[28683.54284864  3989.86286733  -180.00807518 50140.52140963]
------
Step:21, Action:West
State  130
Old Q Values:  [28683.54284864  3989.86286733  -180.00807518 50140.52140963]
New Q values:  [ 28683.54284864   3989.86286733   -180.00807518 118996.28249986]
Reward: 100009  Episode Reward:  100049
xxxxx
x a x
x   x
x  gx
xxxxx
xxxxx
x...x
xg..x
x.a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NW
[ 1709.47688688 -8521.23367799  6247.67450897 46251.78195474]
------
Step:1, Action:West
State  272
Old Q Values:  [ 1709.47688688 -8521.23367799  6247.67450897 46251.78195474]
New Q values:  [ 1709.47688688 -8521.23367799  6247.67450897 13297.16994374]
Reward: -9991  Episode Reward:  -9991
xxxxx
x...x
x ..x
xg .x
xxxxx
xxxxx
x.g.x
x...x
xa..x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1813.70170827 1352.37702619 6395.10424573  -12.17474163]
------
Step:1, Action:East
State  261
Old Q Values:  [1813.70170827 1352.37702619 6395.10424573  -12.17474163]
New Q values:  [1813.70170827 1352.37702619 6765.81898352  -12.17474163]
Reward: 9  Episode Reward:  9
xxxxx
x. gx
x...x
x a.x
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NW ghost_dir:NE
[14007.9242841   1201.72649971  3226.49158082  7809.62497339]
------
Step:2, Action:North
State  273
Old Q Values:  [14007.9242841   1201.72649971  3226.49158082  7809.62497339]
New Q values:  [11253.34872483  1201.72649971  3226.49158082  7809.62497339]
Reward: 9  Episode Reward:  18
xxxxx
x. .x
x.agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NW ghost_dir:NE
[18815.93003731 13082.07501733  5828.10769162   767.35890262]
------
Step:3, Action:North
State  195
Old Q Values:  [11645.51215632  8535.55523638 16029.85052182  1169.39963074]
New Q values:  [43592.27879854  8535.55523638 16029.85052182  1169.39963074]
Reward: -1  Episode Reward:  17
xxxxx
x.a.x
x. .x
x  gx
xxxxx
--------Current State:
State ID:1 row:1, col:2 food_dir:NW ghost_dir:SE
[  -180.6          1117.48597573  18639.04650494 129782.24645338]
------
Step:4, Action:West
State  126
Old Q Values:  [   0.          331.64678262  681.20236034 1986.68903898]
New Q values:  [   0.          331.64678262  681.20236034 2029.39388301]
Reward: 9  Episode Reward:  26
xxxxx
xa .x
x. .x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:1 food_dir:SE ghost_dir:SE
[-177.44732869 4097.72755805 3263.32105375 -120.29354603]
------
Step:5, Action:South
State  103
Old Q Values:  [ 221.30610858 3180.66975134 1868.2303995     0.        ]
New Q values:  [ 221.30610858 3181.51198677 1868.2303995     0.        ]
Reward: 9  Episode Reward:  35
xxxxx
x  .x
xa .x
x  gx
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SE
[2464.94139019 1771.27760536 6346.14695411 1554.80203889]
------
Step:6, Action:East
State  181
Old Q Values:  [1885.62210398 1431.62135235  715.03544416    3.33862213]
New Q values:  [1885.62210398 1431.62135235  757.14423588    3.33862213]
Reward: -1  Episode Reward:  34
xxxxx
x  .x
x agx
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:NE
[-5833.78831344  1572.43352738 -8159.85842071   403.06255908]
------
Step:7, Action:South
State  199
Old Q Values:  [  916.09264659  1175.46486763 23844.07972775  1915.70494401]
New Q values:  [  916.09264659  1154.72348098 23844.07972775  1915.70494401]
Reward: -1  Episode Reward:  33
xxxxx
x  .x
x  .x
x agx
xxxxx
--------Current State:
State ID:3 row:3, col:2 food_dir:NE ghost_dir:NE
[1.64433000e+00 5.04537534e+02 2.28379178e+03 1.96683368e+03]
------
Step:8, Action:West
State  276
Old Q Values:  [-2561.28592178 -5807.06396197  4309.93931733  2361.76105495]
New Q values:  [-2561.28592178 -5807.06396197  4309.93931733  2973.85011704]
Reward: -1  Episode Reward:  32
xxxxx
x  .x
x  .x
xag.x
xxxxx
--------Current State:
State ID:3 row:3, col:1 food_dir:NE ghost_dir:NE
[1813.70170827 1352.37702619 6765.81898352  -12.17474163]
------
Step:9, Action:North
State  260
Old Q Values:  [-7643.81886164 -5704.51612281  2636.85720615 -5679.36893145]
New Q values:  [-2002.86312631 -5704.51612281  2636.85720615 -5679.36893145]
Reward: -1  Episode Reward:  31
xxxxx
x  .x
xa .x
xg .x
xxxxx
--------Current State:
State ID:2 row:2, col:1 food_dir:NE ghost_dir:SW
[    0.         -5536.05678243  3517.54806117     0.        ]
------
Step:10, Action:East
State  183
Old Q Values:  [2464.94139019 1771.27760536 6346.14695411 1554.80203889]
New Q values:  [2464.94139019 1771.27760536 4462.16753867 1554.80203889]
Reward: -1  Episode Reward:  30
xxxxx
x  .x
x a.x
x g.x
xxxxx
--------Current State:
State ID:2 row:2, col:2 food_dir:NE ghost_dir:SW
[3925.89935061 -200.61022961 6414.36252342    0.        ]
------
Step:11, Action:East
State  196
Old Q Values:  [1914.01796498 2565.46179201 7368.48975538  231.67262594]
New Q values:  [1914.01796498 2565.46179201 9717.91124963  231.67262594]
Reward: 9  Episode Reward:  39
xxxxx
x  .x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:NW ghost_dir:NW
[22550.3844916   9947.66371231 18392.95055901 14322.24258965]
------
Step:12, Action:North
State  218
Old Q Values:  [ 677.77179188 6329.78476486    0.          915.50499392]
New Q values:  [1172.47364244 6329.78476486    0.          915.50499392]
Reward: 9  Episode Reward:  48
xxxxx
x  ax
x   x
x g.x
xxxxx
--------Current State:
State ID:1 row:1, col:3 food_dir:SW ghost_dir:SW
[ 1.06807480e+02  2.98654975e+03 -3.22965309e-01  1.73086759e+03]
------
Step:13, Action:South
State  138
Old Q Values:  [ 1.06807480e+02  2.98654975e+03 -3.22965309e-01  1.73086759e+03]
New Q values:  [ 1.06807480e+02  2.11294949e+03 -3.22965309e-01  1.73086759e+03]
Reward: -1  Episode Reward:  47
xxxxx
x   x
x gax
x  .x
xxxxx
--------Current State:
State ID:2 row:2, col:3 food_dir:SW ghost_dir:NW
[2461.80523605 3063.09861546  606.149024   2630.0798999 ]
------
Step:14, Action:South
State  208
Old Q Values:  [22550.3844916   9947.66371231 18392.95055901 14322.24258965]
New Q values:  [22550.3844916  69017.2277197  18392.95055901 14322.24258965]
Reward: 100009  Episode Reward:  100056
xxxxx
x g x
x   x
x  ax
xxxxx
<Figure size 2160x1800 with 0 Axes>
In [14]:
from Game import Game
from policies.qLearning import QLearningParams
from Maps import *
import random

"""
  Launches a single experiment/game: Q-learning on the medium map.
"""

if __name__ == '__main__':
    # Learner hyper-parameters, assigned left to right (gamma, eps, alpha).
    # NOTE(review): presumably discount factor, exploration rate and learning
    # rate respectively -- confirm against QLearningParams.
    params = QLearningParams()
    params.gamma, params.eps, params.alpha = 0.1, 0.01, 0.9

    # One ghost on the medium map, verbose output disabled.
    game = Game(
        policy="qlearning", params=params,
        mapp=medium_map, num_ghosts=1,
        num_episodes=2200, num_steps_per_episode=800,
        verbose=False,
    )

    # Run the configured episodes, then report the results.
    game.run()
    game.report_results()
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 ...
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Episode # 0
Episode # 100
Episode # 200
Episode # 300
Episode # 400
Episode # 500
Episode # 600
Episode # 700
Episode # 800
Episode # 900
Episode # 1000
Episode # 1100
Episode # 1200
Episode # 1300
Episode # 1400
Episode # 1500
Episode # 1600
Episode # 1700
Episode # 1800
Episode # 1900
Episode # 2000
Episode # 2100
<Figure size 2160x1800 with 0 Axes>
In [26]:
from Game import Game
from policies.qLearning import QLearningParams
from Maps import *
import random

"""
  Launches a single experiment/game: Q-learning on the large map.
"""

if __name__ == '__main__':
    # Learner hyper-parameters, assigned left to right (gamma, eps, alpha).
    # NOTE(review): presumably discount factor, exploration rate and learning
    # rate respectively -- confirm against QLearningParams.
    params = QLearningParams()
    params.gamma, params.eps, params.alpha = 0.9, 0.01, 0.4

    # One ghost on the large map, verbose output disabled.
    game = Game(
        policy="qlearning", params=params,
        mapp=large_map, num_ghosts=1,
        num_episodes=3100, num_steps_per_episode=600,
        verbose=False,
    )

    # Run the configured episodes, then report the results.
    game.run()
    game.report_results()
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 ...
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Episode # 0
Episode # 100
Episode # 200
Episode # 300
Episode # 400
Episode # 500
Episode # 600
Episode # 700
Episode # 800
Episode # 900
Episode # 1000
Episode # 1100
Episode # 1200
Episode # 1300
Episode # 1400
Episode # 1500
Episode # 1600
Episode # 1700
Episode # 1800
Episode # 1900
Episode # 2000
Episode # 2100
Episode # 2200
Episode # 2300
Episode # 2400
Episode # 2500
Episode # 2600
Episode # 2700
Episode # 2800
Episode # 2900
Episode # 3000
<Figure size 2160x1800 with 0 Axes>
In [3]: